commit 6fa5435894fc593813354a27ad591e0fb092edb9
parent 4ebc559661477726f3d58366b7e3395ccd41812c
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Tue, 28 Apr 2026 17:47:33 +0200
Use temporary buffers to store subtree data
This validation prepares for the parallelization of subtree
construction: each subtree is stored in its own memory buffers before
being copied into the tree's buffers.
The goal is to minimize synchronization between threads. Even if a
thread may still be blocked by the memory allocation of the temporary
buffers, it can then operate without mutual exclusion.
Note that copying the temporary buffers into the tree buffers is a
choice aimed at keeping the tree structure as simple as possible. These
buffers could simply be temporary and referenced as sub-buffers of the
main buffers. This would avoid memory swapping, the additional memory
overhead during the copy, and finally, synchronization between threads.
However, the tree structure would then be more complex and would also
depend on the number of subtrees actually constructed. Keeping the tree
structure independent of how it was constructed sucks less by separating
the structures specific to the tree's construction from the tree
structure itself.
Note that to limit the memory overhead during construction, the
temporary buffers are freed as soon as they are no longer in use.
Diffstat:
| M | src/sln_tree_build.c | | | 223 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------- |
1 file changed, 181 insertions(+), 42 deletions(-)
diff --git a/src/sln_tree_build.c b/src/sln_tree_build.c
@@ -116,27 +116,32 @@ build_leaf_polyline_scratch_release
}
static INLINE res_T
-build_leaf_polyline_from_1line(struct sln_tree* tree, struct sln_node* leaf)
+build_leaf_polyline_from_1line
+ (struct sln_tree* tree,
+ struct sln_node* leaf,
+ struct darray_vertex* vertices)
{
size_t vertices_range[2];
res_T res = RES_OK;
+ ASSERT(tree && leaf && vertices);
+
/* Assume that there is only one line per leaf */
ASSERT(leaf->range[0] == leaf->range[1]);
/* Line meshing */
res = line_mesh
(/* in */ tree, leaf->range[0], tree->args.nvertices_hint,
- /* out */ &tree->vertices, vertices_range);
+ /* out */ vertices, vertices_range);
if(res != RES_OK) goto error;
/* Decimate the line mesh */
- res = polyline_decimate(tree->sln, darray_vertex_data_get(&tree->vertices),
+ res = polyline_decimate(tree->sln, darray_vertex_data_get(vertices),
vertices_range, (float)tree->args.mesh_decimation_err, tree->args.mesh_type);
if(res != RES_OK) goto error;
/* Shrink the size of the vertices */
- darray_vertex_resize(&tree->vertices, vertices_range[1] + 1);
+ darray_vertex_resize(vertices, vertices_range[1] + 1);
/* Setup the leaf polyline */
leaf->ivertex = vertices_range[0];
@@ -167,7 +172,8 @@ static INLINE res_T
build_leaf_polyline_from_Nlines
(struct sln_tree* tree,
struct sln_node* leaf,
- struct build_leaf_polyline_scratch* scratch)
+ struct build_leaf_polyline_scratch* scratch,
+ struct darray_vertex* vertices)
{
const struct sln_vertex* src = NULL;
struct sln_vertex* dst = NULL;
@@ -178,7 +184,7 @@ build_leaf_polyline_from_Nlines
unsigned i = 0;
res_T res = RES_OK;
- ASSERT(tree && leaf && scratch);
+ ASSERT(tree && leaf && scratch && vertices);
/* Compute the number of lines of the leaf */
nlines = size_t_to_unsigned(leaf->range[1] - leaf->range[0] + 1/*inclusive*/);
@@ -235,15 +241,15 @@ build_leaf_polyline_from_Nlines
if(res != RES_OK) goto error;
/* Setup the leaf */
- leaf->ivertex = darray_vertex_size_get(&tree->vertices);
+ leaf->ivertex = darray_vertex_size_get(vertices);
leaf->nvertices = NODE(0/*leaf*/)->nvertices;
/* Copy the leaf vertices from temporary buffer to the vertex buffer of the
* tree */
- res = darray_vertex_resize(&tree->vertices, leaf->ivertex + leaf->nvertices);
+ res = darray_vertex_resize(vertices, leaf->ivertex + leaf->nvertices);
if(res != RES_OK) goto error;
src = darray_vertex_cdata_get(&scratch->vertices) + NODE(0/*leaf*/)->ivertex;
- dst = darray_vertex_data_get(&tree->vertices) + leaf->ivertex;
+ dst = darray_vertex_data_get(vertices) + leaf->ivertex;
memsz = leaf->nvertices * sizeof(*src);
memcpy(dst, src, memsz);
@@ -259,7 +265,8 @@ static INLINE res_T
build_leaf_polyline
(struct sln_tree* tree,
struct sln_node* leaf,
- struct build_leaf_polyline_scratch* scratch)
+ struct build_leaf_polyline_scratch* scratch,
+ struct darray_vertex* vertices)
{
size_t nlines = 0; /* Number of lines in the leaf */
ASSERT(tree && leaf);
@@ -267,9 +274,9 @@ build_leaf_polyline
nlines = leaf->range[1] - leaf->range[0] + 1;
if(nlines == 1) {
- return build_leaf_polyline_from_1line(tree, leaf);
+ return build_leaf_polyline_from_1line(tree, leaf, vertices);
} else {
- return build_leaf_polyline_from_Nlines(tree, leaf, scratch);
+ return build_leaf_polyline_from_Nlines(tree, leaf, scratch, vertices);
}
}
@@ -714,7 +721,8 @@ static res_T
build_polylines
(struct sln_tree* tree,
const size_t root_index,
- const size_t nodes_count) /* Total number of nodes in the tree (for debug) */
+ const size_t nodes_count, /* Total number of nodes in the tree (for debug) */
+ struct darray_vertex* vertices)
{
/* Stack */
#define STACK_SIZE (SLN_TREE_DEPTH_MAX*SLN_TREE_ARITY_MAX)
@@ -750,7 +758,7 @@ build_polylines
const size_t istack_saved = istack;
if(IS_LEAF(inode)) {
- res = build_leaf_polyline(tree, NODE(inode), &scratch);
+ res = build_leaf_polyline(tree, NODE(inode), &scratch, vertices);
if(res != RES_OK) goto error;
inode = stack[--istack]; /* Pop the next node */
@@ -772,10 +780,10 @@ build_polylines
#endif
if(tree->args.collapse_polylines) {
res = collapse_child_polylines(tree, inode, nchildren, &tree->nodes,
- &scratch.vertices_tmp, &tree->vertices);
+ &scratch.vertices_tmp, vertices);
} else {
res = merge_child_polylines
- (tree, inode, nchildren, &tree->nodes, &tree->vertices);
+ (tree, inode, nchildren, &tree->nodes, vertices);
}
if(res != RES_OK) goto error;
@@ -830,7 +838,8 @@ error:
static res_T
partition_lines_depth_first
(struct sln_tree* tree,
- const size_t root_index)
+ const size_t root_index,
+ struct darray_node* nodes)
{
/* Stack */
#define STACK_SIZE (SLN_TREE_DEPTH_MAX*(SLN_TREE_ARITY_MAX-1))
@@ -846,13 +855,14 @@ partition_lines_depth_first
size_t inode = 0;
res_T res = RES_OK;
- ASSERT(tree); /* Pre-condition */
- ASSERT(root_index < darray_node_size_get(&tree->nodes));
+ /* Pre-condition */
+ ASSERT(tree && nodes);
+ ASSERT(root_index < darray_node_size_get(nodes));
#define LOG_MSG "Partitioning: %3d%%\n"
- #define NODE(Id) (darray_node_data_get(&tree->nodes) + (Id))
+ #define NODE(Id) (darray_node_data_get(nodes) + (Id))
#define CREATE_NODE { \
- res = darray_node_push_back(&tree->nodes, &SLN_NODE_NULL); \
+ res = darray_node_push_back(nodes, &SLN_NODE_NULL); \
if(res != RES_OK) goto error; \
} (void)0
@@ -898,7 +908,7 @@ partition_lines_depth_first
size_t i = 0;
/* Calculate the index of the first child */
- size_t ichildren = darray_node_size_get(&tree->nodes);
+ size_t ichildren = darray_node_size_get(nodes);
/* Compute how the number of children that node has */
nchildren = node_child_count(NODE(inode), tree->args.arity);
@@ -1108,6 +1118,145 @@ error:
goto exit;
}
+static res_T
+build_subtrees(struct sln_tree* tree, const size_t subtrees[], size_t nsubtrees)
+{
+ /* Subtree temporary data */
+ struct scratch {
+ struct darray_node nodes;
+ struct darray_vertex vertices;
+ size_t nnodes;
+ }* scratches;
+
+ /* Miscellaneous */
+ struct mem_allocator* allocator = NULL;
+ size_t i = 0;
+ ATOMIC res = RES_OK;
+
+ /* Pre-conditions */
+ ASSERT(tree && subtrees && nsubtrees >= 1);
+
+ #define NODE(Id) (darray_node_data_get(&tree->nodes) + (Id))
+
+ allocator = tree->sln->allocator;
+
+ /* Allocate the per subtree scratch */
+ scratches = MEM_CALLOC(allocator, nsubtrees, sizeof(*scratches));
+ if(!scratches) { res = RES_MEM_ERR; goto error; }
+ FOR_EACH(i, 0, nsubtrees) {
+ darray_node_init(allocator, &scratches[i].nodes);
+ darray_vertex_init(allocator, &scratches[i].vertices);
+ }
+
+ /* This loop should be parallelized, with a block size of 1 */
+ for(i=0; i < nsubtrees; ++i) {
+ struct sln_node root = SLN_NODE_NULL;
+ struct scratch* scratch = scratches + i;
+ size_t nnodes_s = 0;
+ size_t nnodes_t = 0;
+ res_T res2 = RES_OK;
+
+ /* Skip the remaining subtrees in case of an error */
+ if(ATOMIC_GET(&res) != RES_OK) continue;
+
+ /* Copy the subtree root in the temporary node buffer */
+ #pragma omp critical
+ {
+ root = *NODE(subtrees[i]);
+ }
+ res2 = darray_node_push_back(&scratch->nodes, &root);
+ if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; }
+
+ /* Partition the line */
+ res2 = partition_lines_depth_first(tree, 0/*root*/, &scratch->nodes);
+ if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; }
+
+ #pragma omp critical
+ {
+ struct sln_node* dst = NULL;
+ const struct sln_node* src = NULL;
+
+ /* Increase the node buffer to store the subtree nodes */
+ nnodes_s = darray_node_size_get(&scratch->nodes);
+ nnodes_t = darray_node_size_get(&tree->nodes);
+ res2 = darray_node_resize(&tree->nodes, nnodes_t + nnodes_s - 1/*root*/);
+ if(res2 == RES_OK) {
+ /* Set the offset to the first child of the root node of the subtree */
+ NODE(subtrees[i])->offset = ui64_to_ui32(nnodes_t - subtrees[i]);
+
+ /* Copy the subtree nodes in the main node buffer */
+ src = darray_node_cdata_get(&scratch->nodes) + 1/*discard root*/;
+ dst = darray_node_data_get(&tree->nodes) + nnodes_t;
+ memcpy(dst, src, (nnodes_s-1/*discard root*/)*sizeof(*src));
+ }
+ }
+ if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; }
+
+ /* Free the temporary buffer but retain the number of nodes in the subtree.
+ * This number is used later when constructing polylines (see below) */
+ darray_node_purge(&scratch->nodes);
+ scratch->nnodes = nnodes_s;
+ }
+
+ /* This loop should be parallelized, with a block size of 1 */
+ for(i=0; i < nsubtrees; ++i) {
+ struct scratch* scratch = scratches + i;
+ size_t inode = subtrees[i];
+ size_t nverts_s = 0;
+ size_t nverts_t = 0;
+ size_t j = 0;
+ res_T res2 = RES_OK;
+
+ /* Skip the remaining subtrees in case of an error */
+ if(ATOMIC_GET(&res) != RES_OK) continue;
+
+ res2 = build_polylines(tree, inode, scratch->nnodes, &scratch->vertices);
+ if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; }
+
+ #pragma omp critical
+ {
+ struct sln_vertex* dst = NULL;
+ const struct sln_vertex* src = NULL;
+
+ /* Increase the vertex buffer to store the subtree polylines */
+ nverts_s = darray_vertex_size_get(&scratch->vertices);
+ nverts_t = darray_vertex_size_get(&tree->vertices);
+ res2 = darray_vertex_resize(&tree->vertices, nverts_t + nverts_s);
+ if(res2 == RES_OK) {
+ /* Copy the subtree nodes in the main vertex buffer */
+ src = darray_vertex_cdata_get(&scratch->vertices);
+ dst = darray_vertex_data_get(&tree->vertices) + nverts_t;
+ memcpy(dst, src, nverts_s*sizeof(*src));
+ }
+ }
+ if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; }
+
+ /* Update the index of the first vertex of the polyline for the nodes in the
+ * subtree based on their new locations, i.e., the main vertex buffer */
+ NODE(subtrees[i])->ivertex += nverts_t;
+ FOR_EACH(j, 0, scratch->nnodes-1/*The root has been just treated*/) {
+ inode = subtrees[i] + NODE(subtrees[i])->offset + j;
+ NODE(inode)->ivertex += nverts_t;
+ }
+
+ /* Free the temporary vertex buffer */
+ darray_node_purge(&scratch->nodes);
+ }
+
+ #undef NODE
+
+exit:
+ /* Clean up temporary data */
+ FOR_EACH(i, 0, nsubtrees) {
+ darray_node_release(&scratches[i].nodes);
+ darray_vertex_release(&scratches[i].vertices);
+ }
+ MEM_RM(allocator, scratches);
+ return (res_T)res;
+error:
+ goto exit;
+}
+
/*******************************************************************************
* Local functions
******************************************************************************/
@@ -1128,12 +1277,15 @@ tree_build(struct sln_tree* tree)
/* Setup the root node */
node.range[0] = 0;
node.range[1] = nlines - 1/* inclusive bound */;
- if((res = darray_node_push_back(&tree->nodes, &node)) != RES_OK) goto error;
+ res = darray_node_push_back(&tree->nodes, &node);
+ if(res != RES_OK) goto error;
- if((res = partition_lines_depth_first(tree, iroot)) != RES_OK) goto error;
+ res = partition_lines_depth_first(tree, iroot, &tree->nodes);
+ if(res != RES_OK) goto error;
nnodes = darray_node_size_get(&tree->nodes);
- if((res = build_polylines(tree, iroot, nnodes)) != RES_OK) goto error;
+ res = build_polylines(tree, iroot, nnodes, &tree->vertices);
+ if(res != RES_OK) goto error;
exit:
return res;
@@ -1213,29 +1365,16 @@ tree_build2(struct sln_tree* tree)
nnodes_upper_tree = darray_node_size_get(&tree->nodes);
nnodes_upper_tree -= nsubtrees; /* Exclude the root node of the subtrees */
- /* This will the loop to parallelize, with a chunk size of 1 */
- for(i=0; i < nsubtrees; ++i) {
- size_t nnodes_subtree = darray_node_size_get(&tree->nodes);
-
- res = partition_lines_depth_first(tree, subtrees[i]);
- if(res != RES_OK) goto error;
-
- nnodes_subtree = darray_node_size_get(&tree->nodes) - nnodes_subtree;
- nnodes_subtree += 1; /* Do not forget the root node */
- res = build_polylines(tree, subtrees[i], nnodes_subtree);
- if(res != RES_OK) goto error;
- }
+ res = build_subtrees(tree, subtrees, nsubtrees);
+ if(res != RES_OK) goto error;
/* Build the polylines of the upper level if necessary */
if(nnodes_upper_tree != 0) {
- res = build_polylines(tree, 0/*root*/, nnodes_upper_tree);
+ res = build_polylines(tree, 0/*root*/, nnodes_upper_tree, &tree->vertices);
if(res != RES_OK) goto error;
}
- #undef NODE
- #undef CREATE_NODE
- #undef ENQUEUE
- #undef DEQUEUE
+ #undef STACK_SIZE
exit:
return res;