sln_tree_build.c (52530B)
1 /* Copyright (C) 2022, 2026 |Méso|Star> (contact@meso-star.com) 2 * Copyright (C) 2026 Université de Lorraine 3 * Copyright (C) 2022 Centre National de la Recherche Scientifique 4 * Copyright (C) 2022 Université Paul Sabatier 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program. If not, see <http://www.gnu.org/licenses/>. */ 18 19 #include "sln.h" 20 #include "sln_device_c.h" 21 #include "sln_polyline.h" 22 #include "sln_tree_c.h" 23 24 #include <star/shtr.h> 25 26 #include <rsys/cstr.h> 27 #include <rsys/math.h> 28 29 #include <omp.h> 30 31 /* Structure used to track the progress of a construction phase */ 32 struct progress { 33 /* Absolute progression */ 34 size_t total; /* Total number of items to process */ 35 size_t processed; /* Number of items already processed */ 36 37 /* Relative progression */ 38 int pcent; /* Currently displayed percentage */ 39 int pcent_step; /* Increment between displayed percentages */ 40 41 const char* msg; /* Message to add before the displayed percentage */ 42 }; 43 #define PROGRESS_DEFAULT__ {0,0,0,10,NULL} 44 static const struct progress PROGRESS_DEFAULT = PROGRESS_DEFAULT__; 45 46 /* Structure containing temporary variables used to create polyline on a leaf 47 * These variables are not declared locally within the function responsible for 48 * constructing this polyline in order to avoid having to allocate and 49 * deallocate them with each call to the function. Instead, they are passed as 50 * function inputs so as to share, as much as possible, the memory space 51 * allocated during previous calls to the function, thereby reducing the cost of 52 * allocation and deallocation. */ 53 struct build_leaf_polyline_scratch { 54 struct darray_vertex vertices; /* Polyline vertices */ 55 struct darray_node nodes; /* Dummy leaf nodes */ 56 57 /* Temp vertex buffer used when merging polylines into a single one */ 58 struct darray_vertex vertices_tmp; 59 }; 60 61 static res_T 62 merge_child_polylines 63 (struct sln_tree* tree, 64 const size_t inode, /* Node at which child polylines are merged */ 65 const unsigned nchildren, /* #children to merge */ 66 struct darray_node* nodes, /* List of nodes */ 67 struct darray_vertex* vertices); /* List of polyline vertices */ 68 69 static res_T 70 collapse_child_polylines 71 (struct sln_tree* tree, 72 const size_t inode, /* Node at which child polylines are merged */ 73 const unsigned nchildren, /* #children to collapse */ 74 struct darray_node* nodes, /* List of nodes */ 75 struct darray_vertex* scratch, /* Temporary buffer of vertices */ 76 struct darray_vertex* vertices); /* List of polyline vertices */ 77 78 /* A multiplier to be applied to the calculated ka values in order to mitigate 79 * numerical issues related to ka interpolation in the case of a tree with an 80 * upper bound. This interpolation must ensure that the interpolated ka value is 81 * well above the actual ka value, which may not be the case due to numerical 82 * inaccuracies. Hence this constant, which defines a percentage of ka to be 83 * added to the calculated ka values to ensure that any interpolated value is 84 * well above the actual ka value. */ 85 #define KA_ADJUSTEMENT 1.01 86 87 /* Maximum queue size used for the breadth-first tree traversal. For a perfectly 88 * balanced tree, this corresponds to the maximum number of leaves a tree can 89 * have. Note that this value does not need to be too high, since the 90 * breadth-first traversal is only used to partition the first few levels of the 91 * tree until a sufficient number of subtrees have been identified so that they 92 * can then be constructed in parallel using a depth-first algorithm */ 93 #define QUEUE_SIZE_MAX 256 94 95 /******************************************************************************* 96 * Helper functions 97 ******************************************************************************/ 98 static FINLINE uint32_t 99 ui64_to_ui32(const uint64_t ui64) 100 { 101 if(ui64 > UINT32_MAX) 102 VFATAL("%s: overflow %lu.\n", ARG2(FUNC_NAME, ((unsigned long)ui64))); 103 return (uint32_t)ui64; 104 } 105 106 static FINLINE unsigned 107 size_t_to_unsigned(const size_t sz) 108 { 109 if(sz > UINT_MAX) 110 VFATAL("%s: overflow %lu.\n", ARG2(FUNC_NAME, ((unsigned long)sz))); 111 return (unsigned)sz; 112 } 113 114 static void 115 progress_print(struct sln_device* dev, const struct progress* progress) 116 { 117 ASSERT(dev && progress); 118 if(progress->msg) { 119 INFO(dev, "%s%3d%%\n", progress->msg, progress->pcent); 120 } else { 121 INFO(dev, "%3d%%\n", progress->pcent); 122 } 123 } 124 125 static void 126 progress_update 127 (struct sln_device* dev, 128 struct progress* progress, 129 const size_t count) 130 { 131 int pcent = 0; 132 ASSERT(dev && progress); 133 134 #pragma omp critical 135 { 136 progress->processed += count; 137 ASSERT(progress->processed <= progress->total); 138 139 pcent = (int) 140 ( (double)progress->processed*100.0 141 / (double)progress->total 142 + 0.5 /* round */); 143 144 if(pcent/progress->pcent_step > progress->pcent/progress->pcent_step) { 145 progress->pcent = pcent; 146 progress_print(dev, progress); 147 } 148 } 149 } 150 151 static INLINE void 152 build_leaf_polyline_scratch_init 153 (struct mem_allocator* allocator, 154 struct build_leaf_polyline_scratch* scratch) 155 { 156 ASSERT(scratch); 157 darray_vertex_init(allocator, &scratch->vertices); 158 darray_vertex_init(allocator, &scratch->vertices_tmp); 159 darray_node_init(allocator, &scratch->nodes); 160 } 161 162 static INLINE void 163 build_leaf_polyline_scratch_release 164 (struct build_leaf_polyline_scratch* scratch) 165 { 166 ASSERT(scratch); 167 darray_vertex_release(&scratch->vertices); 168 darray_vertex_release(&scratch->vertices_tmp); 169 darray_node_release(&scratch->nodes); 170 } 171 172 static INLINE res_T 173 build_leaf_polyline_from_1line 174 (struct sln_tree* tree, 175 struct sln_node* leaf, 176 struct darray_vertex* vertices) 177 { 178 size_t vertices_range[2]; 179 res_T res = RES_OK; 180 181 ASSERT(tree && leaf && vertices); 182 183 /* Assume that there is only one line per leaf */ 184 ASSERT(leaf->range[0] == leaf->range[1]); 185 186 /* Line meshing */ 187 res = line_mesh 188 (/* in */ tree, leaf->range[0], tree->args.nvertices_hint, 189 /* out */ vertices, vertices_range); 190 if(res != RES_OK) goto error; 191 192 /* Decimate the line mesh */ 193 res = polyline_decimate(tree->sln, darray_vertex_data_get(vertices), 194 vertices_range, (float)tree->args.mesh_decimation_err, tree->args.mesh_type); 195 if(res != RES_OK) goto error; 196 197 /* Shrink the size of the vertices */ 198 darray_vertex_resize(vertices, vertices_range[1] + 1); 199 200 /* Setup the leaf polyline */ 201 leaf->ivertex = vertices_range[0]; 202 leaf->nvertices = ui64_to_ui32(vertices_range[1] - vertices_range[0] + 1); 203 204 exit: 205 return res; 206 error: 207 goto exit; 208 } 209 210 /* Build the polyline of a leaf node containing more than one line. 211 * 212 * Each line is drawn as if it were the only one on the leaf. This allows the 213 * line meshing routine to be used, and consequently the algorithm that relies 214 * on the properties of the lines (symmetry, monotonicity on one half). Next, 215 * this set of polylines is merged/collapsed into a single polyline, which is 216 * the leaf polyline. 217 * 218 * To reuse the merging/collapsing designed for internal nodes, the function 219 * treats a leaf as if it were an internal node. A list of temporary nodes is 220 * thus created, containing not only the leaf node but also its “children” 221 * i.e., its lines. 222 * 223 * Once created, the leaf’s polyline is finally copied into the tree’s vertex 224 * buffer */ 225 static INLINE res_T 226 build_leaf_polyline_from_Nlines 227 (struct sln_tree* tree, 228 struct sln_node* leaf, 229 struct build_leaf_polyline_scratch* scratch, 230 struct darray_vertex* vertices) 231 { 232 const struct sln_vertex* src = NULL; 233 struct sln_vertex* dst = NULL; 234 size_t memsz = 0; 235 236 size_t nnodes = 0; 237 unsigned nlines = 0; /* Number of lines in the leaf */ 238 unsigned i = 0; 239 res_T res = RES_OK; 240 241 ASSERT(tree && leaf && scratch && vertices); 242 243 /* Compute the number of lines of the leaf */ 244 nlines = size_t_to_unsigned(leaf->range[1] - leaf->range[0] + 1/*inclusive*/); 245 ASSERT(nlines > 1); /* Assume there is more than one line per leaf */ 246 247 nnodes = nlines + 1/*leaf*/; 248 249 /* Helper macro */ 250 #define NODE(Id) (darray_node_data_get(&scratch->nodes) + (Id)) 251 252 /* Allocate the temporary list of nodes, one node per leaf to which on 253 * additionnal node is added for the leaf */ 254 res = darray_node_resize(&scratch->nodes, nnodes); 255 if(res != RES_OK) goto error; 256 memset(NODE(0), 0, sizeof(struct sln_node)*nnodes); 257 258 /* The leaf node will be the first one. Its "child" representing the node of 259 * each lines, will be store after it in the node list */ 260 NODE(0)->offset = 1; 261 NODE(0)->range[0] = leaf->range[0]; 262 NODE(0)->range[1] = leaf->range[1]; 263 264 FOR_EACH(i, 0, nlines) { 265 size_t vertices_range[2] = {0, 0}; /* Range in the vertex buffer */ 266 const size_t ichild = NODE(0)->offset + i; /* Index of the child */ 267 const size_t iline = leaf->range[0] + i; 268 269 /* Mesh the line in the temporary vertex buffer */ 270 res = line_mesh(tree, iline, tree->args.nvertices_hint, &scratch->vertices, 271 vertices_range); 272 if(res != RES_OK) goto error; 273 274 /* Decimate the line mesh */ 275 res = polyline_decimate(tree->sln, 276 darray_vertex_data_get(&scratch->vertices), vertices_range, 277 (float)tree->args.mesh_decimation_err, tree->args.mesh_type); 278 if(res != RES_OK) goto error; 279 280 NODE(ichild)->ivertex = vertices_range[0]; 281 NODE(ichild)->nvertices = ui64_to_ui32(vertices_range[1] - vertices_range[0] + 1); 282 NODE(ichild)->range[0] = iline; 283 NODE(ichild)->range[1] = iline; 284 } 285 286 /* Merge/collapse the polylines of each lines associated to nodes. 287 * These nodes are the "children" of the leaf */ 288 if(tree->args.collapse_polylines) { 289 res = collapse_child_polylines(tree, 0, nlines, 290 &scratch->nodes, &scratch->vertices_tmp, &scratch->vertices); 291 } else { 292 res = merge_child_polylines(tree, 0, nlines, 293 &scratch->nodes, &scratch->vertices); 294 } 295 if(res != RES_OK) goto error; 296 297 /* Setup the leaf */ 298 leaf->ivertex = darray_vertex_size_get(vertices); 299 leaf->nvertices = NODE(0/*leaf*/)->nvertices; 300 301 /* Copy the leaf vertices from temporary buffer to the vertex buffer of the 302 * tree */ 303 res = darray_vertex_resize(vertices, leaf->ivertex + leaf->nvertices); 304 if(res != RES_OK) goto error; 305 src = darray_vertex_cdata_get(&scratch->vertices) + NODE(0/*leaf*/)->ivertex; 306 dst = darray_vertex_data_get(vertices) + leaf->ivertex; 307 memsz = leaf->nvertices * sizeof(*src); 308 memcpy(dst, src, memsz); 309 310 #undef NODE 311 312 exit: 313 return res; 314 error: 315 goto exit; 316 } 317 318 static INLINE res_T 319 build_leaf_polyline 320 (struct sln_tree* tree, 321 struct sln_node* leaf, 322 struct build_leaf_polyline_scratch* scratch, 323 struct darray_vertex* vertices) 324 { 325 size_t nlines = 0; /* Number of lines in the leaf */ 326 ASSERT(tree && leaf); 327 328 nlines = leaf->range[1] - leaf->range[0] + 1; 329 330 if(nlines == 1) { 331 return build_leaf_polyline_from_1line(tree, leaf, vertices); 332 } else { 333 return build_leaf_polyline_from_Nlines(tree, leaf, scratch, vertices); 334 } 335 } 336 337 static INLINE double 338 eval_ka 339 (const struct sln_node* node, 340 const struct sln_vertex* vertices, 341 const double wavenumber) 342 { 343 struct sln_mesh mesh = SLN_MESH_NULL; 344 double ka = 0; 345 ASSERT(node && vertices); 346 347 /* Whether the mesh to be constructed corresponds to the spectrum or its upper 348 * limit, use the node mesh to calculate the value of ka at a given wave 349 * number. Calculating the value from the node lines would take far too long*/ 350 mesh.vertices = vertices + node->ivertex; 351 mesh.nvertices = node->nvertices; 352 ka = sln_mesh_eval(&mesh, wavenumber); 353 return ka; 354 } 355 356 /* Merge all child polylines in a single step. The polylines are merged before 357 * being decimated */ 358 res_T 359 merge_child_polylines 360 (struct sln_tree* tree, 361 const size_t inode, 362 const unsigned nchildren, 363 struct darray_node* nodes, 364 struct darray_vertex* vertex_list) 365 { 366 /* Helper constant */ 367 static const size_t NO_MORE_VERTEX = SIZE_MAX; 368 369 /* Polyline vertices */ 370 #define NCHILDREN_MAX \ 371 ( SLN_TREE_ARITY_MAX > SLN_LEAF_NLINES_MAX \ 372 ? SLN_TREE_ARITY_MAX : SLN_LEAF_NLINES_MAX) 373 size_t children_ivtx[NCHILDREN_MAX] = {0}; 374 375 size_t vertices_range[2] = {0,0}; 376 struct sln_vertex* vertices = NULL; 377 size_t ivtx = 0; 378 size_t nvertices = 0; 379 380 /* Miscellaneous */ 381 unsigned i = 0; 382 res_T res = RES_OK; 383 384 /* Pre-conditions */ 385 ASSERT(tree && inode < darray_node_size_get(nodes)); 386 ASSERT(nchildren >= 2 && nchildren <= NCHILDREN_MAX); 387 388 #define NODE(Id) (darray_node_data_get(nodes) + (Id)) 389 390 /* Compute the number of vertices to be merged, 391 * i.e., the sum of vertices of the children */ 392 nvertices = 0; 393 FOR_EACH(i, 0, nchildren) { 394 const size_t ichild = inode + NODE(inode)->offset + i; 395 nvertices += NODE(ichild)->nvertices; 396 } 397 398 /* Define the vertices range of the merged polyline */ 399 vertices_range[0] = darray_vertex_size_get(vertex_list); 400 vertices_range[1] = vertices_range[0] + nvertices - 1/*inclusive bound*/; 401 402 /* Allocate the memory space to store the new polyline */ 403 res = darray_vertex_resize(vertex_list, vertices_range[1]+1); 404 if(res != RES_OK) { 405 ERROR(tree->sln, "Error in merging polylines -- %s.\n", res_to_cstr(res)); 406 goto error; 407 } 408 vertices = darray_vertex_data_get(vertex_list); 409 410 /* Initialize the vertex index list. For each child, the initial value 411 * corresponds to the index of its first vertex. This index will be 412 * incremented as vertices are merged into the parent polyline. */ 413 FOR_EACH(i, 0, nchildren) { 414 const size_t ichild = inode + NODE(inode)->offset + i; 415 children_ivtx[i] = NODE(ichild)->ivertex; 416 } 417 418 FOR_EACH(ivtx, vertices_range[0], vertices_range[1]+1/*inclusive bound*/) { 419 double ka = 0; 420 double nu = INF; 421 422 /* The number of vertices corresponding to the current wave number for which 423 * the parent ka is calculated. It is at least equal to one, since this nu 424 * is defined by the child vertices, but may be greater if multiple children 425 * share the same vertex, i.e., a ka value calculated for the same nu */ 426 unsigned nvertices_merged = 0; 427 428 /* Find the minimum wave number among the vertices of the child vertices 429 * that are candidates for merging */ 430 FOR_EACH(i, 0, nchildren) { 431 const size_t child_ivtx = children_ivtx[i]; 432 if(child_ivtx != NO_MORE_VERTEX) { 433 nu = MMIN(nu, vertices[child_ivtx].wavenumber); 434 } 435 } 436 ASSERT(nu != INF); /* At least one vertex must have been found */ 437 438 /* Compute the value of ka at the wave number determined above */ 439 FOR_EACH(i, 0, nchildren) { 440 const size_t child_ivtx = children_ivtx[i]; 441 const struct sln_node* child = NODE(inode + NODE(inode)->offset + i); 442 443 if(child_ivtx == NO_MORE_VERTEX 444 || nu != vertices[child_ivtx].wavenumber) { 445 /* The wave number does not correspond to a vertex in the current 446 * child's mesh. Therefore, its contribution to the parent node's ka is 447 * computed */ 448 ka += eval_ka(child, vertices, nu); 449 450 } else { 451 /* The wave number is the one for which the child node stores a ka 452 * value. Add it to the parent node's ka value and designate the child's 453 * next vertex as a candidate for merging into the parent. The exception 454 * is when all vertices of the child have already been merged. In this 455 * case, report that the child no longer has any candidate vertices */ 456 ka += vertices[child_ivtx].ka; 457 ++children_ivtx[i]; 458 if(children_ivtx[i] >= child->ivertex + child->nvertices) { 459 children_ivtx[i] = NO_MORE_VERTEX; 460 } 461 ++nvertices_merged; /* Record that a vertex has been merged */ 462 } 463 } 464 465 /* Setup the parent vertex */ 466 vertices[ivtx].wavenumber = (float)nu; 467 vertices[ivtx].ka = (float)ka; 468 469 /* If multiple child vertices have been merged, then a single wave number 470 * corresponds to a vertex with multiple children. The number of parent 471 * vertices is therefore no longer the sum of the number of its children's 472 * vertices, since some vertices are duplicated. Hence the following 473 * adjustment, which removes the duplicate vertices. */ 474 vertices_range[1] -= (nvertices_merged-1); 475 } 476 477 /* Decimate the resulting polyline */ 478 res = polyline_decimate(tree->sln, darray_vertex_data_get(vertex_list), 479 vertices_range, (float)tree->args.mesh_decimation_err, tree->args.mesh_type); 480 if(res != RES_OK) goto error; 481 482 /* Setup the node polyline */ 483 NODE(inode)->ivertex = vertices_range[0]; 484 NODE(inode)->nvertices = ui64_to_ui32(vertices_range[1] - vertices_range[0] + 1); 485 486 /* It is necessary to ensure that the recorded vertices define a polyline 487 * along which any value (calculated by linear interpolation) is well above 488 * the sum of the corresponding values of the polylines to be merged. However, 489 * although this is guaranteed by definition for the vertices of the polyline, 490 * numerical uncertainty may nevertheless introduce errors that violate this 491 * criterion. Hence the following adjustment, which slightly increases the ka 492 * of the mesh so as to guarantee this constraint between the mesh of a node 493 * and that of its children */ 494 if(tree->args.mesh_type == SLN_MESH_UPPER) { 495 FOR_EACH(ivtx, vertices_range[0], vertices_range[1]+1/*inclusive bound*/) { 496 const double ka = vertices[ivtx].ka; 497 vertices[ivtx].ka = (float)(ka*KA_ADJUSTEMENT); 498 } 499 } 500 501 /* Shrink the size of the vertices */ 502 darray_vertex_resize(vertex_list, vertices_range[1] + 1); 503 504 #undef NODE 505 #undef NCHILDREN_MAX 506 507 exit: 508 return res; 509 error: 510 goto exit; 511 } 512 513 /* Merge child polylines by combining them in pairs (the resulting polyline is 514 * then simplified), and repeat this process until only a single polyline 515 * remains. This polyline becomes the node's polyline */ 516 static res_T 517 collapse_child_polylines 518 (struct sln_tree* tree, 519 const size_t inode, 520 const unsigned nchildren, 521 struct darray_node* nodes, 522 struct darray_vertex* scratch, 523 struct darray_vertex* vertex_list) 524 { 525 /* Polylines to be collapsed, i.e., the ids of their first and last vertices */ 526 #define NCHILDREN_MAX \ 527 ( SLN_TREE_ARITY_MAX > SLN_LEAF_NLINES_MAX \ 528 ? SLN_TREE_ARITY_MAX : SLN_LEAF_NLINES_MAX) 529 size_t poly_parts[NCHILDREN_MAX][2] = {0}; 530 size_t nparts; 531 532 /* Indices of the first and last vertices of the resulting polyline. 533 * These indices are absolute to the tree's vertex list */ 534 size_t vertices_range[2] = {0,0}; 535 536 /* Redux double buffering */ 537 struct sln_vertex* buf[2] = {NULL, NULL}; 538 int r, w; /* Index of the buffer in read/write */ 539 540 /* Miscellaneous */ 541 struct sln_vertex* vertices = NULL; /* Pointer to the tree's vertex buffer */ 542 size_t range_merge[2] = {0,0}; /* vertex range of a merged polyline */ 543 size_t nvertices = 0; /* #vertices of the resulting polyline */ 544 size_t ivtx = 0; 545 unsigned i = 0; 546 int ncollapses = 0; /* Number of collapse steps */ 547 res_T res = RES_OK; 548 549 /* Pre-conditions */ 550 ASSERT(tree && inode < darray_node_size_get(nodes)); 551 ASSERT(nchildren >= 2 && nchildren <= NCHILDREN_MAX); 552 553 #define NODE(Id) (darray_node_data_get(nodes) + (Id)) 554 555 /* Compute the number of vertices to be merged, 556 * i.e., the sum of vertices of the children */ 557 nvertices = 0; 558 FOR_EACH(i, 0, nchildren) { 559 const size_t ichild = inode + NODE(inode)->offset + i; 560 nvertices += NODE(ichild)->nvertices; 561 } 562 563 /* Define the vertices range of the merged polyline */ 564 vertices_range[0] = darray_vertex_size_get(vertex_list); 565 vertices_range[1] = vertices_range[0] + nvertices - 1/*inclusive bound*/; 566 567 /* Allocate the memory space required to store the new polyline and the 568 * temporary buffer. This is the memory space in which the collapse 569 * procedure's double buffering will take place. Each buffer will be used 570 * alternately as a read/write buffer when reducing the polylines of the 571 * child nodes */ 572 if((res = darray_vertex_resize(vertex_list, vertices_range[1]+1)) != RES_OK 573 || (res = darray_vertex_resize(scratch, nvertices)) != RES_OK) { 574 ERROR(tree->sln, "Error in merging polylines -- %s.\n", res_to_cstr(res)); 575 goto error; 576 } 577 578 /* Recover the memory space of the tree's vertices */ 579 vertices = darray_vertex_data_get(vertex_list); 580 581 /* Recover the memory space to be used in the Redux process. 582 * 583 * In the vertex buffer, this refers to the newly allocated memory space used 584 * during the collapse. The beginning of the buffer contains the vertices of 585 * the registered nodes and must therefore not be modified. At the end of the 586 * collapse, this space will contain the vertices of the polylines resulting 587 * from the collapse of the child nodes' polylines. 588 * 589 * The scratch buffer is used as-is, since its sole purpose is to temporarily 590 * store the vertices of the polylines to be merged. */ 591 buf[0] = vertices + vertices_range[0]; 592 buf[1] = darray_vertex_data_get(scratch); 593 594 /* Initially, the number of partitions to be reduced is equal to the number of 595 * children */ 596 nparts = nchildren; 597 598 /* Calculate the number of reduction steps, i.e., the logarithm of the power 599 * of 2 that is greater than or equal to the number of polylines to be 600 * reduced, to which one is added to obtain the number of steps, not the index 601 * of the last step */ 602 ncollapses = log2i((int)round_up_pow2(nparts)) + 1; 603 604 /* Set the index of the write buffer so that, once the collapse process is 605 * complete, the last write buffer is the one whose memory space is allocated 606 * in the tree's vertex buffer. This way, no additional copies will be needed 607 * to store the result of the reduction */ 608 w = ncollapses % 2 ? 0 : 1; 609 610 /* Initialize the polylines to be reduced, i.e., copy the child vertices into 611 * a collapse buffer and record their index ranges */ 612 ivtx = 0; 613 for(i = 0; i < nchildren; ++i) { 614 const size_t ichild = inode + NODE(inode)->offset + i; 615 const struct sln_node* child = NODE(ichild); 616 const size_t memsz = child->nvertices * sizeof(struct sln_vertex); 617 const struct sln_vertex* src = vertices + child->ivertex; 618 struct sln_vertex* dst = buf[w] + ivtx; 619 620 memcpy(dst, src, memsz); 621 poly_parts[i][0] = ivtx; 622 poly_parts[i][1] = ivtx + child->nvertices - 1/*inclusive bound*/; 623 624 ivtx += child->nvertices; 625 } 626 627 r = w; /* Index of the buffer from which to read the data */ 628 w =!r; /* Index of the buffer into which to write the data */ 629 630 631 /* As long as the number of segments is not equal to one, there are still 632 * polylines to be merged in pairs */ 633 while(nparts > 1) { 634 size_t ipart = 0; 635 636 for(ipart=0; ipart < nparts-1; ipart+=2) { 637 struct sln_mesh mesh0 = SLN_MESH_NULL; 638 struct sln_mesh mesh1 = SLN_MESH_NULL; 639 640 /* Retrieve the two polylines to be merged */ 641 const size_t* part0 = poly_parts[ipart+0]; 642 const size_t* part1 = poly_parts[ipart+1]; 643 644 /* Calculate the partition index of the merged polyline, that is, the index 645 * that will contain the information for the polyline resulting from the 646 * merge: the first and last indices of its in the vertex array currently 647 * being written */ 648 const size_t ipart_merge = ipart/2; 649 650 /* Compute the number of vertices of the merged polyline */ 651 const size_t nvtx = 652 ((part0[1] - part0[0]) + 1/*inclusive bound*/) 653 + ((part1[1] - part1[0]) + 1/*inclusive bound*/); 654 655 /* For each child, initialized its vertex index to its first vertex. This 656 * index will be incremented as vertices are merged into the merged 657 * polyline */ 658 size_t ivtx0 = part0[0]; 659 size_t ivtx1 = part1[0]; 660 661 /* Set the vertex index range for the merged polyline in the write buffer */ 662 range_merge[0] = part0[0]; 663 range_merge[1] = range_merge[0] + nvtx-1/*inclusive bound*/; 664 665 /* Setup the mesh of the two polylines to be merged */ 666 mesh0.vertices = buf[r] + part0[0]; mesh0.nvertices = part0[1]-part0[0]+1; 667 mesh1.vertices = buf[r] + part1[0]; mesh1.nvertices = part1[1]-part1[0]+1; 668 669 /* Merge the polylines */ 670 FOR_EACH(ivtx, range_merge[0], range_merge[1]+1/*inclusive bound*/) { 671 const double nu0 = ivtx0 <= part0[1] ? buf[r][ivtx0].wavenumber : INF; 672 const double nu1 = ivtx1 <= part1[1] ? buf[r][ivtx1].wavenumber : INF; 673 double ka = 0; 674 double nu = 0; 675 676 /* Find the minimum wave number among the vertices of the child vertices 677 * that are candidates for merging */ 678 if(nu0 < nu1) { /* The vertex comes from the child0 */ 679 nu = buf[r][ivtx0].wavenumber; 680 ka = buf[r][ivtx0].ka + sln_mesh_eval(&mesh1, nu); 681 ++ivtx0; 682 683 } else if(nu0 > nu1) { /* The vertex comes from the child1 */ 684 nu = buf[r][ivtx1].wavenumber; 685 ka = buf[r][ivtx1].ka + sln_mesh_eval(&mesh0, nu); 686 ++ivtx1; 687 688 } else { /* The vertex is shared by node0 and node1 */ 689 nu = buf[r][ivtx0].wavenumber; 690 ka = buf[r][ivtx0].ka + buf[r][ivtx1].ka; 691 --range_merge[1]; /* Remove duplicate */ 692 ++ivtx0; 693 ++ivtx1; 694 } 695 buf[w][ivtx].wavenumber = (float)nu; 696 buf[w][ivtx].ka = (float)ka; 697 } 698 699 /* Decimate the resulting polyline */ 700 res = polyline_decimate(tree->sln, buf[w], range_merge, 701 (float)tree->args.mesh_decimation_err, tree->args.mesh_type); 702 if(res != RES_OK) goto error; 703 704 /* Setup the partition of the merge polyline */ 705 poly_parts[ipart_merge][0] = range_merge[0]; 706 poly_parts[ipart_merge][1] = range_merge[1]; 707 } 708 709 /* If there is a polyline that has not been merged, copy its vertices to the 710 * write buffer so that it can be processed in the next reduction step */ 711 if(nparts % 2) { 712 const size_t* remain_part = poly_parts[nparts-1]; 713 const size_t nvtx = remain_part[1] - remain_part[0] + 1/*inclusive*/; 714 const size_t memsz = nvtx * sizeof(struct sln_vertex); 715 const struct sln_vertex* src = buf[r] + remain_part[0]; 716 struct sln_vertex* dst = buf[w] + remain_part[0]; 717 718 memcpy(dst, src, memsz); 719 poly_parts[nparts/2][0] = remain_part[0]; 720 poly_parts[nparts/2][1] = remain_part[1]; 721 } 722 723 #ifndef NDEBUG 724 FOR_EACH(i, 1, (nparts+1/*ceil*/)/2) { 725 ASSERT(poly_parts[i][0] > poly_parts[i-1][1]); 726 } 727 #endif 728 729 /* Update the number of partitions to be reduced */ 730 nparts = (nparts + 1/*ceil*/)/2; 731 732 /* Swap read/write buffers */ 733 r = !r; 734 w = !w; 735 } 736 737 nvertices = (range_merge[1] - range_merge[0]) + 1/*inclusive bound*/; 738 vertices_range[1] = vertices_range[0] + nvertices - 1/*inclusive bound*/; 739 740 /* Assumed that double buffering was configured to ensure that the resulting 741 * polyline is stored in the tree vertex buffer */ 742 ASSERT(buf[r] != darray_vertex_cdata_get(scratch)); 743 744 /* Setup the node */ 745 NODE(inode)->ivertex = vertices_range[0]; 746 NODE(inode)->nvertices = ui64_to_ui32(nvertices); 747 748 /* It is necessary to ensure that the recorded vertices define a polyline 749 * along which any value (calculated by linear interpolation) is well above 750 * the sum of the corresponding values of the polylines to be merged. However, 751 * although this is guaranteed by definition for the vertices of the polyline, 752 * numerical uncertainty may nevertheless introduce errors that violate this 753 * criterion. Hence the following adjustment, which slightly increases the ka 754 * of the mesh so as to guarantee this constraint between the mesh of a node 755 * and that of its children */ 756 if(tree->args.mesh_type == SLN_MESH_UPPER) { 757 FOR_EACH(ivtx, vertices_range[0], vertices_range[1]+1/*inclusive bound*/) { 758 const double ka = vertices[ivtx].ka; 759 vertices[ivtx].ka = (float)(ka*KA_ADJUSTEMENT); 760 } 761 } 762 763 /* Shrink the size of the vertices */ 764 darray_vertex_resize(vertex_list, vertices_range[1] + 1); 765 766 #undef NCHILDREN 767 #undef NODE 768 769 exit: 770 return res; 771 error: 772 goto exit; 773 } 774 static res_T 775 build_polylines 776 (struct sln_tree* tree, 777 const size_t root_index, 778 const size_t nodes_count, /* Total number of nodes in the tree (for debug) */ 779 struct darray_vertex* vertices, 780 struct progress* progress) 781 { 782 /* Stack */ 783 #define STACK_SIZE (SLN_TREE_DEPTH_MAX*SLN_TREE_ARITY_MAX) 784 size_t stack[STACK_SIZE]; 785 size_t istack = 0; 786 787 /* Progress */ 788 size_t nnodes_processed = 0; 789 790 /* Miscellaneous */ 791 struct build_leaf_polyline_scratch scratch; 792 size_t inode = 0; 793 res_T res = RES_OK; 794 795 ASSERT(tree && nodes_count != 0); 796 ASSERT(root_index < darray_node_size_get(&tree->nodes)); 797 (void)nodes_count; /* Avoid "Unused variable" warning */ 798 799 build_leaf_polyline_scratch_init(tree->sln->allocator, &scratch); 800 801 #define NODE(Id) (darray_node_data_get(&tree->nodes) + (Id)) 802 #define IS_LEAF(Id) (NODE(Id)->offset == 0) 803 804 /* Push back SIZE_MAX which, once pop up, will mark the end of recursion */ 805 stack[istack++] = SIZE_MAX; 806 807 inode = root_index; /* Root node */ 808 while(inode != SIZE_MAX) { 809 const size_t istack_saved = istack; 810 811 if(IS_LEAF(inode)) { 812 res = build_leaf_polyline(tree, NODE(inode), &scratch, vertices); 813 if(res != RES_OK) goto error; 814 815 inode = stack[--istack]; /* Pop the next node */ 816 817 } else { 818 const size_t ichild0 = inode + NODE(inode)->offset + 0; 819 const struct sln_node* node = darray_node_cdata_get(&tree->nodes)+inode; 820 const unsigned nchildren = node_child_count(node, tree->args.arity); 821 int child_polylines_are_missing = 1; 822 size_t i = 0; 823 824 FOR_EACH(i, 0, nchildren) { 825 child_polylines_are_missing = NODE(ichild0 + i)->nvertices == 0; 826 if(child_polylines_are_missing) break; 827 } 828 829 /* Child nodes have their polyline created */ 830 if(!child_polylines_are_missing) { 831 #ifndef NDEBUG 832 /* Check that all children have their polylines created */ 833 FOR_EACH(i, 1, nchildren) { 834 const size_t ichild = ichild0 + i; 835 ASSERT(NODE(ichild)->nvertices != 0); 836 } 837 #endif 838 if(tree->args.collapse_polylines) { 839 res = collapse_child_polylines(tree, inode, nchildren, &tree->nodes, 840 &scratch.vertices_tmp, vertices); 841 } else { 842 res = merge_child_polylines 843 (tree, inode, nchildren, &tree->nodes, vertices); 844 } 845 if(res != RES_OK) goto error; 846 847 inode = stack[--istack]; /* Pop the next node */ 848 849 /* Child nodes have NOT their polyline created */ 850 } else { 851 ASSERT(istack + (nchildren - 1/*ichild0*/ + 1/*inode*/) <= STACK_SIZE); 852 stack[istack++] = inode; /* Push the current node */ 853 854 /* Push the child nodes, except for those whose polyline has already 855 * been constructed, as is the case when the child node was the root 856 * of a separately constructed subtree */ 857 FOR_EACH_REVERSE(i, nchildren, 0) { 858 const size_t ichild = ichild0 + i-1; 859 if(NODE(ichild)->nvertices == 0) stack[istack++] = ichild; 860 } 861 862 /* Ensure that at least one child was pushed */ 863 ASSERT(stack[istack-1] != inode); 864 865 inode = stack[--istack]; /* Recursively build the polyline of the 1st child */ 866 } 867 } 868 869 /* Handle progression bar */ 870 if(istack < istack_saved) { 871 size_t nnodes = istack_saved - istack; 872 873 nnodes_processed += nnodes; 874 progress_update(tree->sln, progress, nnodes); 875 } 876 } 877 ASSERT(nnodes_processed == nodes_count); 878 879 #undef NODE 880 #undef IS_LEAF 881 #undef LOG_MSG 882 #undef STACK_SIZE 883 884 exit: 885 build_leaf_polyline_scratch_release(&scratch); 886 return res; 887 error: 888 goto exit; 889 } 890 891 static res_T 892 partition_lines_depth_first 893 (struct sln_tree* tree, 894 const size_t root_index, 895 struct darray_node* nodes, 896 struct progress* progress) 897 { 898 /* Stack */ 899 #define STACK_SIZE (SLN_TREE_DEPTH_MAX*(SLN_TREE_ARITY_MAX-1)) 900 size_t stack[STACK_SIZE]; 901 size_t istack = 0; 902 903 /* Progress */ 904 size_t nlines_total = 0; 905 size_t nlines_processed = 0; 906 907 /* Miscellaneous */ 908 size_t inode = 0; 909 res_T res = RES_OK; 910 911 /* Pre-condition */ 912 ASSERT(tree && nodes && progress); 913 ASSERT(root_index < darray_node_size_get(nodes)); 914 (void)nlines_total; /* Avoid "Unused variable" warning */ 915 916 #define NODE(Id) (darray_node_data_get(nodes) + (Id)) 917 #define CREATE_NODE { \ 918 res = darray_node_push_back(nodes, &SLN_NODE_NULL); \ 919 if(res != RES_OK) goto error; \ 920 } (void)0 921 922 nlines_total = NODE(root_index)->range[1] - NODE(root_index)->range[0] + 1; 923 nlines_processed = 0; 924 925 /* Push back SIZE_MAX which, once pop up, will mark the end of recursion */ 926 stack[istack++] = SIZE_MAX; 927 928 inode = root_index; /* Root node */ 929 while(inode != SIZE_MAX) { 930 /* #lines into the node */ 931 size_t nlines_node = NODE(inode)->range[1] - NODE(inode)->range[0] + 1; 932 933 /* Make a leaf */ 934 if(nlines_node <= tree->args.leaf_nlines) { 935 936 NODE(inode)->offset = 0; 937 inode = stack[--istack]; /* Pop the next node */ 938 939 nlines_processed += nlines_node; /* For debug */ 940 941 progress_update(tree->sln, progress, nlines_node); 942 943 /* Split the node */ 944 } else { 945 size_t node_range[2] = {0,0}; 946 size_t nlines_child_min = 0; /* Min #lines per child */ 947 size_t nlines_remain = 0; 948 size_t nchildren = 0; 949 size_t iline = 0; 950 size_t i = 0; 951 952 /* Calculate the index of the first child */ 953 size_t ichildren = darray_node_size_get(nodes); 954 955 /* Compute how the number of children that node has */ 956 nchildren = node_child_count(NODE(inode), tree->args.arity); 957 958 ASSERT(nchildren <= tree->args.arity); 959 ASSERT(ichildren > inode); 960 961 node_range[0] = NODE(inode)->range[0]; 962 node_range[1] = NODE(inode)->range[1]; 963 964 /* Define the offset from the current node to its children */ 965 NODE(inode)->offset = ui64_to_ui32((uint64_t)(ichildren - inode)); 966 967 nlines_child_min = nlines_node / nchildren; 968 nlines_remain = nlines_node % nchildren; 969 970 iline = node_range[0]; 971 FOR_EACH(i, 0, nchildren) { 972 /* Compute the number of lines per child. Start by assigning the minimum 973 * number of lines to each child, then distribute the remaining lines 974 * among the first children */ 975 size_t nlines_child = nlines_child_min + (i < nlines_remain); 976 977 CREATE_NODE; 978 979 /* Set the range of lines line for the newly created child. Note that 980 * the boundaries of the range are inclusive, which is why 1 is 981 * subtracted to the upper bound */ 982 NODE(ichildren+i)->range[0] = iline; 983 NODE(ichildren+i)->range[1] = iline + nlines_child - 1/*inclusive bound*/; 984 iline += nlines_child; 985 986 /* Check that the child's lines are a subset of the parent's lines */ 987 ASSERT(NODE(ichildren+i)->range[0] >= node_range[0]); 988 ASSERT(NODE(ichildren+i)->range[1] <= node_range[1]); 989 } 990 991 inode = ichildren; /* Make the first child the current node */ 992 993 /* Push the other children */ 994 ASSERT(istack + (nchildren-1/*1st child*/) <= STACK_SIZE); 995 FOR_EACH_REVERSE(i, nchildren-1, 0) stack[istack++] = ichildren + i; 996 } 997 } 998 ASSERT(nlines_processed == nlines_total); 999 1000 #undef NODE 1001 #undef CREATE_NODE 1002 #undef STACK_SIZE 1003 1004 exit: 1005 return res; 1006 error: 1007 goto exit; 1008 } 1009 1010 static res_T 1011 partition_lines_breadth_first 1012 (struct sln_tree* tree, 1013 const size_t root_index, 1014 const unsigned nleaves_max_hint) /* Advice on the maxium of leafs to create */ 1015 { 1016 /* Static memory space of the queue */ 1017 struct item { 1018 struct list_node link; 1019 size_t inode; 1020 } items[QUEUE_SIZE_MAX]; 1021 1022 /* Linked lists for managing queue data */ 1023 struct list_node free_items; 1024 struct list_node queue; 1025 size_t nnodes = 0; /* Number of enqueud nodes */ 1026 1027 /* Miscellaneous */ 1028 const size_t nleaves_max = MMIN(nleaves_max_hint, QUEUE_SIZE_MAX); 1029 size_t nlines_total = 0; 1030 size_t nleaves = 0; 1031 size_t i = 0; 1032 res_T res = RES_OK; 1033 1034 /* Pre-conditions */ 1035 ASSERT(tree); 1036 ASSERT(root_index < darray_node_size_get(&tree->nodes)); 1037 1038 /* Macros to help in managing nodes */ 1039 #define NODE(Id) (darray_node_data_get(&tree->nodes) + (Id)) 1040 #define CREATE_NODE { \ 1041 res = darray_node_push_back(&tree->nodes, &SLN_NODE_NULL); \ 1042 if(res != RES_OK) goto error; \ 1043 } (void)0 1044 1045 /* Helper macro for the queue data structure */ 1046 #define ENQUEUE(INode) { \ 1047 struct item* item__ = NULL; \ 1048 ASSERT(!is_list_empty(&free_items)); \ 1049 item__ = CONTAINER_OF(list_head(&free_items), struct item, link); \ 1050 item__->inode = INode; \ 1051 list_move_tail(&item__->link, &queue); \ 1052 ++nnodes; \ 1053 } 1054 #define DEQUEUE \ 1055 (list_move_tail(list_head(&queue), &free_items), \ 1056 --nnodes, \ 1057 CONTAINER_OF(list_tail(&free_items), struct item, link)->inode) 1058 1059 /* Setup the linked lists */ 1060 list_init(&free_items); 1061 list_init(&queue); 1062 FOR_EACH(i, 0, sizeof(items)/sizeof(items[0])) { 1063 items[i].inode = SIZE_MAX; 1064 list_init(&items[i].link); 1065 list_add_tail(&free_items, &items[i].link); 1066 } 1067 1068 SHTR(line_list_get_size(tree->args.lines, &nlines_total)); 1069 1070 /* Setup the root node */ 1071 NODE(root_index)->range[0] = 0; 1072 NODE(root_index)->range[1] = nlines_total - 1; 1073 1074 /* Register the root node in the queue */ 1075 ENQUEUE(root_index); 1076 1077 /* Breadth-first partitioning */ 1078 while(!is_list_empty(&queue)) { 1079 size_t node_range[2] = {0,0}; 1080 size_t nlines_node = 0; /* #lines in the node */ 1081 size_t nlines_child_min = 0; /* Min #lines per child */ 1082 size_t nlines_remain = 0; /* Lines to be distributed among the children */ 1083 size_t nchildren = 0; /* #children of the node */ 1084 size_t ichildren = 0; /* Index of the node's first child */ 1085 size_t iline = 0; 1086 size_t inode = 0; 1087 1088 inode = DEQUEUE; /* Get the current node */ 1089 1090 /* Retrieve the range of lines contained within the node */ 1091 node_range[0] = NODE(inode)->range[0]; 1092 node_range[1] = NODE(inode)->range[1]; 1093 nlines_node = node_range[1] - node_range[0] + 1/*inclusive bound*/; 1094 ASSERT(nlines_node >= tree->args.leaf_nlines); 1095 1096 if(nlines_node <= tree->args.leaf_nlines) { 1097 /* Make a leaf */ 1098 ++nleaves; 1099 continue; 1100 } 1101 1102 /* Compute the number of children that node has */ 1103 nchildren = node_child_count(NODE(inode), tree->args.arity); 1104 ASSERT(nchildren <= tree->args.arity); 1105 1106 /* Check whether, after adding the child nodes to the queue, the total 1107 * number of nodes in the queue, plus the number of leaves already created, 1108 * does not exceed the maximum allowed number of leaves. If so, the 1109 * breadth-first partitioning is complete and the nodes currently in the 1110 * queue are all leaves */ 1111 if(nnodes + nchildren + nleaves > nleaves_max) { 1112 nleaves += nnodes + 1/*Do not forget the the current node*/; 1113 break; 1114 } 1115 1116 /* Calculate the index of the first child */ 1117 ichildren = darray_node_size_get(&tree->nodes); 1118 ASSERT(ichildren > inode); 1119 1120 /* Define the offset from the current node to its children */ 1121 NODE(inode)->offset = ui64_to_ui32((uint64_t)(ichildren - inode)); 1122 1123 /* Calculate the minimum number of lines per child and the number of 1124 * remaining lines to be distributed equally among them */ 1125 nlines_child_min = nlines_node / nchildren; 1126 nlines_remain = nlines_node % nchildren; 1127 1128 iline = node_range[0]; 1129 FOR_EACH(i, 0, nchildren) { 1130 /* Compute the number of lines per child. Start by assigning the minimum 1131 * number of lines to each child, then distribute the remaining lines 1132 * among the first children */ 1133 size_t nlines_child = nlines_child_min + (i < nlines_remain); 1134 1135 CREATE_NODE; 1136 1137 /* Set the range of lines line for the newly created child. Note that 1138 * the boundaries of the range are inclusive, which is why 1 is 1139 * subtracted to the upper bound */ 1140 NODE(ichildren+i)->range[0] = iline; 1141 NODE(ichildren+i)->range[1] = iline + nlines_child - 1/*inclusive bound*/; 1142 iline += nlines_child; 1143 1144 /* Check that the child's lines are a subset of the parent's lines */ 1145 ASSERT(NODE(ichildren+i)->range[0] >= node_range[0]); 1146 ASSERT(NODE(ichildren+i)->range[1] <= node_range[1]); 1147 1148 ENQUEUE(ichildren+i); /* Register the child node */ 1149 } 1150 } 1151 1152 #undef NODE 1153 #undef CREATE_NODE 1154 #undef ENQUEUE 1155 #undef DEQUEUE 1156 1157 exit: 1158 return res; 1159 error: 1160 goto exit; 1161 } 1162 1163 static res_T 1164 build_subtrees 1165 (struct sln_tree* tree, 1166 const size_t subtrees[], 1167 unsigned nsubtrees, 1168 struct progress* meshing) 1169 { 1170 /* Subtree temporary data */ 1171 struct scratch { 1172 struct darray_node nodes; 1173 struct darray_vertex vertices; 1174 size_t nnodes; 1175 }* scratches; 1176 1177 /* Miscellaneous */ 1178 struct progress partitioning = PROGRESS_DEFAULT; 1179 struct mem_allocator* allocator = NULL; 1180 size_t nlines = 0; 1181 unsigned nthreads = 0; 1182 int i = 0; 1183 ATOMIC res = RES_OK; 1184 1185 /* Pre-conditions */ 1186 ASSERT(tree && subtrees && nsubtrees >= 1); 1187 ASSERT(nsubtrees < INT_MAX); 1188 1189 #define NODE(Id) (darray_node_data_get(&tree->nodes) + (Id)) 1190 #define IS_LEAF(Buf, Id) (darray_node_cdata_get(Buf)[Id].offset == 0) 1191 1192 allocator = tree->sln->allocator; 1193 1194 /* Allocate the per subtree scratch */ 1195 scratches = MEM_CALLOC(allocator, nsubtrees, sizeof(*scratches)); 1196 if(!scratches) { res = RES_MEM_ERR; goto error; } 1197 FOR_EACH(i, 0, (int)nsubtrees) { 1198 darray_node_init(allocator, &scratches[i].nodes); 1199 darray_vertex_init(allocator, &scratches[i].vertices); 1200 } 1201 1202 /* Setup the progress bar and print that although nothing has been done yet, 1203 * the calculation is nevertheless in progress */ 1204 SHTR(line_list_get_size(tree->args.lines, &nlines)); 1205 partitioning.total = nlines; 1206 partitioning.msg = "Partitioning: "; 1207 progress_print(tree->sln, &partitioning); 1208 1209 /* Set the number of threads to use. The advice on the number of threads must 1210 * not exceed the number of threads available on the machine (see the 1211 * sln_tree_create function); the caller can only specify a lower number of 1212 * threads. urthermore, the number of subtrees is calculated so that each subtree 1213 * corresponds to at least one thread, ensuring that there are no more 1214 * subtrees than available threads. 1215 * 1216 * The actual number of threads to be used for constructing the subtrees in 1217 * parallel should therefore always be set to the number of subtrees. However, 1218 * to provide greater flexibility in the distribution of threads or the number 1219 * of subtrees, the number of threads is calculated below as the minimum 1220 * between the number of available threads and the number of subtrees */ 1221 nthreads = MMIN(tree->args.nthreads_hint, nsubtrees); 1222 omp_set_num_threads((int)nthreads); 1223 1224 #pragma omp parallel for 1225 for(i=0; i < (int)nsubtrees; ++i) { 1226 struct sln_node root = SLN_NODE_NULL; 1227 struct scratch* scratch = scratches + i; 1228 size_t nnodes_s = 0; 1229 size_t nnodes_t = 0; 1230 res_T res2 = RES_OK; 1231 1232 /* Skip the remaining subtrees in case of an error */ 1233 if(ATOMIC_GET(&res) != RES_OK) continue; 1234 1235 /* Copy the subtree root in the temporary node buffer */ 1236 #pragma omp critical 1237 { 1238 root = *NODE(subtrees[i]); 1239 } 1240 res2 = darray_node_push_back(&scratch->nodes, &root); 1241 if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; } 1242 1243 /* Partition the line */ 1244 res2 = partition_lines_depth_first 1245 (tree, 0/*root*/, &scratch->nodes, &partitioning); 1246 if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; } 1247 1248 #pragma omp critical 1249 { 1250 struct sln_node* dst = NULL; 1251 const struct sln_node* src = NULL; 1252 1253 /* Increase the node buffer to store the subtree nodes */ 1254 nnodes_s = darray_node_size_get(&scratch->nodes); 1255 nnodes_t = darray_node_size_get(&tree->nodes); 1256 res2 = darray_node_resize(&tree->nodes, nnodes_t + nnodes_s - 1/*root*/); 1257 if(res2 == RES_OK) { 1258 1259 if(!IS_LEAF(&scratch->nodes, 0)) { 1260 /* Set the offset to the first child of the root node of the subtree */ 1261 NODE(subtrees[i])->offset = ui64_to_ui32(nnodes_t - subtrees[i]); 1262 } 1263 1264 /* Copy the subtree nodes in the main node buffer */ 1265 src = darray_node_cdata_get(&scratch->nodes) + 1/*discard root*/; 1266 dst = darray_node_data_get(&tree->nodes) + nnodes_t; 1267 memcpy(dst, src, (nnodes_s-1/*discard root*/)*sizeof(*src)); 1268 } 1269 } 1270 if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; } 1271 1272 /* Free the temporary buffer but retain the number of nodes in the subtree. 1273 * This number is used later when constructing polylines (see below) */ 1274 darray_node_purge(&scratch->nodes); 1275 scratch->nnodes = nnodes_s; 1276 } 1277 1278 /* Setup the progress bar and print that although nothing has been done yet, 1279 * the calculation is nevertheless in progress */ 1280 *meshing = PROGRESS_DEFAULT; 1281 meshing->total = darray_node_size_get(&tree->nodes); 1282 meshing->msg = "Meshing: "; 1283 progress_print(tree->sln, meshing); 1284 1285 #pragma omp parallel for 1286 for(i=0; i < (int)nsubtrees; ++i) { 1287 struct scratch* scratch = scratches + i; 1288 size_t inode = subtrees[i]; 1289 size_t nverts_s = 0; 1290 size_t nverts_t = 0; 1291 size_t j = 0; 1292 res_T res2 = RES_OK; 1293 1294 /* Skip the remaining subtrees in case of an error */ 1295 if(ATOMIC_GET(&res) != RES_OK) continue; 1296 1297 res2 = build_polylines 1298 (tree, inode, scratch->nnodes, &scratch->vertices, meshing); 1299 if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; } 1300 1301 #pragma omp critical 1302 { 1303 struct sln_vertex* dst = NULL; 1304 const struct sln_vertex* src = NULL; 1305 1306 /* Increase the vertex buffer to store the subtree polylines */ 1307 nverts_s = darray_vertex_size_get(&scratch->vertices); 1308 nverts_t = darray_vertex_size_get(&tree->vertices); 1309 res2 = darray_vertex_resize(&tree->vertices, nverts_t + nverts_s); 1310 if(res2 == RES_OK) { 1311 /* Copy the subtree nodes in the main vertex buffer */ 1312 src = darray_vertex_cdata_get(&scratch->vertices); 1313 dst = darray_vertex_data_get(&tree->vertices) + nverts_t; 1314 memcpy(dst, src, nverts_s*sizeof(*src)); 1315 } 1316 } 1317 if(res2 != RES_OK) { ATOMIC_SET(&res, res2); continue; } 1318 1319 /* Update the index of the first vertex of the polyline for the nodes in the 1320 * subtree based on their new locations, i.e., the main vertex buffer */ 1321 NODE(subtrees[i])->ivertex += nverts_t; 1322 FOR_EACH(j, 0, scratch->nnodes-1/*The root has been just treated*/) { 1323 inode = subtrees[i] + NODE(subtrees[i])->offset + j; 1324 NODE(inode)->ivertex += nverts_t; 1325 } 1326 1327 /* Free the temporary vertex buffer */ 1328 darray_node_purge(&scratch->nodes); 1329 } 1330 1331 #undef NODE 1332 #undef IS_LEAF 1333 1334 exit: 1335 /* Clean up temporary data */ 1336 FOR_EACH(i, 0, (int)nsubtrees) { 1337 darray_node_release(&scratches[i].nodes); 1338 darray_vertex_release(&scratches[i].vertices); 1339 } 1340 MEM_RM(allocator, scratches); 1341 return (res_T)res; 1342 error: 1343 goto exit; 1344 } 1345 1346 static res_T 1347 tree_build_sequential(struct sln_tree* tree) 1348 { 1349 /* Progress statuses */ 1350 struct progress partitioning = PROGRESS_DEFAULT; 1351 struct progress meshing = PROGRESS_DEFAULT; 1352 1353 struct sln_node node = SLN_NODE_NULL; 1354 size_t nlines = 0; 1355 size_t nnodes = 0; 1356 size_t iroot = 0; 1357 res_T res = RES_OK; 1358 1359 SHTR(line_list_get_size(tree->args.lines, &nlines)); 1360 1361 iroot = darray_node_size_get(&tree->nodes); 1362 ASSERT(iroot == 0); /* assume that the tree contains no data */ 1363 1364 /* Setup the root node */ 1365 node.range[0] = 0; 1366 node.range[1] = nlines - 1/* inclusive bound */; 1367 res = darray_node_push_back(&tree->nodes, &node); 1368 if(res != RES_OK) goto error; 1369 1370 /* Partition lines */ 1371 partitioning.total = nlines; 1372 partitioning.msg = "Partitioning: "; 1373 progress_print(tree->sln, &partitioning); 1374 res = partition_lines_depth_first(tree, iroot, &tree->nodes, &partitioning); 1375 if(res != RES_OK) goto error; 1376 1377 /* Create polylines */ 1378 nnodes = darray_node_size_get(&tree->nodes); 1379 meshing.total = nnodes; 1380 meshing.msg = "Meshing: "; 1381 progress_print(tree->sln, &meshing); 1382 res = build_polylines(tree, iroot, nnodes, &tree->vertices, &meshing); 1383 if(res != RES_OK) goto error; 1384 1385 exit: 1386 return res; 1387 error: 1388 goto exit; 1389 } 1390 1391 static res_T 1392 tree_build_parallel(struct sln_tree* tree) 1393 { 1394 /* Stack data structure */ 1395 #define STACK_SIZE (SLN_TREE_DEPTH_MAX*(SLN_TREE_ARITY_MAX-1)) 1396 size_t stack[STACK_SIZE]; 1397 size_t istack = 0; 1398 1399 /* Subtrees */ 1400 size_t subtrees[QUEUE_SIZE_MAX]; /* List of sub-tree root indices */ 1401 unsigned nsubtrees = 0; 1402 1403 /* Progress message of the meshing step */ 1404 struct progress meshing = PROGRESS_DEFAULT; 1405 1406 /* Miscellaneous */ 1407 struct sln_node root = SLN_NODE_NULL; 1408 size_t nlines = 0; 1409 size_t nnodes_upper_tree = 0; /* #nodes from the root to the subtrees */ 1410 size_t iroot = 0; 1411 size_t inode = 0; 1412 size_t i = 0; 1413 res_T res = RES_OK; 1414 1415 ASSERT(tree); 1416 1417 iroot = darray_node_size_get(&tree->nodes); 1418 ASSERT(iroot == 0); /* assume that the tree contains no data */ 1419 1420 SHTR(line_list_get_size(tree->args.lines, &nlines)); 1421 1422 /* Setup the root node */ 1423 root.range[0] = 0; 1424 root.range[1] = nlines; 1425 res = darray_node_push_back(&tree->nodes, &root); 1426 if(res != RES_OK) goto error; 1427 1428 /* Partition the lines in bread-first fixing the maximum number of leafs to 1429 * the available number of threads */ 1430 res = partition_lines_breadth_first(tree, iroot, tree->args.nthreads_hint); 1431 if(res != RES_OK) goto error; 1432 1433 /* Push back SIZE_MAX which, once pop up, will mark the end of recursion */ 1434 stack[istack++] = SIZE_MAX; 1435 1436 /* Retrieve the leaf indices generated by bread-first partitioning. These 1437 * correspond to the roots of the subtree to be built */ 1438 inode = iroot; /* Root node */ 1439 while(inode != SIZE_MAX) { 1440 const struct sln_node* node = darray_node_cdata_get(&tree->nodes) + inode; 1441 ASSERT(inode < darray_node_size_get(&tree->nodes)); 1442 1443 if(sln_node_is_leaf(node)) { 1444 ASSERT(nsubtrees < QUEUE_SIZE_MAX); 1445 subtrees[nsubtrees++] = inode; 1446 inode = stack[--istack]; /* Pop the next node */ 1447 1448 } else { 1449 const size_t ichild0 = inode + node->offset; 1450 const unsigned nchildren = node_child_count(node, tree->args.arity); 1451 1452 /* Push the children except the 1st one */ 1453 ASSERT(istack + (nchildren-1/*ichild0*/) <= STACK_SIZE); 1454 FOR_EACH_REVERSE(i, nchildren-1, 0) stack[istack++] = ichild0 + i; 1455 1456 /* Recursively traverse the 1st child */ 1457 inode = ichild0; 1458 } 1459 } 1460 1461 /* Retrieves the number of registered nodes. This is the number of nodes from 1462 * the root to the subtrees, excluding the roots of those subtrees */ 1463 nnodes_upper_tree = darray_node_size_get(&tree->nodes); 1464 nnodes_upper_tree -= nsubtrees; /* Exclude the root node of the subtrees */ 1465 1466 res = build_subtrees(tree, subtrees, nsubtrees, &meshing); 1467 if(res != RES_OK) goto error; 1468 1469 /* Build the polylines of the upper level if necessary */ 1470 if(nnodes_upper_tree != 0) { 1471 res = build_polylines 1472 (tree, 0/*root*/, nnodes_upper_tree, &tree->vertices, &meshing); 1473 if(res != RES_OK) goto error; 1474 } 1475 1476 #undef STACK_SIZE 1477 1478 exit: 1479 return res; 1480 error: 1481 goto exit; 1482 } 1483 1484 /******************************************************************************* 1485 * Local functions 1486 ******************************************************************************/ 1487 res_T 1488 tree_build(struct sln_tree* tree) 1489 { 1490 res_T res = RES_OK; 1491 ASSERT(tree); 1492 1493 res = tree->args.nthreads_hint == 1 1494 ? tree_build_sequential(tree) 1495 : tree_build_parallel(tree); 1496 if(res != RES_OK) goto error; 1497 1498 exit: 1499 return res; 1500 error: 1501 darray_node_purge(&tree->nodes); 1502 darray_vertex_purge(&tree->vertices); 1503 goto exit; 1504 }