seed.c (63175B)
/*
    Copyright 2023 The Plunder Authors
    Use of this source code is governed by a BSD-style license that can be
    found in the LICENSE file.
*/

#define _GNU_SOURCE

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <errno.h>
#include <inttypes.h>
#include <gmp.h>

#include "seed.h"
#include "xxh3.h"
#include "libbase58.h"
#include "blake3.h"


// Shims ///////////////////////////////////////////////////////////////////////

#ifdef __clang__
// OpenBSD-style overflow-checked array reallocation.  The expensive
// SIZE_MAX division is only performed when both operands are at least
// sqrt(SIZE_MAX), which MUL_NO_OVERFLOW tests cheaply.
#define MUL_NO_OVERFLOW ((size_t)1 << (sizeof(size_t) * 4))
static void *reallocarray(void *optr, size_t nmemb, size_t size) {
    if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) &&
        nmemb > 0 && SIZE_MAX / nmemb < size) {
        errno = ENOMEM;
        return NULL;
    }
    return realloc(optr, size * nmemb);
}
#endif

// NOTE: classic double-evaluation hazard if called with side-effecting
// arguments; every call site in this file passes plain lvalues.
#define MAX(x, y) (((x) > (y)) ? (x) : (y))

#if defined(__APPLE__) || defined(__MACH__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
#define BSD
#endif


// Forward Declarations ////////////////////////////////////////////////////////

static void print_fragment_outline (Seed, frag_t);
static void print_fragment (Seed, frag_t);
static void print_nat (Seed, nat_t);
static void print_tree_outline (Seed, treenode_t);
static void print_tree (Seed, treenode_t);
static void seed_debug_interior_nodes (Seed);
static void seed_debug_leaves (Seed, bool);

#if DEBUG
static void showbits_(char*, int wid, int num, uint64_t bits);
#define showbits(tag, wid, num, bits) showbits_(tag, wid, num, bits)
#else
// Expands to an empty statement in release builds.
#define showbits(tag, wid, num, bits) ;
#endif

static INLINE nat_t alloc_nat(Seed);


// Types ///////////////////////////////////////////////////////////////////////

// A shattered sub-tree: (head, tail) are indices into `treenodes`,
// `leaves` is the number of leaves in the fragment.
typedef struct {
    treenode_t head;
    treenode_t tail;
    uint32_t leaves;
} FragVal;

// Tagged 64-bit encoding of a tree node (see the TAG_*/NODEVAL_* helpers).
typedef struct treenode_value {
    uint64_t word;
} treenode_value;

/*
    A row in the nodes_table hash table.

    It's empty if val.ent.word == UINT64_MAX

    TODO Should we use pointer.ix == UINT32_MAX instead, for consistency?
*/
typedef struct {
    treenode_value val; // (Word32, Word32)
    uint32_t hax;       // Bit mixed + truncated version of `val.word`
    treenode_t ptr;
} NodeEntry;

/*
    It's empty if ptr.ix == UINT32_MAX

    TODO: Should `leaf` be a pointer? That would make the hash-table
    smaller but would make scanning for equality slower.
    This decision requires benchmarking.
*/
typedef struct leaves_table_entry {
    leaf_t leaf;
    treenode_t ptr;
} LeafEntry;

// The whole serialization context.  All tables grow on demand; see
// seed_make() for the initial sizes.
typedef struct seed_ctx {
    // One entry per tree-node
    treenode_value *treenodes;
    uint32_t *refcounts; // number of references for each node
    uint32_t *depths;    // tree depth at each node.
    int32_t treenodes_width;
    int32_t treenodes_count;

    // Number of external references:
    uint32_t holes_count;

    // Array of unique nat-leaves;
    leaf_t *nats;
    uint32_t nats_width;
    uint32_t nats_count;

    // Leaf Deduplication table.
    LeafEntry *leaves_table;
    uint32_t leaves_table_width;
    uint32_t leaves_table_count;

    // Interior-Node Deduplication table.
    NodeEntry *nodes_table;
    uint32_t nodes_table_width;
    uint32_t nodes_table_count;

    // Array of duplicate tree-nodes (and the top-level node). Each one
    // is an index into `treenodes`.
    FragVal *frags;
    uint32_t frags_width;
    uint32_t frags_count;

    uint32_t *ordering;     // Array(Index (into=nats))
    uint32_t *rev_ordering; // Array(Index (into=ordering))

    int32_t num_bytes;
    int32_t num_words;
} *Seed;


// These are basically trivial macros, but written as inline functions
// for type-safety.
145 146 static INLINE treenode_value TAG_FRAG(frag_t frag) { 147 uint64_t index = (uint64_t) frag.ix; 148 uint64_t word = (index | 7ULL << 61); 149 return (treenode_value){ word }; 150 } 151 152 static INLINE treenode_value TAG_PIN(int hole_id) { 153 uint64_t word = (hole_id | 5ULL << 61); 154 return (treenode_value){ word }; 155 } 156 157 static INLINE treenode_value TAG_NAT(nat_t nat) { 158 uint64_t index = (uint64_t) nat.ix; 159 uint64_t word = (index | 6ULL << 61); 160 return (treenode_value){ word }; 161 } 162 163 // Pack two 32-bit indicies into one 64-bit words. Since these indicies 164 // should both fit in 31 bits, we rely on the top-bit being set to zero 165 // and use that as a type-tag. 166 static INLINE treenode_value TAG_PAIR(treenode_t hed, treenode_t tel) { 167 uint64_t hed_word = (uint64_t) hed.ix; 168 uint64_t tel_word = (uint64_t) tel.ix; 169 uint64_t result = ((hed_word << 32) | tel_word); 170 return (treenode_value){ .word = result }; 171 } 172 173 static INLINE uint64_t NODEVAL_TAG(treenode_value v) { 174 return (v.word >> 61); 175 } 176 177 // If the high bit is set, it is an atom, pin, or fragment. 178 static inline bool NODEVAL_ISBACKREF (treenode_value v) { 179 return (v.word >> 63) ? true : false; // TODO 180 } 181 182 183 static INLINE treenode_t NODEVAL_HEAD(treenode_value v) { 184 return (treenode_t){ .ix = (uint32_t) (v.word >> 32) }; 185 } 186 187 // We rely on the cast to drop the high-bits. 188 static INLINE treenode_t NODEVAL_TAIL(treenode_value v) { 189 return (treenode_t){ .ix = (uint32_t) v.word }; 190 } 191 192 // We rely on the cast to drop the high-bits. 193 static INLINE nat_t NODEVAL_NAT(treenode_value v) { 194 return (nat_t){ .ix = (uint32_t) v.word }; 195 } 196 197 // We rely on the cast to drop the high-bits. 198 static INLINE uint32_t NODEVAL_PIN(treenode_value v) { 199 return (uint32_t) v.word; 200 } 201 202 203 // We rely on the cast to drop the high-bits. 
static INLINE frag_t NODEVAL_FRAG(treenode_value v) {
    return (frag_t){ .ix = (uint32_t) v.word };
}


// Memory Management ///////////////////////////////////////////////////////////

// Allocate a fresh Seed context.  All tables start small and grow on
// demand.  Hash-table slots are marked empty with all-0xFF bytes
// (UINT64_MAX / UINT32_MAX sentinels), hence the memset(…, 255, …).
Seed seed_make () {
    Seed res = calloc(1, sizeof(struct seed_ctx));

    // One entry per unique node (both leaves and interior nodes)
    res->treenodes = calloc(64, sizeof(res->treenodes[0]));
    res->refcounts = calloc(64, sizeof(res->refcounts[0]));
    // NOTE(review): sizeof operand is refcounts[0]; harmless because
    // depths has the same element type, but depths[0] would be clearer.
    res->depths = calloc(64, sizeof(res->refcounts[0]));
    res->treenodes_width = 64;
    res->treenodes_count = 0;

    // number of external references
    res->holes_count = 0;

    // Array of unique nat-leaves
    res->nats = calloc(16, sizeof(res->nats[0]));
    res->nats_width = 16;
    res->nats_count = 0;

    // Deduplication table for leaves
    size_t leaves_table_bytes = (1<<6) * sizeof(res->leaves_table[0]);
    res->leaves_table = malloc(leaves_table_bytes);
    res->leaves_table_width = (1<<6);
    res->leaves_table_count = 0;
    memset(res->leaves_table, 255, leaves_table_bytes);

    // Deduplication table for interior nodes
    size_t nodes_table_bytes = (1<<6) * sizeof(res->nodes_table[0]);
    res->nodes_table = malloc(nodes_table_bytes);
    res->nodes_table_width = (1<<6);
    res->nodes_table_count = 0;
    memset(res->nodes_table, 255, nodes_table_bytes);

    // Array of duplicate tree-nodes (and the top-level node). Each one
    // is an index into `treenodes`.
    res->frags = calloc(32, sizeof(res->frags[0]));
    res->frags_width = 32;
    res->frags_count = 0;

    res->ordering = NULL;
    res->rev_ordering = NULL;

    return res;
}

/*
    We don't free any memory or shrink any tables, we just set the
    size-counts of everything to 0 and empty all the hashtable slots.
*/
void seed_wipe (Seed ctx) {
    ctx->treenodes_count = 0;
    ctx->holes_count = 0;
    ctx->nats_count = 0;
    ctx->frags_count = 0;
    ctx->leaves_table_count = 0;
    ctx->nodes_table_count = 0;

    memset(ctx->leaves_table, 255,
           (sizeof(ctx->leaves_table[0]) * ctx->leaves_table_width));

    memset(ctx->nodes_table, 255,
           (sizeof(ctx->nodes_table[0]) * ctx->nodes_table_width));
}

// Release the context and every table it owns.
// NOTE(review): ordering/rev_ordering (allocated in seed_done) are not
// freed here — confirm whether they are released elsewhere.
void seed_free (Seed ctx) {
    free(ctx->treenodes);
    free(ctx->refcounts);
    free(ctx->depths);
    free(ctx->nats);
    free(ctx->leaves_table);
    free(ctx->nodes_table);
    free(ctx->frags);
    free(ctx);
}

// Append a node (with its refcount and depth) to the treenodes array,
// doubling the three parallel arrays when full.  Returns the new index.
static INLINE treenode_t alloc_treenode(Seed c, treenode_value v, uint32_t depth) {
    uint32_t res = c->treenodes_count++;
    uint32_t wid = c->treenodes_width;

    if (res >= wid) {
        wid *= 2;
        c->treenodes = reallocarray(c->treenodes, wid, sizeof(c->treenodes[0]));
        c->refcounts = reallocarray(c->refcounts, wid, sizeof(c->refcounts[0]));
        c->depths = reallocarray(c->depths, wid, sizeof(c->depths[0]));
        c->treenodes_width = wid;
    }

    c->treenodes[res] = v;
    c->refcounts[res] = 1; // nicer debug output than starting at 0
    c->depths[res] = depth;

    return (treenode_t){ .ix = res };
}

// Allocate a new "hole" (external reference / pin) leaf.
treenode_t seed_hole(Seed ctx) {
    debugs(" seed_hole()\n");
    int i = ctx->holes_count++;
    treenode_value v = TAG_PIN(i);
    return alloc_treenode(ctx, v, 0);
}


// Inserting ///////////////////////////////////////////////////////////////////

// Double the interior-node hash table once it reaches 50% load.
static void rehash_nodes_if_full(Seed ctx) {
    uint32_t oldwid = ctx->nodes_table_width;
    uint32_t num = ctx->nodes_table_count;

    // If capacity is >= 50%, resize.
    if (num*2 < oldwid) return;

    uint32_t newwid = oldwid*2;

    debugf("\t\tREHASH_NODES (old=%u new=%u)\n", oldwid, newwid);

    NodeEntry *oldtab = ctx->nodes_table;
    NodeEntry *newtab = malloc(newwid * sizeof(*newtab));

    memset(newtab, 255, newwid * sizeof(*newtab));

    uint64_t newmask = newwid - 1;

    /*
        Loop over the whole of oldtab, and re-insert every
        non-empty slot. Inserts are guaranteed to be unique,
        so we just need to, starting at the correct bucket,
        scan for an empty slot and write the value there.
    */
    for (int end=oldwid, i=0; i<end; i++) {
        NodeEntry ent = oldtab[i];

        uint64_t j = ent.hax;

        if (ent.val.word == UINT64_MAX) continue; // empty slot

        for (;; j++) {
            j &= newmask;
            NodeEntry *tar = newtab + j;
            if (tar->val.word == UINT64_MAX) {
                *tar = ent;
                break;
            }
        }
    }

    free(ctx->nodes_table);

    ctx->nodes_table_width = newwid;
    ctx->nodes_table = newtab;
}

// Double the leaf hash table once it reaches 50% load.
static void rehash_leaves_if_full(Seed ctx) {
    uint32_t oldwid = ctx->leaves_table_width;
    uint32_t num = ctx->leaves_table_count;

    // If capacity is >= 50%, resize.
    if (num*2 < oldwid) return;

    uint32_t newwid = oldwid*2;

    debugf("\t\tREHASH_LEAVES (old=%u new=%u)\n", oldwid, newwid);

    LeafEntry *oldtab = ctx->leaves_table;
    LeafEntry *newtab = malloc(newwid * sizeof(*newtab));

    memset(newtab, 255, newwid * sizeof(*newtab));

    uint64_t newmask = newwid - 1;

    /*
        Loop over the whole of oldtab, and re-insert every
        non-empty slot. Inserts are guaranteed to be unique,
        so we just need to, starting at the correct bucket,
        scan for an empty slot and write the value there.
    */
    for (int end=oldwid, i=0; i<end; i++) {
        LeafEntry ent = oldtab[i];

        // empty slot
        if (ent.ptr.ix == UINT32_MAX) continue;

        uint64_t j = ent.leaf.hax;
        for (;; j++) {
            j &= newmask;
            LeafEntry *tar = newtab + j;
            if (tar->ptr.ix == UINT32_MAX) {
                *tar = ent;
                debugf(("\t\t%d -> %"PRIu64"\n"), i, j);
                break;
            } else {
                debugf(
                    "\t\t\t(%d -> %"PRIu64") is taken\n",
                    i, j
                );
            }
        }
    }

    free(ctx->leaves_table);

    ctx->leaves_table_width = newwid;
    ctx->leaves_table = newtab;
}


// Intern a leaf: return the existing treenode for an equal leaf (bumping
// its refcount), or allocate a new nat + treenode and record it in the
// leaves table.
static treenode_t insert_leaf(Seed ctx, leaf_t leaf) {
    /*
        Do a linear-search over the leaves table, and use the
        index of the corresponding nat, if we find one.

        We make sure ahead-of-time that there is enough space
        for us to insert. This way we don't have to move things
        around while we are inserting.
    */

    rehash_leaves_if_full(ctx);

    LeafEntry *ent = NULL;

    uint64_t mask = ctx->leaves_table_width - 1;
    uint64_t ix = leaf.hax;

    for (;; ix++) {
        ix &= mask;

        ent = &(ctx->leaves_table[ix]);

        // marker indicating an empty slot
        if (ent->ptr.ix == UINT32_MAX) break;

        bool match = ent->leaf.hax == leaf.hax &&
                     ent->leaf.msw == leaf.msw &&
                     ent->leaf.nex == leaf.nex &&
                     ( leaf.nex == 0 ||
                       0 == memcmp(ent->leaf.buf, leaf.buf, 8*leaf.nex)
                     );

        if (!match) continue;

        if (DEBUG) {
            debugs("\t\tLEAF_MATCH:\n\t\t\t");
            print_tree_outline(ctx, ent->ptr);
            debugs(" = ");
            print_tree(ctx, ent->ptr);
            debugs("\n");
        }

        ctx->refcounts[ent->ptr.ix]++;

        return ent->ptr;
    }

    /*
        We didn't find any matching entries, but now `ent`
        is pointing into an empty slot, so we fill that and
        return it.
    */

    ctx->leaves_table_count++;

    nat_t nat = alloc_nat(ctx);
    ctx->nats[nat.ix] = leaf;
    treenode_t ptr = alloc_treenode(ctx, TAG_NAT(nat), 0);

    if (DEBUG) {
        debugs("\t\tNEW_PACKED_LEAF:\n\t\t\t");
        print_tree_outline(ctx, ptr);
        debugs(" = ");
        print_tree(ctx, ptr);
        debugs("\n");
    }

    *ent = (LeafEntry){leaf, ptr};

    return ptr;
}

// Append a fragment to the frags array, doubling it when full.
static inline frag_t alloc_frag(Seed ctx, FragVal frag) {
    debugf("alloc_frag(%d, %d)\n", frag.head.ix, frag.tail.ix);

    uint32_t nex = ctx->frags_count++;
    uint32_t wid = ctx->frags_width;

    if (nex >= wid) {
        wid *= 2;
        ctx->frags = reallocarray(ctx->frags, wid, sizeof(ctx->frags[0]));
        ctx->frags_width = wid;
    }

    ctx->frags[nex] = frag;

    return (frag_t){ .ix = nex };
}

// Stack frame for the explicit-stack traversal in shatter().
struct shatter {
    uint32_t ix;     // treenode id
    uint32_t refs;   // number of references to cell
    uint32_t leaves; // number of leaves in fragment
};

// See [shatter.txt]
//
// Walks the tree bottom-up and replaces every shared subtree (one whose
// refcount exceeds its parent's) — plus the top node — with a fragment
// back-reference.
static void shatter(Seed ctx, treenode_t top) {
    debugs("<shatter>\n");

    // Whole seed context is just a leaf
    if (top.ix == 0) {
        debugs("</shatter>");
        return;
    }

    // Our stack depth will be (treedepth*2)+1;
    // NOTE(review): comment says +1 but the code allocates +2 slots —
    // presumably one extra slot of slack; confirm against shatter.txt.
    int stksz = (ctx->depths[top.ix] * 2) + 2;

    // Heap-allocate the stack for deep trees; alloca for shallow ones.
    struct shatter *stk = (stksz > 128)
                        ? malloc(stksz * sizeof(struct shatter))
                        : alloca(stksz * sizeof(struct shatter))
                        ;

    struct shatter *end = stk + (stksz - 1);
    struct shatter *sp = end;

    sp->ix = top.ix;
    sp->leaves = 0;

    debugf(" stksz = %d\n", stksz);
    debugf(" toprefs = %d\n", ctx->refcounts[top.ix]);
    debugf(" depth = %d\n", ctx->depths[top.ix]);

loop:

    debugf(
        " sp[%"PRIu64"] is t%u (leaves=%u)\n",
        end - sp,
        sp->ix,
        sp->leaves
    );

    // stk[0] unprocessed
    if (sp->leaves == 0) {
        treenode_value val = ctx->treenodes[sp->ix];

        switch (NODEVAL_TAG(val)) {
        case 4: case 5: case 6: case 7: // leaf
            debugf(" LEAF: t%u\n", sp->ix);
            sp->leaves = 1;
            goto loop;
        default: // cell: push tail then head below the current frame
            debugf(" CELL: t%u\n", sp->ix);
            sp->refs = ctx->refcounts[sp->ix];
            sp[-1].ix = NODEVAL_TAIL(val).ix;
            sp[-1].leaves = 0;
            sp[-2].ix = NODEVAL_HEAD(val).ix;
            sp[-2].leaves = 0;
            debugf(" CELL: t%u = (%u %u)\n", sp[0].ix, sp[-2].ix, sp[-1].ix);
            sp -= 2;
            goto loop;
        }
    }

    // sp[1] not processed, swap
    if (sp[1].leaves == 0) {
        debugs(" SWAP\n");
        struct shatter tmp = sp[0];
        sp[0] = sp[1];
        sp[1] = tmp;
        goto loop;
    }

    // both processed, combine

    // stack is in this state: {head tail cell ...}
    {
        struct shatter *hed = sp;
        struct shatter *tel = sp+1;
        struct shatter *cel = sp+2;

        debugf(" combo: t%u = (t%u, t%u)\n", cel->ix, hed->ix, tel->ix);
        debugf(" hed.leaves = %u\n", hed->leaves);
        debugf(" tel.leaves = %u\n", tel->leaves);

        cel->leaves = tel->leaves + hed->leaves;
        sp += 2;

        // top node, or more refs than parent.
        if (sp == end || sp[0].refs > sp[2].refs) {
            debugf("FRAG(%d, leaves=%d)\n", sp->ix, sp->leaves);

            treenode_value val = ctx->treenodes[sp->ix];

            treenode_t hed = NODEVAL_HEAD(val);
            treenode_t tel = NODEVAL_TAIL(val);

            frag_t frag = alloc_frag(ctx,
                (FragVal){ .head=hed,
                           .tail=tel,
                           .leaves=sp->leaves
                });

            debugf(" leaf_count = %u\n", sp->leaves);

            // Replace the shared subtree with a back-reference; from the
            // parent's point of view it now counts as a single leaf.
            ctx->treenodes[sp->ix] = TAG_FRAG(frag);
            sp->leaves = 1;
        }

        debugf(" cel.leaves = %u\n", sp->leaves);

        if (sp == end) {
            debugs("</shatter>\n");
            if (stksz > 128) free(stk);
            return;
        }

        goto loop;
    }
}

// Compare nats in reverse order (biggest element comes first after
// sorting).
int cmp_nat(const void *xv, const void *yv, void *sv) {
    Seed ctx = sv;

    // casts
    const uint32_t *xi = xv;
    const uint32_t *yi = yv;

    const leaf_t x = ctx->nats[*xi];
    const leaf_t y = ctx->nats[*yi];

    if (x.nex < y.nex) return 1;
    if (x.nex > y.nex) return -1;

    if (x.msw < y.msw) return 1;
    if (x.msw > y.msw) return -1;

    if (x.nex == 0) return 0;

    // NOTE(review): mpn_cmp returns >0 when x > y, which sorts x AFTER
    // y — the opposite sense from the branches above.  Confirm whether
    // this tie-break (equal nex and msw, different low limbs) is
    // intentionally ascending.
    return mpn_cmp(x.buf, y.buf, x.nex);
}

// qsort_r's comparator signature differs between BSD and glibc; these
// wrappers adapt cmp_nat to whichever calling convention is in use.
#ifdef BSD
int cmp_nat_wrapper(void *sv, const void *xv, const void *yv) {
    return cmp_nat(xv, yv, sv);
}
#else
int cmp_nat_wrapper(const void *xv, const void *yv, void *sv) {
    return cmp_nat(xv, yv, sv);
}
#endif

static void print_nat(Seed, nat_t);

// Finalize the context: shatter the tree into fragments, sort the nats
// into descending order (filling `ordering`/`rev_ordering`), and count
// how many fit in a byte vs a word.
void seed_done(Seed ctx) {
    int num = ctx->treenodes_count;

    if (num == 0) {
        die("Can't finalize empty Seed context\n");
    }

    int nats = ctx->nats_count;

    // The most recently allocated node is the root of the tree.
    treenode_t top = (treenode_t){ .ix = num - 1 };

    if (DEBUG) {
        printf("olin: "); print_tree_outline(ctx, top); printf("\n");
        printf("tree: "); print_tree(ctx, top); printf("\n");
    }

    shatter(ctx, top);

    ctx->ordering = malloc(sizeof(int32_t) * nats);
    ctx->rev_ordering = malloc(sizeof(int32_t) * nats);

    for (int i = 0; i < nats; i++) { ctx->ordering[i] = i; }

#ifdef BSD
    qsort_r(ctx->ordering, nats, sizeof(int32_t), ctx, cmp_nat_wrapper);
#else
    qsort_r(ctx->ordering, nats, sizeof(int32_t), cmp_nat_wrapper, ctx);
#endif

    // rev_ordering inverts the permutation: nat index -> sorted position.
    for (int i = 0; i < nats; i++) {
        ctx->rev_ordering[ctx->ordering[i]] = i;
    }

    // Count the single-word nats that fit in one byte vs one word.
    int32_t num_bytes = 0;
    int32_t num_words = 0;
    for (int i = 0; i < nats; i++) {
        leaf_t l = ctx->nats[i];
        if (l.nex == 0) {
            if (l.msw < 256) num_bytes++;
            else num_words++;
        }
    }
    debugf("num_bytes=%d\n", num_bytes);
    debugf("num_words=%d\n", num_words);
    ctx->num_bytes = num_bytes;
    ctx->num_words = num_words;
}


// Inserting Nats //////////////////////////////////////////////////////////////

// Append a slot to the nats array, doubling it when full.
static INLINE nat_t alloc_nat(Seed c) {
    uint32_t res = c->nats_count++;
    uint32_t wid = c->nats_width;

    if (res >= wid) {
        wid *= 2;
        c->nats = reallocarray(c->nats, wid, sizeof(c->nats[0]));
        c->nats_width = wid;
    }

    return (nat_t){ .ix = res };
}

// Intern a nat that fits in a single 64-bit word.
treenode_t seed_word(Seed ctx, uint64_t word) {
    uint32_t byte_width = word64_bytes(word);

    debugf(
        "\tseed_packed_nat(%"PRIu64", width=%"PRIu32")\n",
        word,
        byte_width
    );

    // Hashing works differently here vs seed_barnat and seed_nat.
    // This is fine, because words are never equal to things greater
    // than words. This is useful because words are extremely common,
    // and we want inserting them to be fast.

    leaf_t leaf = (leaf_t) {
        .msw = word,
        .nex = 0,
        .buf = NULL,
        .hax = fmix64(word),
    };

    return insert_leaf(ctx, leaf);
}

// Intern a byte-array ("bar"): the bytes plus an implicit terminating
// 0x01 byte, encoded as a nat.
treenode_t seed_barnat (Seed ctx, size_t num_bytes, uint8_t* bytes) {
    // Small bars (plus terminator) fit in a single word.
    if (num_bytes < 8) {
        uint64_t word = 0;
        uint8_t *ptr = (void*) &word;
        memcpy(ptr, bytes, num_bytes);
        ptr[num_bytes] = 1;
        return seed_word(ctx, word);
    }

    // Because there is an implicit 1 byte at the end of `bytes`.
    int width_bytes = num_bytes + 1;

    // width in words
    uint64_t overflow = width_bytes % 8;
    uint64_t wid = (width_bytes / 8) + (overflow ? 1 : 0);

    // the last word goes in `msw`.
    uint64_t nex = wid - 1;

    // TODO: is this undefined behavior? Is there a better way?
    uint64_t msw = 0;
    uint8_t *dst = (void*) &msw;

    {
        // Copy the trailing partial word into msw and append the
        // terminating 0x01 byte.
        uint64_t num = overflow ? overflow-1 : 7;
        uint8_t *src = bytes + (nex*8);
        memcpy(dst, src, num);
        dst[num] = 1;
    }

    uint64_t hax = fmix64(msw) ^ fmix64(nex) ^ XXH3_64bits(bytes, nex*8);

    return insert_leaf(ctx, (leaf_t){
        .msw = msw,
        .nex = nex,
        .buf = (uint64_t*) bytes,
        .hax = hax,
        }
    );
}

// Intern a nat given as an array of 64-bit words (little-endian limbs,
// most significant word last).  The caller retains ownership of `words`;
// the leaf keeps a pointer into it.
treenode_t seed_nat(Seed ctx, size_t wid, uint64_t *words) {
    if (wid == 0) {
        return seed_word(ctx, 0);
    }

    if (wid == 1) {
        return seed_word(ctx, words[0]);
    }

    uint64_t nex = wid - 1;
    uint64_t msw = words[nex];
    uint64_t hax = fmix64(msw) ^ fmix64(nex) ^ XXH3_64bits(words, nex*8);

    if (msw == 0) {
        // die() presumably does not return; the recursive call below
        // looks like dead code kept as a fallback.
        die("Invalid indirect nat: Most significant word is zero!\n");
        return seed_nat(ctx, wid-1, words);
    }

    return insert_leaf(ctx, (leaf_t){
        .msw = words[nex],
        .nex = nex,
        .buf = words,
        .hax = hax,
        }
    );
}


// Bumping reference counts ////////////////////////////////////////////////////

/*
    In some situations, the caller may know that something has
    already been interned,
    based on the pin-hash for example.

    In that situation, it can just re-use the existing treenode_t,
    but shatter() still requires that all of the reference-counts
    be bumped. This routine does that.

    Since this operation is very expensive for huge trees, we optimize
    by using an explicit stack with a pre-computed maximum depth.

    While this is much cheaper than re-constructing the whole subtree,
    it still scales badly for large highly-duplicated trees.

    In those situations, break your input into pins, and serialize
    each pin separately.
*/
void seed_touch(Seed ctx, treenode_t x) {
    // Explicit traversal stack, sized by the subtree's recorded depth.
    treenode_t *stk = alloca(sizeof(treenode_t) * (ctx->depths[x.ix] + 1));
    treenode_t *sp = stk;
    sp[0] = x;

    // Print progress markers around traversals of very deep trees.
    bool big = (ctx->depths[x.ix] > 499);

    if (big) putchar('<');

    while (sp >= stk) {
        treenode_value val = ctx->treenodes[sp[0].ix];

        // NOTE(review): leaves (tag > 3) are popped without bumping
        // their refcount — only interior cells are counted here.
        // Confirm this matches what shatter() expects.
        if (NODEVAL_TAG(val) > 3) { sp--; continue; }

        ctx->refcounts[sp[0].ix]++;

        // Overwrite the current slot with the head and push the tail
        // above it; the tail subtree is therefore processed first.
        *(sp++) = NODEVAL_HEAD(val);
        *sp = NODEVAL_TAIL(val);
    }

    if (big) putchar('>');
}


// Inserting Nats //////////////////////////////////////////////////////////////
// (NOTE(review): this section header duplicates the one above; the
// function below inserts interior cells, not nats.)

/*
    Either returns a pointer to a matching treenode, or returns a
    pointer into an empty slot that may be written to.
*/
treenode_t seed_cons(Seed ctx, treenode_t hed, treenode_t tel) {
    rehash_nodes_if_full(ctx);

    treenode_value val = TAG_PAIR(hed, tel);

    uint64_t target = val.word;
    uint64_t mask = ctx->nodes_table_width - 1;

    uint32_t hax = (uint32_t) fmix64(val.word);
    uint64_t i = hax;

    // Open-addressed linear probe: either find an identical pair (bump
    // its refcount) or claim the first empty slot.
    for (;; i++) {
        i &= mask;

        NodeEntry *cur = (ctx->nodes_table + i);

        uint64_t word = cur->val.word;

        if (word == target) {
            debugf("\t\tTREE MATCH\n\t\t\tt%d = ", cur->ptr.ix);
            if (DEBUG) print_tree(ctx, cur->ptr);
            debugs("\n");

            ctx->refcounts[cur->ptr.ix]++;

            return cur->ptr;
        }

        if (word == UINT64_MAX) {
            uint32_t depth = 1 + MAX(ctx->depths[hed.ix], ctx->depths[tel.ix]);

            treenode_t ptr = alloc_treenode(ctx, val, depth);

            cur->val = val;
            cur->hax = hax;
            cur->ptr = ptr;

            ctx->nodes_table_count++;

            return ptr;
        }
    }
}


// Serializing /////////////////////////////////////////////////////////////////

// Bit-writer state threaded through serialize_frag() for each fragment.
struct frag_state {
    uint64_t acc;      // accumulator word being filled with bits
    uint64_t fil;      // number of bits of `acc` already filled
    uint64_t *out;     // next output word
    treenode_t *stack; // scratch traversal stack
    uint32_t num_nats;
    uint32_t num_holes;
    int refbits;       // current bit-width of a leaf reference
};

// Emit one fragment as a bit-stream: a 1-bit per interior node (in
// left-recursion order) and a 0-bit followed by a refbits-wide
// back-reference per leaf.
static void serialize_frag(Seed ctx, struct frag_state *st, FragVal frag) {
    treenode_t *stack = st->stack;
    treenode_t treeidx;

    uint64_t fil = st->fil;
    uint64_t acc = st->acc;
    uint64_t *out = st->out;
    uint64_t numnats = st->num_nats;
    uint64_t numholes = st->num_holes;
    uint64_t numleaves = numnats + numholes;
    int refbits = st->refbits;

    int sp = 0;

    stack[sp++] = frag.tail;
    stack[sp] = frag.head;

    // this is the left-recursion depth. It tracks the number of
    // 1-tags that we need to output before the head leaf.
    int deep = 0;

    while (sp >= 0) {

    recur: // skipping the check (not needed when recursing downwards)

        treeidx = stack[sp];
        treenode_value val = ctx->treenodes[treeidx.ix];

        /*
            If this is a node, push the tail and then recurse
            into the head. Also increment `deep` to
            track the number of 1-bits needed.
        */
        if (!NODEVAL_ISBACKREF(val)) {
            deep++;
            stack[sp++] = NODEVAL_TAIL(val);
            stack[sp] = NODEVAL_HEAD(val);
            goto recur;
        }

        /*
            Output `deep` one bits. Since the depth can be
            bigger than 64, we may need to output multiple
            words here.
        */
        while (deep) {
            int remain = 64 - fil;

            /*
                If all of the tag bits fit in the remaining
                bits of the accumulator, that's easy.
            */
            if (deep < remain) {
                acc |= ((1ULL<<deep)- 1ULL) << fil;
                fil += deep;
                deep=0;
                break;
            }

            /*
                Otherwise, fill the rest of the accumulator
                with ones and repeat the whole process.
            */
            *out++ = acc | (UINT64_MAX << fil);
            acc = fil = 0;
            deep -= remain;
        }

        /*
            We need to output a 0 bit here, to tag this as a leaf.
            However, we can do that more efficiently by just
            right shifting the significant-bits by one and then
            outputting refbits+1 bits.
        */

        /*
            This cast to u32 is important because the truncation
            drops the tag bits. This should always be safe, unless
            there are billions of unique atoms in a single pin.
        */
        uint32_t leaf = (uint32_t) val.word;
        uint64_t bits;

        /*
            5=pin, 6=nat, 7=frag
        */
        switch (NODEVAL_TAG(val)) {
        case 5: bits = leaf; break;
        case 6: bits = numholes + ctx->rev_ordering[leaf]; break;
        case 7: bits = numleaves + leaf; break;
        default: die("impossible: 4 is bar tag (not used)");
        }

        /*
            `bits` is left-shifted by one to create a zero-tag
            at the front (indicating a leaf). And we bump `fil` by
            (refbits+1) for the same reason.
        */
        bits <<= 1;
        uint64_t new_bits = (bits << fil);
        // NOTE(review): when fil == 0 this is a shift by 64, which is
        // undefined behavior in C.  The `overflow` value is only
        // consumed when fil+refbits+1 >= 64, so it may be benign in
        // practice — confirm fil == 0 can't reach that branch.
        uint64_t overflow = (bits >> (64 - fil));
        acc |= new_bits;
        fil += refbits+1;

        /*
            If the leaf data doesn't fit in the accumulator, output
            the accumulator, and replace it with the overflow data.
        */
        if (fil >= 64) {
            *out++ = acc;
            acc = overflow;
            fil -= 64;
        }

        sp--;
    }

    // Flush these local variables back into the state (which is
    // re-loaded when we process the next fragment).
    st->fil = fil;
    st->acc = acc;
    st->out = out;
}

// Compute the exact byte size of the serialized form (header + atom
// data + padded tree bits), rounded up to a multiple of 8.
size_t seed_size (Seed ctx) {
    uint64_t numholes = ctx->holes_count;
    uint64_t numnats = ctx->nats_count;
    uint64_t numbytes = ctx->num_bytes;
    uint64_t numwords = ctx->num_words;
    uint64_t numbigs = numnats - (numbytes + numwords);
    uint64_t numfrags = ctx->frags_count;

    // The backreferences table starts off holding all "external
    // references" and all atoms.
    uint64_t refrs = numholes + numnats;

    // This is the width before the bignat size information, it will
    // be increased as we go along. It includes the header, all the
    // bytes, the words, and the word-width of each bignat.
    uint64_t width = 40 + numbytes + numwords*8 + numbigs*8;

    // Add the actual bignat data to the result width (each bignat
    // takes n words).
    for (int end=numbigs, j=0; j<end; j++) {
        int ix = ctx->ordering[j];
        uint64_t w = ctx->nats[ix].nex + 1;
        width += (8*w);
    }

    uint64_t treebits = 0;

    for (int end=numfrags, i=0; i<end; i++) {
        // `refrs` is always at least one, because you can't
        // have a cell without some atom (or external reference)
        // to have as a leaf.
        uint64_t maxref = refrs - 1;
        uint64_t leaf_width = word64_bits(maxref);

        // Calculate the size of the fragment using the number
        // of leaves and the bit-width of each leaf.
        // The following formula works because:
        //
        //   - Each leaf requires leaf_width bits
        //   - Each leaf requires a single tag bit.
        //   - Each interior node requires a single tag bit.
        //   - There are (num_leaves - 1) interior nodes
        //   - Every fragment is a cell, and so the outermost cell
        //     does not requires a tag.
        uint64_t leaves = ctx->frags[i].leaves;
        uint64_t frag_bits = (leaves*leaf_width) + (leaves * 2) - 2;

        // Accumulate the number of bits used in this fragment,
        // and bump `refrs` since the next fragment can also
        // reference this one.
        treebits += frag_bits;
        refrs++;
    }

    // Tree-bits is padded to be a multiple of 8 (treat as a byte-array);
    uint64_t hanging_bits = treebits % 8;
    if (hanging_bits) treebits += (8 - hanging_bits);

    // Add in the tree bits;
    width += (treebits/8);

    // Result is always a multiple of 8 (so we can treat it as an
    // array of 64-bit words);
    uint64_t hanging_bytes = width % 8;
    if (hanging_bytes) width += (8 - hanging_bytes);

    return width;
}

// Write the serialized form into `top` (which must be at least
// seed_size() bytes).  Returns the number of bytes actually written.
// NOTE(review): the `width` parameter is never read — the local
// `size_t width` in the no-frags branch shadows it.  Confirm whether
// the parameter is kept only for API symmetry with seed_size().
size_t seed_save (Seed ctx, size_t width, uint8_t *top) {
    uint32_t num_holes = ctx->holes_count;
    uint32_t num_nats = ctx->nats_count;
    uint64_t num_bytes = ctx->num_bytes;
    uint64_t num_words = ctx->num_words;
    uint64_t num_bigs = num_nats - (num_bytes + num_words);
    uint64_t num_frags = ctx->frags_count;

    uint64_t *header = (void*) top;
    uint8_t *out = top + 40 + (num_bigs*8);


    // size metadata (40 bytes)

    header[0] = num_holes;
    header[1] = num_bigs;
    header[2] = num_words;
    header[3] = num_bytes;
    header[4] = num_frags;

    // bignat sizes

    for (int end=num_bigs, i=0; i < end; i++) {
        int ix = ctx->ordering[i];
        leaf_t l = ctx->nats[ix];
        uint64_t nw = l.nex + 1;
        header[5+i] = nw;
    }


    // Actual atom data in decreasing order. Bignums first.

    for (int end=num_bigs, i=0; i < end; i++) {
        int ix = ctx->ordering[i];
        leaf_t leaf = ctx->nats[ix];
        memcpy(out, leaf.buf, leaf.nex * 8);
        out += ((leaf.nex + 1) * 8);
        ((uint64_t*)out)[-1] = leaf.msw;
    }

    // Then words.

    for (int i=0; i<num_words; i++) {
        int ix = ctx->ordering[i + num_bigs];
        ((uint64_t*)out)[0] = ctx->nats[ix].msw;
        out += 8;
    }

    // Then bytes.

    for (int i=0; i < num_bytes; i++) {
        int ix = ctx->ordering[i + num_bigs + num_words];
        *out++ = (uint8_t) ctx->nats[ix].msw;
    }

    // No fragments: the output is just the header + atoms, padded to a
    // word boundary.
    if (!num_frags) {
        size_t width = (out - top);
        size_t hanging_bytes = width % 8;
        if (hanging_bytes) width += (8 - hanging_bytes);
        return width;
    }

    struct frag_state st;

    /*
        treenode_t and refcounts are both 32 bits, and there is one
        refcount for every node in the whole tree. The stack only
        needs to be as big as the depth of the deepest fragment, so
        `refcounts` is plenty big.

        At this stage, we have already shattered the tree, so it's
        fine to cannibalize memory like this.

        We should, however, document that a Seed context is "used up"
        after serialization, and can only be freed or wiped after that.
    */
    st.stack = (void*) ctx->refcounts;

    /*
        If we are not in word-aligned state, we need to rewind to the
        last word-aligned byte. We will load the extra bytes into the
        initial accumulator and treat those bits as "already filled".
    */
    int used = out - top;
    int clif = used % 8;
    out -= clif;

    /*
        We read in the current state of the first word, but we don't
        move the pointer. When this word has been filled with bytes,
        we will flush it to the array, replacing the existing `clif`
        bytes with the same data they started with.
    */
    st.out = (uint64_t*) out;
    st.acc = *(st.out);
    st.fil = 8 * clif;

    int maxref = (num_holes + num_nats) - 1;

    st.num_nats = num_nats;
    st.num_holes = num_holes;
    st.refbits = word64_bits(maxref);

    int until_cliff = (1 << st.refbits) - (maxref + 1);

    /*
        `until_cliff` is the number of fragments we can output before
        the fragment width grows.

        After we output, if this is zero, then we:

        - increment st.refbits
        - Set until_cliff to (maxref - 1).

        Setting until_cliff to the value of (maxref-1) works out
        because the size increases every doubling of this value.
    */

    for (int i=0; i<num_frags; i++) {
        serialize_frag(ctx, &st, ctx->frags[i]);

        // Each emitted fragment becomes referenceable by later ones.
        maxref++;
        if (until_cliff == 0) {
            st.refbits++;
            until_cliff = maxref;
        }
        until_cliff--;
    }

    /*
        If there is still data in the accumulator word (st.acc),
        then flush that to the buffer.

        The only time this doesn't happen is if the output
        perfectly fits in a multiple of 64 bits.
    */
    if (st.fil > 0) { *st.out++ = st.acc; }

    /*
        Calculate the number of bytes written, and verify that we
        filled the whole buffer and didn't overflow it.
    */
    uint8_t *end = (uint8_t*) st.out;
    return (end - top);
}


// Bit-reader state used while loading fragments back in.
typedef struct frag_loader_state {
    Seed ctx;
    uint64_t *ptr; // Pointer into the remaining words.
    int rem;       // Remaining words in the buffer.
    uint64_t acc;  // The last word read from the buffer.
    uint64_t red;  // The number of bits of `acc` that have been consumed.
1270 1271 uint64_t ref_bits; // The number of bits per leaf 1272 uint64_t max_ref; // The maximum ref that we can accept; 1273 uint64_t num_holes; 1274 uint64_t num_nats; 1275 } FragLoadSt; 1276 1277 typedef struct load_fragtree_result { 1278 treenode_t tree; 1279 uint32_t leaves; 1280 } FragRes; 1281 1282 static FragRes load_fragtree(FragLoadSt *s); 1283 1284 static inline FragVal 1285 load_fragment(FragLoadSt *s) { 1286 FragRes hed = load_fragtree(s); 1287 FragRes tel = load_fragtree(s); 1288 1289 uint32_t leaves = hed.leaves + tel.leaves; 1290 1291 FragVal fv = { .head=hed.tree, .tail=tel.tree, .leaves=leaves }; 1292 return fv; 1293 } 1294 1295 static FragRes 1296 load_fragtree(FragLoadSt *s) { 1297 int refbits = s->ref_bits; 1298 1299 showbits("mor", 64, (64-s->red), (s->acc >> s->red)); debugs("\n"); 1300 1301 uint64_t bit = (s->acc >> s->red) & 1; 1302 1303 s->red = (s->red + 1) % 64; 1304 1305 showbits("bit", 1, 1, bit); 1306 showbits("mor", 64, (64-s->red), (s->acc >> s->red)); debugs("\n"); 1307 1308 // TODO: Haskell approach is better, actually. Don't guard 1309 // against end of buffer here, just use "current word = 0" 1310 // if we are past. Validate at the end by checking that the 1311 // pointer didn't overflow. 1312 1313 if (!s->red) { 1314 if (!s->rem) die("Internal error: not enough space\n"); 1315 s->rem--; 1316 s->acc = *(s->ptr)++; 1317 } 1318 1319 showbits("mor", 64, (64-s->red), (s->acc >> s->red)); debugs("\n"); 1320 1321 // TODO: Haskell approach is better, actually. Use clz to count 1322 // 1s, instead of creating individual masks per one-bit. 
1323 if (bit) { 1324 debugs("cell\n"); 1325 FragVal res = load_fragment(s); 1326 uint32_t depth = 1 + MAX( s->ctx->depths[res.head.ix] 1327 , s->ctx->depths[res.tail.ix] 1328 ); 1329 treenode_t tr = alloc_treenode(s->ctx, TAG_PAIR(res.head, res.tail), depth); 1330 return (FragRes){ .tree=tr, .leaves=res.leaves }; 1331 } 1332 1333 debugs("leaf\n"); 1334 debugf("refbits=%u\n", refbits); 1335 1336 // - Read n bits. 1337 // - n is the bit-width of the maximum backref at this point. 1338 uint64_t leaf_mask = (1 << refbits) - 1; 1339 uint64_t leaf = (s->acc >> s->red) & leaf_mask; 1340 1341 debugf( 1342 "\tacc=%"PRIu64" red=%"PRIu64" | mask=%"PRIu64" leaf=%"PRIu64"\n", 1343 s->acc, s->red, leaf_mask, leaf 1344 ); 1345 1346 int oldred = s->red; 1347 debugf("[[refbits=%d oldred=%d]]\n", refbits, oldred); 1348 1349 s->red += refbits; 1350 1351 if (s->red >= 64) { 1352 int extra = (oldred + refbits) - 64; 1353 int remain = 64-extra; 1354 int already = refbits - extra; 1355 1356 uint64_t nex = s->ptr[0]; 1357 1358 uint64_t why = nex & ((1ULL << extra) - 1ULL); 1359 uint64_t more = why << already; 1360 1361 debugf( 1362 "[nex=%"PRIu64" extra=%d remain=%d already=%d more=%"PRIu64" why=%"PRIu64"]\n", 1363 nex, extra, remain, already, more, why 1364 ); 1365 1366 leaf |= more; 1367 1368 s->red -= 64; 1369 s->acc = nex; 1370 s->rem--; 1371 s->ptr++; 1372 } 1373 1374 if (leaf > s->max_ref) { 1375 die("leaf val is out-of-bounds (%"PRIu64")\n", leaf); 1376 } 1377 1378 treenode_value v; 1379 bool is_frag = false; 1380 1381 if (leaf < s->num_holes) { 1382 debugf("got a pin: %ld\n", leaf); 1383 v = TAG_PIN(leaf); 1384 } else { 1385 leaf -= s->num_holes; 1386 if (leaf < s->num_nats) { 1387 debugf("got a nat: %ld\n", leaf); 1388 v = TAG_NAT((nat_t){leaf}); 1389 } else { 1390 debugs("got a frag\n"); 1391 leaf -= s->num_nats; 1392 v = TAG_FRAG((frag_t){leaf}); 1393 is_frag = true; 1394 } 1395 } 1396 1397 uint32_t depth = 0; 1398 1399 if (is_frag) { 1400 uint32_t frag_ix = leaf; 1401 FragVal 
v = s->ctx->frags[frag_ix]; 1402 depth = 1 + MAX(s->ctx->depths[v.head.ix], s->ctx->depths[v.tail.ix]); 1403 } 1404 1405 treenode_t t = alloc_treenode(s->ctx, v, depth); 1406 1407 return (FragRes){ .tree = t, .leaves = 0 }; 1408 } 1409 1410 1411 /* 1412 Note that for indirect atoms we do not copy the data, we just 1413 retain the pointer that we are given. 1414 1415 The livetime of that data must extend until the context is wiped 1416 or freed. 1417 */ 1418 void seed_load(Seed ctx, size_t wid, uint8_t *top) { 1419 struct ser st = { .buf = top, .wid = wid }; 1420 1421 if (st.wid < 40) { 1422 die("Input buffer is too small to include a seed header\n"); 1423 } 1424 1425 if (st.wid % 8) { 1426 die("Input buffer must contain a multiple of 8 bytes.\n"); 1427 } 1428 1429 uint64_t *header = (void*) st.buf; 1430 1431 uint64_t num_holes = header[0]; 1432 uint64_t num_bigs = header[1]; 1433 uint64_t num_words = header[2]; 1434 uint64_t num_bytes = header[3]; 1435 uint64_t num_frags = header[4]; 1436 1437 if (DEBUG) printf("\t(setting up %"PRIu64" holes)\n", num_holes); 1438 for (int i=0; i < num_holes; i++) seed_hole(ctx); 1439 1440 int header_size = 40 + (8 * num_bigs); 1441 1442 if (st.wid < header_size) { 1443 die("input buffer too small to include bignat widths\n"); 1444 } 1445 1446 if (DEBUG) printf("\t(loading %"PRIu64" bignat widths)\n", num_bigs); 1447 uint64_t bigwidths[num_bigs]; 1448 for (int i=0; i<num_bigs; i++) { bigwidths[i] = header[5+i]; } 1449 1450 st.buf += header_size; 1451 st.wid -= header_size; 1452 1453 if (DEBUG) { 1454 printf("num_holes = %"PRIu64"\n", num_holes); 1455 printf("num_bytes = %"PRIu64"\n", num_bytes); 1456 printf("num_words = %"PRIu64"\n", num_words); 1457 printf("num_bigs = %"PRIu64"\n", num_bigs); 1458 printf("num_frags = %"PRIu64"\n", num_frags); 1459 for (int i=0; i<num_bigs; i++) { 1460 printf("bignat_width[%d] = %ld\n", i, bigwidths[i]); 1461 } 1462 } 1463 1464 if (DEBUG) printf("\t(loading %"PRIu64" bigs)\n", num_bigs); 1465 for 
(int i=0; i<num_bigs; i++) { 1466 uint64_t wid = bigwidths[i]; // TODO wid<2 is an error 1467 uint32_t nix = alloc_nat(ctx).ix; 1468 uint64_t nex = wid-1; 1469 uint64_t *buf = (uint64_t*) st.buf; 1470 uint64_t msw = buf[nex]; 1471 ctx->nats[nix] = (leaf_t){msw, nex, buf, 0}; 1472 st.wid -= (8*wid); 1473 st.buf += (8*wid); 1474 } 1475 1476 if (DEBUG) printf("\t(loading %"PRIu64" words)\n", num_words); 1477 for (int i=0; i<num_words; i++) { 1478 uint64_t word = ((uint64_t*)st.buf)[0]; 1479 uint32_t nix = alloc_nat(ctx).ix; 1480 ctx->nats[nix] = (leaf_t){word, 0, NULL, 0}; 1481 st.buf += 8; 1482 st.wid -= 8; 1483 } 1484 1485 if (DEBUG) printf("\t(loading %"PRIu64" bytes)\n", num_bytes); 1486 for (int i=0; i<num_bytes; i++) { 1487 uint64_t byte = st.buf[i]; 1488 uint32_t nix = alloc_nat(ctx).ix; 1489 ctx->nats[nix] = (leaf_t){byte, 0, NULL, 0}; 1490 } 1491 st.wid -= num_bytes; 1492 st.buf += num_bytes; 1493 1494 uint64_t num_nats = num_bytes + num_words + num_bigs; 1495 1496 if (DEBUG) printf("\t(loading %"PRIu64" frags)\n", num_frags); 1497 if (num_frags) { 1498 int used = (st.buf - top); 1499 int clif = used % 8; 1500 1501 if (st.wid < (8 - clif)) { 1502 die("Internal error: not enough bits remain to serialize frags\n"); 1503 } 1504 1505 uint64_t red, acc, *ptr, rem; 1506 1507 // If reading the leaves left us in a place where we 1508 // are not word-aligned, then bump the pointer back to 1509 // the start of the last word. We start reading the 1510 // accumulator at the bit-offset `red`, so this doesn't 1511 // add any junk to the results. 
1512 st.buf -= clif; 1513 st.wid += clif; 1514 1515 debugf("\tst.wid = %"PRIu64"\n", st.wid); 1516 debugf("\tst.buf = 0x%lx\n", (uint64_t) st.buf); 1517 1518 red = clif * 8; 1519 ptr = (uint64_t*) st.buf; 1520 acc = *ptr++; 1521 rem = (st.wid / 8) - 1; 1522 1523 FragLoadSt s = { 1524 .ctx = ctx, 1525 .ptr = ptr, 1526 .acc = acc, 1527 .red = red, 1528 .rem = rem, 1529 .ref_bits = 0, 1530 .max_ref = 0, 1531 .num_nats = num_nats, 1532 .num_holes = num_holes, 1533 }; 1534 1535 for (int i=0; i<num_frags; i++) { 1536 int num_refs = num_holes + num_nats + i; 1537 s.max_ref = num_refs - 1; 1538 s.ref_bits = word64_bits(s.max_ref); 1539 debugf("ref_bits: %ld\n", s.ref_bits); 1540 1541 debugs("\t[frag_loader_state]\n"); 1542 debugf("\tptr = 0x%016lx\n", (uint64_t) s.ptr); 1543 1544 debugs(" "); 1545 showbits("acc", 64, 64, s.acc); 1546 debugs("\n"); 1547 debugs(" "); 1548 showbits("mor", 64, (64-s.red), (s.acc >> s.red)); 1549 debugs("\n"); 1550 1551 debugf("\tred = %"PRIu64"\n", s.red); 1552 debugf("\trem = %d\n", s.rem); 1553 debugf("\tref_bits = %"PRIu64"\n", s.ref_bits); 1554 debugf("\tmax_ref = %"PRIu64"\n", s.max_ref); 1555 debugf("\tnum_nats = %"PRIu64"\n", s.num_nats); 1556 1557 FragVal fv = load_fragment(&s); 1558 frag_t frag = alloc_frag(ctx, fv); 1559 debugf("loaded frag %u\n", frag.ix); 1560 } 1561 1562 if (s.rem != 0) { 1563 // TODO: cleaner handling of this edge-case 1564 if (s.rem == -1 && s.red == 0); 1565 else { 1566 showbits("mor", 64, (64-s.red), (s.acc >> s.red)); debugs("\n"); 1567 showbits("mor", 64, 64, s.ptr[1]); debugs("\n"); 1568 die("EXTRA STUFF %d words unread!\n", s.rem); 1569 } 1570 } 1571 1572 if (s.acc >> s.red) { 1573 // TODO: cleaner handling of this edge-case 1574 if (s.rem == -1 && s.red == 0); 1575 else { 1576 showbits("mor", 64, (64-s.red), (s.acc >> s.red)); debugs("\n"); 1577 die("EXTRA BITS unread in last word: %lx\n", (s.acc >> s.red)); 1578 } 1579 } 1580 1581 } else { 1582 uint32_t num_leaves = num_holes + num_nats; 1583 1584 if 
(num_leaves == 0) { 1585 die("no leaves.\n"); 1586 } 1587 1588 if (num_leaves > 1) { 1589 die("No frags, but multiple leaves.\n"); 1590 } 1591 1592 if (num_holes == 0) { 1593 treenode_value v = TAG_NAT((nat_t){0}); 1594 alloc_treenode(ctx, v, 0); 1595 } 1596 1597 if (st.wid != 0) { 1598 debugf("EXTRA STUFF %"PRIu64" bytes unread!\n", st.wid); 1599 } 1600 } 1601 } 1602 1603 1604 1605 //////////////////////////////////////////////////////////////////////////////// 1606 // Testing ///////////////////////////////////////////////////////////////////// 1607 //////////////////////////////////////////////////////////////////////////////// 1608 1609 #if DEBUG 1610 static void showbits_(char *key, int wid, int num, uint64_t bits) { 1611 putchar('\t'); 1612 putchar(' '); 1613 if (key) debugf("%s:", key); 1614 putchar('('); 1615 1616 int extra = wid - num; 1617 for (int i=0; i<extra; i++) putchar('_'); 1618 1619 for (num--; num >= 0; num--) { 1620 putchar(((bits>>num)&1) ? '1' : '0'); 1621 } 1622 putchar(')'); 1623 putchar('\n'); 1624 } 1625 #endif 1626 1627 // This is just used for printing 1628 static uint8_t *leaf_bytes(leaf_t l, int *width) { 1629 if (l.msw == 0) { 1630 *width = 0; 1631 return NULL; 1632 } 1633 1634 int wid = l.nex + 1; 1635 1636 uint64_t *wbuf = calloc(8, wid); 1637 1638 // TODO: memcpy 1639 for (int i=0; i < l.nex; i++) { 1640 wbuf[i] = l.buf[i]; 1641 } 1642 1643 wbuf[l.nex] = l.msw; 1644 1645 uint8_t *bbuf = (void*) wbuf; 1646 1647 int byte_width = (wid * 8); 1648 while (bbuf[byte_width-1] == 0) byte_width--; 1649 1650 *width = byte_width; 1651 return (void*) bbuf; 1652 } 1653 1654 1655 static void print_nat(Seed ctx, nat_t nat) { 1656 leaf_t l = ctx->nats[nat.ix]; 1657 1658 if (l.nex == 0 && l.msw < 256) { 1659 printf("%ld", l.msw); 1660 return; 1661 } 1662 1663 int wid; 1664 uint8_t *bytes = leaf_bytes(l, &wid); 1665 1666 bool is_bar = false; 1667 bool is_string = true; 1668 int end = wid - 1; 1669 1670 if (bytes[end] == 1) end--, is_bar=true; 1671 
1672 for (int i = end; i>=0; i--) { 1673 uint8_t byte = bytes[i]; 1674 if (isprint(byte)) continue; 1675 is_string=false; 1676 is_bar=false; 1677 break; 1678 } 1679 1680 if (is_bar) printf("b#"); 1681 1682 if (is_bar || is_string) { 1683 putchar('{'); 1684 fwrite(bytes, 1, (end+1), stdout); 1685 putchar('}'); 1686 free(bytes); 1687 return; 1688 } 1689 1690 free(bytes); 1691 1692 if (l.nex == 0) { 1693 printf("%ld", l.msw); 1694 } else { 1695 printf("0x%lx", l.msw); 1696 for (int i=l.nex; i>0; i--) { 1697 printf(".%016lx", l.buf[i-1]); 1698 } 1699 } 1700 } 1701 1702 static void print_tree_outline_list(Seed ctx, treenode_t tree) { 1703 treenode_value val = ctx->treenodes[tree.ix]; 1704 1705 uint32_t offset = (uint32_t) val.word; 1706 1707 switch (NODEVAL_TAG(val)) { 1708 case 4: die("impossible: 4 tag is not used"); 1709 case 5: printf("p%u", offset); break; 1710 case 6: printf("n%u", offset); break; 1711 case 7: printf("f%u", offset); break; 1712 default: { 1713 treenode_t hed = NODEVAL_HEAD(val); 1714 treenode_t tel = NODEVAL_TAIL(val); 1715 print_tree_outline_list(ctx, hed); 1716 putchar(' '); 1717 print_tree_outline(ctx, tel); 1718 } 1719 } 1720 } 1721 1722 static void print_tree_outline(Seed ctx, treenode_t tree) { 1723 treenode_value val = ctx->treenodes[tree.ix]; 1724 1725 uint32_t offset = (uint32_t) val.word; 1726 1727 switch (NODEVAL_TAG(val)) { 1728 case 4: die("impossible: 4 tag is not used"); 1729 case 5: printf("p%u", offset); break; 1730 case 6: printf("n%u", offset); break; 1731 case 7: printf("f%u", offset); break; 1732 default: { 1733 treenode_t hed = NODEVAL_HEAD(val); 1734 treenode_t tel = NODEVAL_TAIL(val); 1735 putchar('('); 1736 print_tree_outline_list(ctx, hed); 1737 putchar(' '); 1738 print_tree_outline(ctx, tel); 1739 putchar(')'); 1740 } 1741 } 1742 } 1743 1744 static void print_tree_list(Seed ctx, treenode_t tree) { 1745 treenode_value val = ctx->treenodes[tree.ix]; 1746 1747 switch (NODEVAL_TAG(val)) { 1748 case 4: die("impossible: 4 tag 
is not used"); 1749 case 5: printf("p%d", NODEVAL_PIN(val)); break; 1750 case 6: print_nat(ctx, NODEVAL_NAT(val)); break; 1751 case 7: { 1752 FragVal frag = ctx->frags[NODEVAL_FRAG(val).ix]; 1753 print_tree_list(ctx, frag.head); 1754 putchar(' '); 1755 print_tree(ctx, frag.tail); 1756 break; 1757 } 1758 default: { 1759 treenode_t hed = NODEVAL_HEAD(val); 1760 treenode_t tel = NODEVAL_TAIL(val); 1761 print_tree_list(ctx, hed); 1762 putchar(' '); 1763 print_tree(ctx, tel); 1764 } 1765 } 1766 } 1767 1768 void print_tree_pub(Seed ctx, treenode_t tree) { 1769 print_tree(ctx, tree); 1770 } 1771 1772 static void print_tree(Seed ctx, treenode_t tree) { 1773 treenode_value val = ctx->treenodes[tree.ix]; 1774 1775 switch (NODEVAL_TAG(val)) { 1776 case 4: die("impossible: four tag is not used"); 1777 case 5: printf("p%d", NODEVAL_PIN(val)); break; 1778 case 6: print_nat(ctx, NODEVAL_NAT(val)); break; 1779 case 7: print_fragment(ctx, NODEVAL_FRAG(val)); break; 1780 default: { 1781 treenode_t hed = NODEVAL_HEAD(val); 1782 treenode_t tel = NODEVAL_TAIL(val); 1783 putchar('('); 1784 print_tree_list(ctx, hed); 1785 putchar(' '); 1786 print_tree(ctx, tel); 1787 putchar(')'); 1788 } 1789 } 1790 } 1791 1792 static void print_fragment_outline(Seed ctx, frag_t ref) { 1793 FragVal frag = ctx->frags[ref.ix]; 1794 putchar('('); 1795 print_tree_outline_list(ctx, frag.head); 1796 putchar(' '); 1797 print_tree_outline(ctx, frag.tail); 1798 putchar(')'); 1799 } 1800 1801 static void print_fragment(Seed ctx, frag_t ref) { 1802 FragVal frag = ctx->frags[ref.ix]; 1803 putchar('('); 1804 print_tree_list(ctx, frag.head); 1805 putchar(' '); 1806 print_tree(ctx, frag.tail); 1807 putchar(')'); 1808 } 1809 1810 1811 static void seed_debug_leaves(Seed ctx, bool details) { 1812 printf("\n\tleaves: (width=%u, count=%u)\n\n", 1813 ctx->leaves_table_width, 1814 ctx->leaves_table_count); 1815 1816 int wid = ctx->leaves_table_width; 1817 1818 uint64_t mask = ctx->leaves_table_width - 1; 1819 1820 for (int 
i=0; i<wid; i++) { 1821 LeafEntry ent = ctx->leaves_table[i]; 1822 1823 // Empty slot 1824 if (ent.ptr.ix == UINT32_MAX) continue; 1825 1826 printf("\t\t%4d = ", i); 1827 print_tree_outline(ctx, ent.ptr); 1828 printf("\t(val="); 1829 print_tree(ctx, ent.ptr); 1830 printf(")\n"); 1831 1832 if (details) { 1833 uint64_t width = (ent.leaf.nex + 1); 1834 uint64_t bin = ent.leaf.hax & mask; 1835 uint64_t distance = (((uint64_t)i) - bin); 1836 printf("\t\t bytes: %-4lu\n", width); 1837 printf("\t\t bin: %"PRIu64" [dist=%"PRIu64"]\n", 1838 bin, distance); 1839 printf("\t\t hash: 0x%016lx\n", ent.leaf.hax); 1840 } 1841 } 1842 } 1843 1844 1845 static void seed_debug_interior_nodes(Seed ctx) { 1846 printf("\n\tinterior_nodes_table: (width=%u, count=%u)\n\n", 1847 ctx->nodes_table_width, 1848 ctx->nodes_table_count); 1849 1850 int wid = ctx->nodes_table_width; 1851 1852 uint64_t mask = ctx->nodes_table_width - 1; 1853 1854 for (int i=0; i<wid; i++) { 1855 NodeEntry ent = ctx->nodes_table[i]; 1856 1857 // Empty slot 1858 if (ent.val.word == UINT64_MAX) continue; 1859 1860 uint64_t bin = ent.hax & mask; 1861 uint64_t distance = (((uint64_t)i) - bin); 1862 1863 printf( 1864 "\t\t%4d = t%u\tbin=%"PRIu64"\tdist=%"PRIu64"\thash=%08x\n", 1865 i, ent.ptr.ix, bin, distance, ent.hax 1866 ); 1867 } 1868 } 1869 1870 1871 void seed_dbug(Seed ctx) { 1872 printf("\nseed_debug():\n"); 1873 1874 seed_debug_leaves(ctx, false); 1875 1876 seed_debug_interior_nodes(ctx); 1877 1878 { 1879 printf("\n\tnats: (width=%u, count=%u)\n\n", 1880 ctx->nats_width, 1881 ctx->nats_count); 1882 int num = ctx->nats_count; 1883 for (int i=0; i<num; i++) { 1884 printf("\t\tn%d = ", i); 1885 print_nat(ctx, (nat_t){ .ix = i }); 1886 printf("\n"); 1887 } 1888 } 1889 1890 { 1891 printf("\n\ttreenodes: (width=%u, count=%u)\n\n", 1892 ctx->treenodes_width, 1893 ctx->treenodes_count); 1894 int num = ctx->treenodes_count; 1895 for (int i=0; i<num; i++) { 1896 int ref = ctx->refcounts[i]; 1897 treenode_value val = 
ctx->treenodes[i]; 1898 switch (NODEVAL_TAG(val)) { 1899 case 4: 1900 die("impossible: 4 tag is not used"); 1901 continue; 1902 case 5: 1903 printf("\t\tt%d = p%u\t\trefs=%u\n", i, (uint32_t) val.word, ref); 1904 continue; 1905 case 6: 1906 printf("\t\tt%d = n%u\t\trefs=%u\n", i, (uint32_t) val.word, ref); 1907 continue; 1908 case 7: 1909 printf("\t\tt%d = f%u\t\trefs=%u\n", i, (uint32_t) val.word, ref); 1910 continue; 1911 default: { 1912 treenode_t hed = NODEVAL_HEAD(val); 1913 treenode_t tel = NODEVAL_TAIL(val); 1914 uint32_t deep = ctx->depths[i]; 1915 printf("\t\tt%d = (%u, %u)\trefs=%u\tdeep=%u\n", i, hed.ix, tel.ix, ref, deep); 1916 continue; 1917 } 1918 } 1919 } 1920 } 1921 1922 printf("\nSeed Fragments:\n\n"); 1923 1924 uint32_t count = ctx->frags_count; 1925 for (int i=0; i<count; i++) { 1926 FragVal cur = ctx->frags[i]; 1927 printf("\tFragment[%d]:\n\n\t\t(t%d, t%d) = ", i, cur.head.ix, cur.tail.ix); 1928 print_fragment_outline(ctx, (frag_t){i}); 1929 1930 printf("\n\n\t\t "); 1931 print_fragment(ctx, (frag_t){i}); 1932 printf("\n\n"); 1933 } 1934 } 1935 1936 void seed_show(Seed ctx) { 1937 uint32_t frags = ctx->frags_count; 1938 1939 if (frags) { 1940 print_fragment(ctx, (frag_t){ .ix = (frags-1) }); 1941 } else { 1942 1943 if (!ctx->treenodes_count) { 1944 die("No frags and no leaves\n"); 1945 } 1946 1947 if (ctx->treenodes_count > 1) { 1948 die("No frags but %d leaves. Nonsense!\n", ctx->treenodes_count); 1949 } 1950 1951 print_tree(ctx, (treenode_t){0}); 1952 } 1953 1954 putchar('\n'); 1955 }