hitch.sire (26863B)
1 ; Copyright 2023 The Plunder Authors 2 ; Use of this source code is governed by a BSD-style license that can be 3 ; found in the LICENSE file. 4 5 #### hitch <- parallel 6 7 ;;;; HitchHiker Trees 8 ;;;; ================ 9 ;;;; 10 ;;;; The shared code for hitchhiker trees. 11 12 13 ;;; Imports ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 14 15 :| sire ; 16 :| stew ; 17 :| parallel ; 18 :| mutrec [{#mutual}] 19 20 21 ;;; Util ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 22 23 ;; Given an index and a hitchhiker object, {packIndexNode} creates a 24 ;; packed representation. 25 ;; 26 ;; Indexes' subnodes are wrapped in pins so that we don't have to write 27 ;; parts of trees which haven't changed during snapshotting. 28 29 = (packIndexNode [keys nodes] hh) 30 | PIN [keys nodes hh] 31 32 > Row a > Maybe (a, Row a) 33 = (rowUncons r) 34 | if (null r) NONE 35 | SOME [(idx 0 r) (drop 1 r)] 36 37 =?= NONE (rowUncons []) 38 =?= (SOME [0 [1 2 3]]) (rowUncons [0 1 2 3]) 39 40 > Row a > Maybe (Row a, a) 41 = (rowUnsnoc r) 42 @ l | len r 43 | ifz l NONE 44 @ minusOne | dec l 45 | SOME [(take minusOne r) (idx minusOne r)] 46 47 =?= NONE (rowUnsnoc []) 48 =?= (SOME [[0 1 2] 3]) (rowUnsnoc [0 1 2 3]) 49 50 51 ;;; TreeFun ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 52 53 # record TreeFun 54 | TREE_FUN 55 * mkNode : Any 56 * mkLeaf : Any 57 * caseNode : Any 58 * leafInsert : Any 59 * leafMerge : Any 60 * leafLength : Any 61 * leafSplitAt : Any 62 * leafFirstKey : Any 63 * leafEmpty : Any 64 * leafDelete : Any 65 * hhMerge : Any 66 * hhLength : Any 67 * hhSplit : Any 68 * hhEmpty : Any 69 * hhDelete : Any 70 71 72 ;;; Index ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 73 74 ;; A {HitchNode} is either [index hitchhikers] or {LEAFNODE}, where a 75 ;; leaf is a map or a set. 76 77 # data HitchNode 78 - INDEXNODE idx/Any hh/Any 79 - LEAFNODE leaf/Any 80 81 abstype#(Index k v) ; TODO: (???) 82 abstype#(LazyIndex k v) ; TODO: (???) 83 84 ;; An {Index} is a row of [a row of keys, a row of subnodes] 85 86 > Index k v 87 = emptyIndex [[] []] 88 89 ;; {mergeIndex} merges two indexes with the key to put between the two 90 ;; of them. 91 92 > Index k v > k > Index k v > Index k v 93 = (mergeIndex [lKeys lVals] middle [rKeys rVals]) 94 [(cat [lKeys [middle] rKeys]) (weld lVals rVals)] 95 96 !! | eql [[5] [%a %b]] 97 | mergeIndex [[] [{a}]] 5 [[] [{b}]] 98 99 > v > Index k v 100 = (singletonIndex val) [[] [val]] 101 102 ;; {fromSingletonIndex} assumes that v is not a Nat. This holds in the 103 ;; current implementation. 104 105 > Index k v > Maybe v 106 = (fromSingletonIndex [_ vals]) 107 | if (eql 1 (len vals)) (get vals 0) 0 108 109 > Index k v > Nat 110 = (indexKeyLen [keys _]) 111 | len keys 112 113 > Index k v > Nat 114 = (indexValLen [_ vals]) 115 | len vals 116 117 ;; Given an index, {splitIndexAt} splits it into two so the left index has 118 ;; {numLeftKeys} and the right contains the rest. Also returns the pivot key. 119 120 > Nat > Index k v > (Index k v, k, Index k v) 121 = (splitIndexAt numLeftKeys [keys vals]) 122 @ leftKeys | take numLeftKeys keys 123 @ middleKeyAndRightKey | drop numLeftKeys keys 124 @ numPlusOne | inc numLeftKeys 125 @ leftVals | take numPlusOne vals 126 @ rightVals | drop numPlusOne vals 127 | ifz (len middleKeyAndRightKey) (die %splitIndexAtEmpty) 128 @ middleKey | get middleKeyAndRightKey 0 129 @ rightKeys | drop 1 middleKeyAndRightKey 130 [[leftKeys leftVals] middleKey [rightKeys rightVals]] 131 132 !! | eql [[[] [[%a]]] %b [[] [[%b]]]] 133 | splitIndexAt 0 [[{b}] [[{a}] [{b}]]] 134 135 ;; {extendIndex}, when given a pure index with no hitchhikers, creates 136 ;; a node. 137 138 > TreeFun > Nat > Index k v > Index k v 139 = (extendIndex treeFun maxIndexKeys idx) 140 @ TREE_FUN(..) treeFun 141 @ maxIndexVals | inc maxIndexKeys 142 ^ _ idx 143 ? (loop idx) 144 @ numVals | **indexValLen idx 145 | if (lte numVals maxIndexVals) 146 | **singletonIndex | mkNode idx hhEmpty 147 | if (lte numVals | mul 2 maxIndexVals) 148 @ pos | dec | div numVals 2 149 @ [lIdx middleKey rIdx] | splitIndexAt pos idx 150 @ !leftNode | mkNode lIdx hhEmpty 151 @ !rightNode | mkNode rIdx hhEmpty 152 [[middleKey] [leftNode rightNode]] 153 @ [lIdx middleKey rIdx] | splitIndexAt maxIndexVals idx 154 @ ls | **singletonIndex | mkNode lIdx hhEmpty 155 | mergeIndex ls middleKey | loop rIdx 156 157 = (valView key [keys vals]) 158 @ [leftKeys rightKeys] | span a&(lte a key) keys 159 @ n | len leftKeys 160 @ [leftVals valAndRightVals] | splitAt n vals 161 | maybeCase | rowUncons valAndRightVals 162 | die {valView: can't split empty index} 163 & [val rightVals] 164 [[leftKeys leftVals rightKeys rightVals] val] 165 166 = (leftView [leftKeys leftVals rightKeys rightVals]) 167 | maybeCase (rowUnsnoc leftVals) NONE 168 & [leftVals leftVal] 169 | maybeCase (rowUnsnoc leftKeys) NONE 170 & [leftKeys leftKey] 171 @ newCtx [leftKeys leftVals rightKeys rightVals] 172 | SOME [newCtx leftVal leftKey] 173 174 = (rightView [leftKeys leftVals rightKeys rightVals]) 175 | maybeCase (rowUncons rightVals) NONE 176 & [rightVal rightVals] 177 | maybeCase (rowUncons rightKeys) NONE 178 & [rightKey rightKeys] 179 @ newCtx [leftKeys leftVals rightKeys rightVals] 180 | SOME [rightKey rightVal newCtx] 181 182 = (putVal [leftKeys leftVals rightKeys rightVals] val) 183 ++ weld leftKeys rightKeys 184 ++ cat [leftVals [val] rightVals] 185 186 = (putIdx [leftKeys leftVals rightKeys rightVals] [keys vals]) 187 ++ cat [leftKeys keys rightKeys] 188 ++ cat [leftVals vals rightVals] 189 190 ;; {findSubnodeByKey} finds the value needed when recursing downwards. 191 192 = (findSubnodeByKey key [keys vals]) 193 | get vals 194 @ b | bsearch key keys 195 @ found | mod b 2 196 @ idx | rsh b 1 197 | add found idx 198 199 200 ;;; Leaf ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 201 202 > TreeFun > Nat > Index k n 203 = (splitLeafMany treeFun maxLeafItems items) 204 @ TREE_FUN(..) treeFun 205 @ itemLen | leafLength items 206 ; leaf items don't overflow a single node 207 | if (lte itemLen maxLeafItems) 208 | **singletonIndex | mkLeaf items 209 ; we have to split, but only into two nodes 210 | if (lte itemLen | mul 2 maxLeafItems) 211 @ numLeft | div itemLen 2 212 @ [lLeaf rLeaf] | leafSplitAt numLeft items 213 @ rightFirstItem | leafFirstKey rLeaf 214 [[rightFirstItem] [(mkLeaf lLeaf) (mkLeaf rLeaf)]] 215 ; we have to split the node into more than two nodes 216 @ (fixup [keys vals]) 217 [keys (map mkLeaf vals)] 218 ^ fixup (_ items NIL NIL) 219 ? (loop items keys leafs) 220 @ itemLen | leafLength items 221 | if (gth itemLen | mul 2 maxLeafItems) 222 @ [leaf rem] | leafSplitAt maxLeafItems items 223 @ key | leafFirstKey rem 224 | loop rem (CONS key keys) (CONS leaf leafs) 225 | if (gth itemLen maxLeafItems) 226 @ numLeft | div itemLen 2 227 @ [left right] | leafSplitAt numLeft items 228 @ key | leafFirstKey right 229 | loop leafEmpty (CONS key keys) (CONS right (CONS left leafs)) 230 | ifz itemLen 231 [(listToRowRev keys) (listToRowRev leafs)] 232 | die %leafConstraintViolation 233 234 235 ;;; TreeConfig ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 236 237 # record TreeConfig 238 | TREE_CONFIG 239 * minFanout : Any 240 * maxFanout : Any 241 * minIdxKeys : Any 242 * maxIdxKeys : Any 243 * minLeafItems : Any 244 * maxLeafItems : Any 245 * maxHitchhikers : Any 246 247 ;; {twoThreeConfig} is a Testing TreeConfig which overflows quickly. 248 249 = twoThreeConfig 250 @ minFanout 2 251 @ maxFanout | dec | mul 2 minFanout 252 | TREE_CONFIG 253 * minFanout 254 * maxFanout 255 * dec minFanout 256 * dec maxFanout 257 * minFanout 258 * maxFanout 259 * minFanout 260 261 ;; {largeConfig} is a config with larger corfficients for more realistic 262 ;; testing. 263 264 = largeConfig 265 @ minFanout 64 266 @ maxFanout | dec | mul 2 minFanout 267 | TREE_CONFIG 268 * minFanout 269 * maxFanout 270 * dec minFanout 271 * dec maxFanout 272 * minFanout 273 * maxFanout 274 * minFanout 275 276 277 ;;; Tree ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 278 279 = (fixup treeConfig treeFun index) 280 @ TREE_CONFIG(..) treeConfig 281 @ !newRootNode | fromSingletonIndex index 282 | ifz newRootNode 283 @ !index | extendIndex treeFun maxLeafItems index 284 | fixup treeConfig treeFun index 285 newRootNode 286 287 # mutual insertRect_distributeDownwards 288 289 = (insertRec treeConfig treeFun toAdd node) 290 @ TREE_CONFIG(..) treeConfig 291 @ TREE_FUN(..) treeFun 292 293 # case (caseNode node) 294 295 - INDEXNODE children hitchhikers 296 297 @ !merged | hhMerge hitchhikers toAdd 298 | if (gth (hhLength merged) maxHitchhikers) 299 300 @ !distrib 301 | distributeDownwards treeConfig treeFun merged children 302 303 ; We have reached the maximum number of hitchhikers, we now 304 ; need to flush these downwards. 305 | extendIndex treeFun maxLeafItems distrib 306 307 | else 308 309 ; All we must do is rebuild the node with the new k/v pair 310 ; added on as a hitchhiker to this node. 311 | **singletonIndex | mkNode children merged 312 313 - LEAFNODE items 314 @ !inserted (leafInsert items toAdd) 315 | splitLeafMany treeFun maxLeafItems inserted 316 317 = (distributeDownwards treeConfig treeFun hitchhikers index) 318 319 @ TREE_FUN(..) treeFun 320 | ifz (hhLength hitchhikers) index 321 @ [keys vals] index 322 @ keyList | listFromRow keys 323 @ splitHH | downSplit treeFun keyList hitchhikers 324 @ indexList | parList 325 | listMap (downPush treeConfig treeFun) 326 | listZip splitHH 327 | listFromRow vals 328 @ [!lkeys !lvals] | joinIndex keyList indexList 329 [(listToRow lkeys) (cat | listToRow lvals)] 330 331 ; implementation details of insertRec/distributeDownwards 332 = (downSplit treeFun l hh) 333 : key keys < listCase l ~[hh] 334 @ TREE_FUN(..) treeFun 335 @ [!toAdd !rest] | hhSplit key hh 336 | CONS toAdd | downSplit treeFun keys rest 337 338 = (downPush treeConfig treeFun [hh node]) 339 @ TREE_FUN(..) treeFun 340 | ifz (hhLength hh) 341 | **singletonIndex node 342 | insertRec treeConfig treeFun hh node 343 344 = (joinIndex kl il) 345 # case kl 346 - NIL # case il 347 - NIL | (NIL, NIL) 348 - CONS [keys vals] _ | (listFromRow keys, ~[vals]) 349 - CONS k ks 350 # case il 351 - NIL (die {missing index in joinIndex}) 352 - CONS [keys vals] ts 353 @ [keyrest valrest] | joinIndex ks ts 354 @ !kout | listWeld (listFromRow keys) (CONS k keyrest) 355 @ !vout | CONS vals valrest 356 [kout vout] 357 358 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 359 360 = (splitHitchhikersByKeys treeFun keys hh) 361 @ TREE_FUN(..) treeFun 362 @ l | len keys 363 ^ unfoldr _ [0 hh] 364 & [i hh] 365 | if (eql i l) | SOME [hh [(inc i) hhEmpty]] 366 | if (gth i l) | NONE 367 @ [!cur !rest] | hhSplit (idx i keys) hh 368 | SOME [cur [(inc i) rest]] 369 370 ;; {getLeafNode} Forces a flush of all hitchhikers down to the leaf levels 371 ;; and return the resulting leaf vectors. 372 373 = (getLeafRow treeFun node) 374 @ TREE_FUN(..) treeFun 375 ^ _ hhEmpty node 376 ? (go_openTreeFun hh node) 377 # case (caseNode node) 378 - LEAFNODE leaves 379 @ !item (leafInsert leaves hh) 380 [item] 381 - INDEXNODE [keys vals] hitchhikers 382 @ !merged (hhMerge hitchhikers hh) 383 @ splitHH | splitHitchhikersByKeys treeFun keys merged 384 | cat 385 | map [hh node]&(go_openTreeFun hh node) | zip splitHH vals 386 387 = (nodeNeedsMerge config treeFun node) 388 @ TREE_CONFIG(..) config 389 @ TREE_FUN(..) treeFun 390 # case (caseNode node) 391 - INDEXNODE index hitchhikers 392 | lth (indexKeyLen index) minIdxKeys 393 - LEAFNODE leaves 394 | lth (leafLength leaves) minLeafItems 395 396 = (mergeNodes config treeFun left middleKey right) 397 @ TREE_CONFIG(..) config 398 @ TREE_FUN(..) treeFun 399 ; TODO I wish I could collapse this into a single case like in Haskell. 400 # case (caseNode left) 401 - INDEXNODE leftIdx leftHH 402 # case (caseNode right) 403 - INDEXNODE rightIdx rightHH 404 @ !left | distributeDownwards config treeFun leftHH leftIdx 405 @ !right | distributeDownwards config treeFun rightHH rightIdx 406 @ !merged | mergeIndex left middleKey right 407 | extendIndex treeFun maxIdxKeys merged 408 - LEAFNODE _ 409 | die %nodeMismatch 410 - LEAFNODE leftLeaf 411 # case (caseNode right) 412 - LEAFNODE rightLeaf 413 @ !merged | leafMerge leftLeaf rightLeaf 414 | splitLeafMany treeFun maxLeafItems merged 415 - INDEXNODE _ _ 416 | die %nodeMismatch 417 418 ;; {maybeCaseBack} is just backwards {maybeCase}. This is easier in 419 ;; some usage patterns. 420 421 (**maybeCaseBack mb som non)=(maybeCase mb non som) 422 423 = (deleteRec config treeFun key mybV node) 424 @ TREE_CONFIG(..) config 425 @ TREE_FUN(..) treeFun 426 # case (caseNode node) 427 - LEAFNODE leaves 428 | mkLeaf | leafDelete key mybV leaves 429 - INDEXNODE index hitchhikers 430 @ [ctx child] | valView key index 431 @ newChild | deleteRec config treeFun key mybV child 432 @ childNeedsMerge | nodeNeedsMerge config treeFun newChild 433 @ prunedHH | hhDelete key mybV hitchhikers 434 | if | not childNeedsMerge 435 | mkNode (putVal ctx newChild) prunedHH 436 | maybeCaseBack | rightView ctx 437 & [rKey rChild rCtx] 438 | mkNode 439 | putIdx rCtx | mergeNodes config treeFun newChild rKey rChild 440 prunedHH 441 | maybeCaseBack | leftView ctx 442 & [lCtx lChild lKey] 443 | mkNode 444 | putIdx lCtx | mergeNodes config treeFun lChild lKey newChild 445 prunedHH 446 | die {deleteRec: node with single child} 447 448 449 ;;; HitchhikerMap ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 450 451 abstype#(HSet a) 452 abstype#(HMap k v) 453 abstype#(HSetMap k v) 454 455 (hhDeleteKey k _ t)=(tabDel k t) 456 457 = (hmEmpty config) | [config 0] 458 459 = (hmCaseNode pinnedNode) 460 @ !node (pinItem pinnedNode) 461 | if | isTab node 462 | LEAFNODE node 463 @ [keys nodes hh] node 464 | INDEXNODE [keys nodes] hh 465 466 = (hmSingleton config k v) 467 @ node | PIN | tabSing k v 468 [config node] 469 470 (tabUnionRightBiased x y)=(tabUnion y x) 471 472 = hhMapTF 473 ++ packIndexNode ; mkNode 474 ++ PIN ; mkLeaf 475 ++ hmCaseNode ; caseNode 476 ++ tabUnionRightBiased ; leafInsert 477 ++ tabUnionRightBiased ; leafMerge 478 ++ tabLen ; leafLength 479 ++ tabSplitAt ; leafSplitAt 480 ++ tabMinKey ; leafFirstKey 481 ++ #[] ; leafEmpty 482 ++ hhDeleteKey ; leafDelete 483 ++ tabUnionRightBiased ; hhMerge 484 ++ tabLen ; hhLength 485 ++ tabSplitLT ; hhSplit 486 ++ #[] ; hhEmpty 487 ++ hhDeleteKey ; hhDelete 488 489 = (hmSize [config r]) 490 | ifz r 0 491 | sumOf tabLen 492 | getLeafRow hhMapTF r 493 494 ;; TODO: {hmKeys} returns a set in the empty case, and a row-of-rows 495 ;; otherwise. Is that wrong? 496 497 = (hmKeys [config top]) 498 | ifz top %[] 499 | map tabKeysRow 500 | getLeafRow hhMapTF top 501 502 = (hmInsert k v [config top]) 503 @ p | tabSing k v 504 | ifz top | hmSingleton config k v 505 @ !index | insertRec config hhMapTF p top 506 @ !fixed | fixup config hhMapTF index 507 ++ config 508 ++ fixed 509 510 = (hmInsertMany tab [config top]) 511 | if | tabIsEmpty tab 512 [config top] 513 @ TREE_CONFIG(..) config 514 @ !index 515 | ifz top 516 | splitLeafMany hhMapTF maxLeafItems tab 517 | insertRec config hhMapTF tab top 518 @ !fixed | fixup config hhMapTF index 519 ++ config 520 ++ fixed 521 522 ;; TODO: case guard statements would make this much more readable. 523 524 = (hmDelete k [config r]) 525 | ifz r [config r] 526 @ newRootNode | deleteRec config hhMapTF k NONE r 527 # case (hmCaseNode newRootNode) 528 - LEAFNODE leaves 529 ++ config 530 ++ if (tabIsEmpty leaves) NONE (SOME newRootNode) 531 - INDEXNODE index hitchhikers 532 @ childNode | fromSingletonIndex index 533 | ifz childNode 534 [config newRootNode] 535 @ base [config childNode] 536 | if (tabIsEmpty hitchhikers) base 537 | hmInsertMany hitchhikers base 538 539 = (hmLookup key [config r]) 540 | ifz r NONE 541 ^ _ r 542 ? (lookInNode node) 543 # case (hmCaseNode node) 544 - INDEXNODE index hitchhikers 545 : v < maybeCase (tabLookup key hitchhikers) 546 (lookInNode | findSubnodeByKey key index) 547 | (SOME v) 548 - LEAFNODE items 549 | tabLookup key items 550 551 552 ;;; HitchhikerSet ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 553 554 = (hsDeleteItem k _ c) | setDel k c 555 = (hsEmpty config) | [config 0] 556 = (hsNull [config r]) | isZero r 557 = (hsRawNode [config r]) | r 558 559 = (hsCaseNode pinnedNode) 560 @ !node (pinItem pinnedNode) 561 | if | isSet node 562 | LEAFNODE node 563 @ [keys nodes hh] node 564 | INDEXNODE [keys nodes] hh 565 566 = (hsRawSingleton v) 567 | PIN (setSing v) 568 569 (hsSingleton config v)=[config (hsRawSingleton v)] 570 571 = hhSetTF 572 ++ packIndexNode ; mkNode 573 ++ PIN ; mkLeaf 574 ++ hsCaseNode ; caseNode 575 ++ setUnion ; leafInsert 576 ++ setUnion ; leafMerge 577 ++ setLen ; leafLength 578 ++ setSplitAt ; leafSplitAt 579 ++ setMin ; leafFirstKey 580 ++ %[] ; leafEmpty 581 ++ hsDeleteItem ; leafDelete 582 ++ setUnion ; hhMerge 583 ++ setLen ; hhLength 584 ++ setSplitLT ; hhSplit 585 ++ %[] ; hhEmpty 586 ++ hsDeleteItem ; hhDelete 587 588 = (hsRawInsert i config r) 589 @ is | setSing i 590 | ifz r | hsRawSingleton i 591 @ !index | insertRec config hhSetTF is r 592 @ !fixed | fixup config hhSetTF index 593 | fixed 594 595 (hsInsert i [config r])=[config (hsRawInsert i config r)] 596 597 = (hsRawInsertMany set config r) 598 | if (setIsEmpty set) r 599 @ TREE_CONFIG(..) config 600 @ !index 601 | ifz r | splitLeafMany hhSetTF maxLeafItems set 602 | insertRec config hhSetTF set r 603 | fixup config hhSetTF index 604 605 (hsInsertMany set [config r])=[config (hsRawInsertMany set config r)] 606 607 = (hsRawFromSet config c) 608 | if (setIsEmpty c) NONE 609 | hsRawInsertMany c config NONE 610 611 (hsFromSet config c)=[config (hsRawFromSet config c)] 612 613 = (hsToSet [config r]) 614 | ifz r %[] 615 | setCatRowAsc 616 | getLeafRow hhSetTF r 617 618 = (hsMember key [config r]) 619 | ifz r FALSE 620 ^ _ r 621 ? (lookInNode node) 622 # case (hsCaseNode node) 623 - INDEXNODE index hitchhikers 624 | if (setHas key hitchhikers) TRUE 625 | lookInNode | findSubnodeByKey key index 626 - LEAFNODE items 627 | setHas key items 628 629 = (hsRawDelete key config r) 630 | ifz r r 631 @ newRootNode | deleteRec config hhSetTF key NONE r 632 # case (hsCaseNode newRootNode) 633 - LEAFNODE leaves 634 | if (setIsEmpty leaves) 0 newRootNode 635 - INDEXNODE index hitchhikers 636 @ childNode | fromSingletonIndex index 637 | ifz childNode newRootNode 638 | if (setIsEmpty hitchhikers) childNode 639 | hsRawInsertMany hitchhikers config childNode 640 641 = (hsDelete key [config r]) 642 @ x | hsRawDelete key config r 643 [config x] 644 645 ;; TODO: Like with intersection, a real implementation of union needs 646 ;; to be built instead of just the simplistic {hsRawUnion}. 647 ;; 648 ;; What would a faster union look like? This implementation goes through all 649 ;; leaf sets and then iteratively unions them all together on each side and then 650 ;; unions that. Then it takes that megaset and splits it back up. That seems 651 ;; hella inefficient. 652 ;; 653 ;; Some ideas: 654 ;; 655 ;; - When one side is small, treat it as an insertRec into the other. 656 ;; 657 ;; - Operate on a list of set pieces. unlike intersection, you 658 ;; can't prevent touching the pins for every leaf so maintain using 659 ;; getLeafRow. But make the internal union operation work on 660 ;; {[Set k] -> [Set k] -> [Set k]} and then {consolidate} on that 661 ;; structure. That at minimum removes the megaunion at the end and 662 ;; then breaking that back up into smaller leaves. (It's probably 663 ;; faster everywhere else, but I'd have to double check...) 664 665 = (hsRawUnion aconfig ar br) 666 | ifz ar br 667 | ifz br ar 668 @ as | setCatRowAsc | getLeafRow hhSetTF ar 669 @ bs | setCatRowAsc | getLeafRow hhSetTF br 670 | hsRawFromSet aconfig 671 | setUnion as bs 672 673 = (hsUnion as bs) 674 @ [aconfig ar] as 675 @ [_ br] bs 676 [aconfig (hsRawUnion aconfig ar br)] 677 678 =?= 1 | hsNull | hsEmpty twoThreeConfig 679 =?= 0 | hsNull | hsSingleton twoThreeConfig 9 680 =?= 0 | hsNull | hsInsert 8 | hsSingleton twoThreeConfig 9 681 =?= 0 | hsNull | hsInsert 9 | hsSingleton twoThreeConfig 9 682 =?= 1 | hsNull | hsDelete 9 | hsSingleton twoThreeConfig 9 683 684 685 ;;; Hitchhiker Set New Intersect ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 686 687 ;; {getLeafList} is like getLeafRow, but returns a list instead. 688 689 ;> TreeFun > Index k v > List (Set k) 690 = (getLeafList treeFun node) 691 @ TREE_FUN(..) treeFun 692 ^ _ hhEmpty node 693 ? (go_openTreeFun hh node) 694 # case (caseNode node) 695 - LEAFNODE leaves 696 @ !item (leafInsert leaves hh) 697 | CONS item NIL 698 - INDEXNODE [keys vals] hitchhikers 699 @ !merged (hhMerge hitchhikers hh) 700 @ splitHH | splitHitchhikersByKeys treeFun keys merged 701 | listCat 702 | listMap [hh node]&(go_openTreeFun hh node) 703 | listFromRow 704 | zip splitHH vals 705 706 > List (Set k) > List (Set k) > List (Set k) 707 = (setlistIntersect ao bo) 708 | listCase ao NIL 709 & (a as) 710 | listCase bo NIL 711 & (b bs) 712 @ amin | setMin a 713 @ amax | setMax a 714 @ bmin | setMin b 715 @ bmax | setMax b 716 @ overlap | and lte-amin-bmax lte-bmin-amax 717 @ int | setIntersect a b 718 @ rest 719 | switch (cmp amax bmax) 0 720 ++ setlistIntersect as bo 721 ++ setlistIntersect as bs 722 ++ setlistIntersect ao bs 723 | if | and overlap (not | setIsEmpty int) 724 | CONS int rest 725 rest 726 727 =?= ~[] 728 | setlistIntersect 729 * ~[%[1 2 3] %[4 5 6]] 730 * ~[%[7 8 9] %[10 11 12]] 731 732 =?= ~[%[6] %[7]] 733 | setlistIntersect 734 * ~[%[4 5 6] %[7 8 9]] 735 * ~[%[6 7]] 736 737 =?= ~[%[2] %[4] %[9]] 738 | setlistIntersect 739 * ~[%[2] %[3] %[4 6] %[9]] 740 * ~[%[2] %[4 5] %[7 8 9]] 741 742 > Row (HSet k) > List (Set k) 743 = (hsMultiIntersect setRow) 744 | ifz (len setRow) NIL 745 | if (eql 1 | len setRow) 746 @ [_ node] | idx 0 setRow 747 | getLeafList hhSetTF node 748 ; 749 @ mybNodes | map hsRawNode setRow 750 | if (any isZero mybNodes) 751 NIL 752 ; 753 @ setNodes 754 | map (getLeafList hhSetTF) mybNodes 755 ; 756 ; manual foldl1 like 757 ^ _ (idx 0 setNodes) 1 (dec (len setNodes)) 758 ? (go acc i rem) 759 | ifNot rem acc 760 @ rem | dec rem 761 @ acc | setlistIntersect acc (idx i setNodes) 762 | seq acc 763 | go acc (inc i) rem 764 765 > Nat > List (Set a) > List (Set a) 766 = (lsDrop num sets) 767 | ifz num sets 768 | listCase sets NIL 769 & (x xs) 770 @ xl | setLen x 771 | if | gte num xl 772 | lsDrop (sub num xl) xs 773 | CONS 774 * setDrop num x 775 * xs 776 777 =?= ~[%[5]] | lsDrop 4 ~[%[1 2 3] %[4 5]] 778 779 > Nat > List (Set k) > List (Set k) 780 = (lsTake num sets) 781 | ifz num NIL 782 | listCase sets NIL 783 & (x xs) 784 @ xl | setLen x 785 | if | lth num xl ; num < xl 786 | CONS (setTake num x) NIL 787 | CONS 788 * x 789 * lsTake (sub num xl) xs 790 791 =?= ~[%[1 2 3]] | lsTake 3 ~[%[1 2 3] %[4 5]] 792 =?= ~[%[1 2 3] %[4]] | lsTake 4 ~[%[1 2 3] %[4 5]] 793 794 > List (Set k) > Nat 795 = (lsLen sets) 796 | listFoldl (i s)&(add i | setLen s) 0 sets 797 798 =?= 2 799 | lsLen (CONS %[4] (CONS %[5] NIL)) 800 801 > List (Set k) > List k 802 = (lsToList ls) 803 | listCat 804 | listMap setToList ls 805 806 807 ;;; HitchhikerSetMap ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 808 809 = (hsmEmpty mapConfig setConfig) 810 [mapConfig setConfig 0] 811 812 = (hsmCaseNode pinnedNode) 813 @ !node (pinItem pinnedNode) 814 | if | isTab node 815 | LEAFNODE node 816 @ [keys nodes hh] node 817 | INDEXNODE [keys nodes] hh 818 819 ;; In a {SetMap}, hitchhikers are a normal map of sets, but leaves are 820 ;; maps of raw hitchhikersets. 821 822 > TreeConfig > Tab k (HSet Nat) > Tab k (Set a) > Tab k (HSet Nat) 823 = (hsmLeafInsertImpl setConfig leaf hh) 824 @ (alt new m) 825 | SOME 826 | maybeCase m | hsRawFromSet setConfig new 827 & old 828 | hsRawInsertMany new setConfig old 829 @ (merge items k vset) 830 | tabAlter (alt vset) k items 831 | tabFoldlWithKey merge leaf hh 832 833 = (hsmLeafDeleteImpl setConfig k mybV hsm) 834 | maybeCase mybV (die %cantDeleteNoValue) 835 & v 836 @ (update in) 837 | maybeCase in NONE 838 & set 839 | SOME | hsRawDelete v setConfig set 840 | tabAlter update k hsm 841 842 = (hsmHHDeleteImpl k mybV sm) 843 | maybeCase mybV (die %cantDeleteNoValue) 844 & v 845 @ (update in) 846 | maybeCase in NONE 847 & set 848 | SOME | setDel v set 849 | tabAlter update k sm 850 851 ;; Since the point of hitchhikers is that they'll fit in an index node's 852 ;; grain, {hhSetMapLength} counts the number of items across all sets 853 ;; in the hitchhiker, not the number of tab entries. 854 855 (hhSetMapLength a)=(sumOf setLen | tabValsRow a) 856 857 = (hhSetMapTF setConfig) 858 ++ packIndexNode ; mkNode 859 ++ PIN ; mkLeaf 860 ++ hsmCaseNode ; caseNode 861 ++ hsmLeafInsertImpl setConfig ; leafInsert 862 ++ tabUnionWith (hsRawUnion setConfig) ; leafMerge 863 ++ tabLen ; leafLength 864 ++ tabSplitAt ; leafSplitAt 865 ++ tabMinKey ; leafFirstKey 866 ++ #[] ; leafEmpty 867 ++ hsmLeafDeleteImpl setConfig ; leafDelete 868 ++ tabUnionWith setUnion ; hhMerge 869 ++ hhSetMapLength ; hhLength 870 ++ tabSplitLT ; hhSplit 871 ++ #[] ; hhEmpty 872 ++ hsmHHDeleteImpl ; hhDelete 873 874 = (hsmInsert k v [mapConfig setConfig r]) 875 | ifz r 876 @ raw | hsRawSingleton v 877 @ leaf | tabSing k raw 878 ++ mapConfig 879 ++ setConfig 880 ++ PIN leaf 881 @ tf | hhSetMapTF setConfig 882 @ hh | tabSing k (setSing v) 883 @ !index | insertRec mapConfig tf hh r 884 @ !fixed | fixup mapConfig tf index 885 ++ mapConfig 886 ++ setConfig 887 ++ fixed 888 889 = (hsmInsertMany tabset [mapConfig setConfig r]) 890 | if | tabIsEmpty tabset 891 [mapConfig setConfig r] 892 @ tf | hhSetMapTF setConfig 893 @ !index 894 | ifz r 895 @ TREE_CONFIG(..) mapConfig 896 | splitLeafMany tf maxLeafItems 897 | tabMapWithKey (k v)&(hsRawFromSet setConfig v) tabset 898 | insertRec mapConfig tf tabset r 899 @ !fixed | fixup mapConfig tf index 900 ++ mapConfig 901 ++ setConfig 902 ++ fixed 903 904 = (hsmDelete k v [mapConfig setConfig r]) 905 | ifz r 906 [mapConfig setConfig r] 907 @ newRootNode | deleteRec mapConfig (hhSetMapTF setConfig) k (SOME v) r 908 # case (hsmCaseNode newRootNode) 909 - LEAFNODE leaves 910 ++ mapConfig 911 ++ setConfig 912 ++ if (tabIsEmpty leaves) 0 newRootNode 913 - INDEXNODE index hitchhikers 914 @ childNode | fromSingletonIndex index 915 | ifz childNode 916 [mapConfig setConfig newRootNode] 917 @ base [mapConfig setConfig childNode] 918 | if (tabIsEmpty hitchhikers) base 919 | hsmInsertMany hitchhikers base 920 921 ;; TODO: {hsmLookup} does a bunch of work each time which gets thrown 922 ;; away. We are likely to lookup the same items multiple times in a row, 923 ;; so a stateful, modifying hsmFlushAndLookup which modified itself so 924 ;; hitchhikers for that one 925 926 = (hsmLookup k [mapConfig setConfig r]) 927 @ TREE_CONFIG(..) mapConfig 928 | ifz r 929 | hsEmpty setConfig 930 ^ _ %[] r 931 ? (lookInNode !hh node) 932 # case (hsmCaseNode node) 933 - INDEXNODE children hitchhikers 934 @ matched | fromSome %[] (tabLookup k hitchhikers) 935 | lookInNode (setUnion hh matched) | findSubnodeByKey k children 936 - LEAFNODE items 937 : ret < **maybeCase (tabLookup k items) 938 [setConfig (hsRawFromSet setConfig hh)] 939 [setConfig (hsRawInsertMany hh setConfig ret)] 940 941 942 ;;; Exports ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 943 944 ^-^ 945 ^-^ HSet HMap HSetMap 946 ^-^ TreeConfig TREE_CONFIG 947 ^-^ 948 ^-^ ; shared configuration 949 ^-^ twoThreeConfig largeConfig 950 ^-^ 951 ^-^ ; hitchhikermap (hm) 952 ^-^ hmEmpty hmSingleton hmSize hmKeys hmInsert hmInsertMany hmDelete hmLookup 953 ^-^ 954 ^-^ ; hitchhikerset (hs) 955 ^-^ hsEmpty hsNull hsSingleton hsInsert hsInsertMany hsDelete hsToSet hsFromSet 956 ^-^ hsMember hsUnion 957 ^-^ hsMultiIntersect 958 ^-^ 959 ^-^ ; listset (ls) 960 ^-^ lsDrop lsTake lsLen lsToList 961 ^-^ 962 ^-^ ; hitchhiersetmap (hsm) 963 ^-^ hsmEmpty hsmInsert hsmInsertMany hsmDelete hsmLookup 964 ^-^ tabSplitLT 965 ^-^