plunder

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

hitch.sire (26863B)


      1 ; Copyright 2023 The Plunder Authors
      2 ; Use of this source code is governed by a BSD-style license that can be
      3 ; found in the LICENSE file.
      4 
      5 #### hitch <- parallel
      6 
      7 ;;;; HitchHiker Trees
      8 ;;;; ================
      9 ;;;;
     10 ;;;; The shared code for hitchhiker trees.
     11 
     12 
     13 ;;; Imports ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     14 
     15 :| sire     ;
     16 :| stew     ;
     17 :| parallel ;
     18 :| mutrec   [{#mutual}]
     19 
     20 
     21 ;;; Util ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     22 
     23 ;; Given an index and a hitchhiker object, {packIndexNode} creates a
     24 ;; packed representation.
     25 ;;
     26 ;; Indexes' subnodes are wrapped in pins so that we don't have to write
     27 ;; parts of trees which haven't changed during snapshotting.
     28 
     29 = (packIndexNode [keys nodes] hh)
     30 | PIN [keys nodes hh]
     31 
     32 > Row a > Maybe (a, Row a)
     33 = (rowUncons r)
     34 | if (null r) NONE
     35 | SOME [(idx 0 r) (drop 1 r)]
     36 
     37 =?= NONE               (rowUncons [])
     38 =?= (SOME [0 [1 2 3]]) (rowUncons [0 1 2 3])
     39 
     40 > Row a > Maybe (Row a, a)
     41 = (rowUnsnoc r)
     42 @ l | len r
     43 | ifz l NONE
     44 @ minusOne | dec l
     45 | SOME [(take minusOne r) (idx minusOne r)]
     46 
     47 =?= NONE               (rowUnsnoc [])
     48 =?= (SOME [[0 1 2] 3]) (rowUnsnoc [0 1 2 3])
     49 
     50 
     51 ;;; TreeFun ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     52 
     53 # record TreeFun
     54 | TREE_FUN
     55 * mkNode       : Any
     56 * mkLeaf       : Any
     57 * caseNode     : Any
     58 * leafInsert   : Any
     59 * leafMerge    : Any
     60 * leafLength   : Any
     61 * leafSplitAt  : Any
     62 * leafFirstKey : Any
     63 * leafEmpty    : Any
     64 * leafDelete   : Any
     65 * hhMerge      : Any
     66 * hhLength     : Any
     67 * hhSplit      : Any
     68 * hhEmpty      : Any
     69 * hhDelete     : Any
     70 
     71 
     72 ;;; Index ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     73 
     74 ;; A {HitchNode} is either [index hitchhikers] or {LEAFNODE}, where a
     75 ;; leaf is a map or a set.
     76 
     77 # data HitchNode
     78 - INDEXNODE idx/Any hh/Any
     79 - LEAFNODE leaf/Any
     80 
     81 abstype#(Index k v) ; TODO: (???)
     82 abstype#(LazyIndex k v) ; TODO: (???)
     83 
     84 ;; An {Index} is a row of [a row of keys, a row of subnodes]
     85 
     86 > Index k v
     87 = emptyIndex [[] []]
     88 
     89 ;; {mergeIndex} merges two indexes with the key to put between the two
     90 ;; of them.
     91 
     92 > Index k v > k > Index k v > Index k v
     93 = (mergeIndex [lKeys lVals] middle [rKeys rVals])
     94 [(cat [lKeys [middle] rKeys]) (weld lVals rVals)]
     95 
     96 !! | eql [[5] [%a %b]]
     97    | mergeIndex [[] [{a}]] 5 [[] [{b}]]
     98 
     99 > v > Index k v
    100 = (singletonIndex val) [[] [val]]
    101 
    102 ;; {fromSingletonIndex} assumes that v is not a Nat.  This holds in the
    103 ;; current implementation.
    104 
    105 > Index k v > Maybe v
    106 = (fromSingletonIndex [_ vals])
    107 | if (eql 1 (len vals)) (get vals 0) 0
    108 
    109 > Index k v > Nat
    110 = (indexKeyLen [keys _])
    111 | len keys
    112 
    113 > Index k v > Nat
    114 = (indexValLen [_ vals])
    115 | len vals
    116 
    117 ;; Given an index, {splitIndexAt} splits it into two so the left index has
    118 ;; {numLeftKeys} and the right contains the rest. Also returns the pivot key.
    119 
    120 > Nat > Index k v > (Index k v, k, Index k v)
    121 = (splitIndexAt numLeftKeys [keys vals])
    122 @ leftKeys             | take numLeftKeys keys
    123 @ middleKeyAndRightKey | drop numLeftKeys keys
    124 @ numPlusOne           | inc numLeftKeys
    125 @ leftVals             | take numPlusOne vals
    126 @ rightVals            | drop numPlusOne vals
    127 | ifz (len middleKeyAndRightKey) (die %splitIndexAtEmpty)
    128 @ middleKey            | get middleKeyAndRightKey 0
    129 @ rightKeys            | drop 1 middleKeyAndRightKey
    130 [[leftKeys leftVals] middleKey [rightKeys rightVals]]
    131 
    132 !! | eql [[[] [[%a]]] %b [[] [[%b]]]]
    133    | splitIndexAt 0 [[{b}] [[{a}] [{b}]]]
    134 
    135 ;; {extendIndex}, when given a pure index with no hitchhikers, creates
    136 ;; a node.
    137 
    138 > TreeFun > Nat > Index k v > Index k v
    139 = (extendIndex treeFun maxIndexKeys idx)
    140 @ TREE_FUN(..) treeFun
    141 @ maxIndexVals | inc maxIndexKeys
    142 ^ _ idx
    143 ? (loop idx)
    144 @ numVals      | **indexValLen idx
    145 | if (lte numVals maxIndexVals)
    146     | **singletonIndex | mkNode idx hhEmpty
    147 | if (lte numVals | mul 2 maxIndexVals)
    148     @ pos | dec | div numVals 2
    149     @ [lIdx middleKey rIdx] | splitIndexAt pos idx
    150     @ !leftNode  | mkNode lIdx hhEmpty
    151     @ !rightNode | mkNode rIdx hhEmpty
    152     [[middleKey] [leftNode rightNode]]
    153 @ [lIdx middleKey rIdx] | splitIndexAt maxIndexVals idx
    154 @ ls | **singletonIndex | mkNode lIdx hhEmpty
    155 | mergeIndex ls middleKey | loop rIdx
    156 
    157 = (valView key [keys vals])
    158 @ [leftKeys rightKeys]       | span a&(lte a key) keys
    159 @ n                          | len leftKeys
    160 @ [leftVals valAndRightVals] | splitAt n vals
    161 | maybeCase | rowUncons valAndRightVals
    162     | die {valView: can't split empty index}
    163 & [val rightVals]
    164 [[leftKeys leftVals rightKeys rightVals] val]
    165 
    166 = (leftView [leftKeys leftVals rightKeys rightVals])
    167 | maybeCase (rowUnsnoc leftVals) NONE
    168 & [leftVals leftVal]
    169 | maybeCase (rowUnsnoc leftKeys) NONE
    170 & [leftKeys leftKey]
    171 @ newCtx  [leftKeys leftVals rightKeys rightVals]
    172 | SOME [newCtx leftVal leftKey]
    173 
    174 = (rightView [leftKeys leftVals rightKeys rightVals])
    175 | maybeCase (rowUncons rightVals) NONE
    176 & [rightVal rightVals]
    177 | maybeCase (rowUncons rightKeys) NONE
    178 & [rightKey rightKeys]
    179 @ newCtx  [leftKeys leftVals rightKeys rightVals]
    180 | SOME [rightKey rightVal newCtx]
    181 
    182 = (putVal [leftKeys leftVals rightKeys rightVals] val)
    183 ++ weld leftKeys rightKeys
    184 ++ cat [leftVals [val] rightVals]
    185 
    186 = (putIdx [leftKeys leftVals rightKeys rightVals] [keys vals])
    187 ++ cat [leftKeys keys rightKeys]
    188 ++ cat [leftVals vals rightVals]
    189 
    190 ;; {findSubnodeByKey} finds the value needed when recursing downwards.
    191 
    192 = (findSubnodeByKey key [keys vals])
    193 | get vals
    194 @ b     | bsearch key keys
    195 @ found | mod b 2
    196 @ idx   | rsh b 1
    197 | add found idx
    198 
    199 
    200 ;;; Leaf ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    201 
    202 > TreeFun > Nat > Index k n
    203 = (splitLeafMany treeFun maxLeafItems items)
    204 @ TREE_FUN(..) treeFun
    205 @ itemLen | leafLength items
    206 ; leaf items don't overflow a single node
    207 | if (lte itemLen maxLeafItems)
    208     | **singletonIndex | mkLeaf items
    209 ; we have to split, but only into two nodes
    210 | if (lte itemLen | mul 2 maxLeafItems)
    211     @ numLeft        | div itemLen 2
    212     @ [lLeaf rLeaf]  | leafSplitAt numLeft items
    213     @ rightFirstItem | leafFirstKey rLeaf
    214     [[rightFirstItem] [(mkLeaf lLeaf) (mkLeaf rLeaf)]]
    215 ; we have to split the node into more than two nodes
    216 @ (fixup [keys vals])
    217   [keys (map mkLeaf vals)]
    218 ^ fixup (_ items NIL NIL)
    219 ? (loop items keys leafs)
    220 @ itemLen | leafLength items
    221 | if (gth itemLen | mul 2 maxLeafItems)
    222     @ [leaf rem]   | leafSplitAt maxLeafItems items
    223     @ key          | leafFirstKey rem
    224     | loop rem (CONS key keys) (CONS leaf leafs)
    225 | if (gth itemLen maxLeafItems)
    226     @ numLeft      | div itemLen 2
    227     @ [left right] | leafSplitAt numLeft items
    228     @ key          | leafFirstKey right
    229     | loop leafEmpty (CONS key keys) (CONS right (CONS left leafs))
    230 | ifz itemLen
    231     [(listToRowRev keys) (listToRowRev leafs)]
    232 | die %leafConstraintViolation
    233 
    234 
    235 ;;; TreeConfig ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    236 
    237 # record TreeConfig
    238 | TREE_CONFIG
    239 * minFanout      : Any
    240 * maxFanout      : Any
    241 * minIdxKeys     : Any
    242 * maxIdxKeys     : Any
    243 * minLeafItems   : Any
    244 * maxLeafItems   : Any
    245 * maxHitchhikers : Any
    246 
    247 ;; {twoThreeConfig} is a Testing TreeConfig which overflows quickly.
    248 
    249 = twoThreeConfig
    250 @ minFanout 2
    251 @ maxFanout | dec | mul 2 minFanout
    252 | TREE_CONFIG
    253 * minFanout
    254 * maxFanout
    255 * dec minFanout
    256 * dec maxFanout
    257 * minFanout
    258 * maxFanout
    259 * minFanout
    260 
    261 ;; {largeConfig} is a config with larger corfficients for more realistic
    262 ;; testing.
    263 
    264 = largeConfig
    265 @ minFanout 64
    266 @ maxFanout | dec | mul 2 minFanout
    267 | TREE_CONFIG
    268 * minFanout
    269 * maxFanout
    270 * dec minFanout
    271 * dec maxFanout
    272 * minFanout
    273 * maxFanout
    274 * minFanout
    275 
    276 
    277 ;;; Tree ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    278 
    279 = (fixup treeConfig treeFun index)
    280 @ TREE_CONFIG(..) treeConfig
    281 @ !newRootNode | fromSingletonIndex index
    282 | ifz newRootNode
    283     @ !index | extendIndex treeFun maxLeafItems index
    284     | fixup treeConfig treeFun index
    285 newRootNode
    286 
    287 # mutual insertRect_distributeDownwards
    288 
    289     = (insertRec treeConfig treeFun toAdd node)
    290       @ TREE_CONFIG(..) treeConfig
    291       @ TREE_FUN(..) treeFun
    292 
    293       # case (caseNode node)
    294 
    295       - INDEXNODE children hitchhikers
    296 
    297           @ !merged | hhMerge hitchhikers toAdd
    298           | if (gth (hhLength merged) maxHitchhikers)
    299 
    300               @ !distrib
    301                   | distributeDownwards treeConfig treeFun merged children
    302 
    303               ; We have reached the maximum number of hitchhikers, we now
    304               ; need to flush these downwards.
    305               | extendIndex treeFun maxLeafItems distrib
    306 
    307           | else
    308 
    309               ; All we must do is rebuild the node with the new k/v pair
    310               ; added on as a hitchhiker to this node.
    311               | **singletonIndex | mkNode children merged
    312 
    313       - LEAFNODE items
    314           @ !inserted (leafInsert items toAdd)
    315           | splitLeafMany treeFun maxLeafItems inserted
    316 
    317     = (distributeDownwards treeConfig treeFun hitchhikers index)
    318 
    319         @ TREE_FUN(..) treeFun
    320         | ifz (hhLength hitchhikers) index
    321         @ [keys vals] index
    322         @ keyList   | listFromRow keys
    323         @ splitHH   | downSplit treeFun keyList hitchhikers
    324         @ indexList | parList
    325                     | listMap (downPush treeConfig treeFun)
    326                     | listZip splitHH
    327                     | listFromRow vals
    328         @ [!lkeys !lvals] | joinIndex keyList indexList
    329         [(listToRow lkeys) (cat | listToRow lvals)]
    330 
    331     ; implementation details of insertRec/distributeDownwards
    332     = (downSplit treeFun l hh)
    333       : key keys < listCase l ~[hh]
    334       @ TREE_FUN(..) treeFun
    335       @ [!toAdd !rest] | hhSplit key hh
    336       | CONS toAdd | downSplit treeFun keys rest
    337 
    338     = (downPush treeConfig treeFun [hh node])
    339       @ TREE_FUN(..) treeFun
    340       | ifz (hhLength hh)
    341         | **singletonIndex node
    342       | insertRec treeConfig treeFun hh node
    343 
    344     = (joinIndex kl il)
    345       # case kl
    346       - NIL # case il
    347             - NIL                | (NIL, NIL)
    348             - CONS [keys vals] _ | (listFromRow keys, ~[vals])
    349       - CONS k ks
    350           # case il
    351           - NIL (die {missing index in joinIndex})
    352           - CONS [keys vals] ts
    353               @ [keyrest valrest] | joinIndex ks ts
    354               @ !kout | listWeld (listFromRow keys) (CONS k keyrest)
    355               @ !vout | CONS vals valrest
    356               [kout vout]
    357 
    358 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    359 
    360 = (splitHitchhikersByKeys treeFun keys hh)
    361 @ TREE_FUN(..) treeFun
    362 @ l | len keys
    363 ^ unfoldr _ [0 hh]
    364 & [i hh]
    365 | if (eql i l) | SOME [hh [(inc i) hhEmpty]]
    366 | if (gth i l) | NONE
    367 @ [!cur !rest] | hhSplit (idx i keys) hh
    368 | SOME [cur [(inc i) rest]]
    369 
    370 ;; {getLeafNode} Forces a flush of all hitchhikers down to the leaf levels
    371 ;; and return the resulting leaf vectors.
    372 
    373 = (getLeafRow treeFun node)
    374 @ TREE_FUN(..) treeFun
    375 ^ _ hhEmpty node
    376 ? (go_openTreeFun hh node)
    377 # case (caseNode node)
    378 - LEAFNODE leaves
    379   @ !item (leafInsert leaves hh)
    380   [item]
    381 - INDEXNODE [keys vals] hitchhikers
    382   @ !merged (hhMerge hitchhikers hh)
    383   @ splitHH | splitHitchhikersByKeys treeFun keys merged
    384   | cat
    385   | map [hh node]&(go_openTreeFun hh node) | zip splitHH vals
    386 
    387 = (nodeNeedsMerge config treeFun node)
    388 @ TREE_CONFIG(..) config
    389 @ TREE_FUN(..) treeFun
    390 # case (caseNode node)
    391 - INDEXNODE index hitchhikers
    392   | lth (indexKeyLen index) minIdxKeys
    393 - LEAFNODE leaves
    394   | lth (leafLength leaves) minLeafItems
    395 
    396 = (mergeNodes config treeFun left middleKey right)
    397 @ TREE_CONFIG(..) config
    398 @ TREE_FUN(..) treeFun
    399 ; TODO I wish I could collapse this into a single case like in Haskell.
    400 # case (caseNode left)
    401 - INDEXNODE leftIdx leftHH
    402   # case (caseNode right)
    403   - INDEXNODE rightIdx rightHH
    404     @ !left   | distributeDownwards config treeFun leftHH leftIdx
    405     @ !right  | distributeDownwards config treeFun rightHH rightIdx
    406     @ !merged | mergeIndex left middleKey right
    407     | extendIndex treeFun maxIdxKeys merged
    408   - LEAFNODE _
    409     | die %nodeMismatch
    410 - LEAFNODE leftLeaf
    411   # case (caseNode right)
    412   - LEAFNODE rightLeaf
    413     @ !merged | leafMerge leftLeaf rightLeaf
    414     | splitLeafMany treeFun maxLeafItems merged
    415   - INDEXNODE _ _
    416     | die %nodeMismatch
    417 
    418 ;; {maybeCaseBack} is just backwards {maybeCase}.  This is easier in
    419 ;; some usage patterns.
    420 
    421 (**maybeCaseBack mb som non)=(maybeCase mb non som)
    422 
    423 = (deleteRec config treeFun key mybV node)
    424 @ TREE_CONFIG(..) config
    425 @ TREE_FUN(..) treeFun
    426 # case (caseNode node)
    427 - LEAFNODE leaves
    428   | mkLeaf | leafDelete key mybV leaves
    429 - INDEXNODE index hitchhikers
    430   @ [ctx child]     | valView key index
    431   @ newChild        | deleteRec config treeFun key mybV child
    432   @ childNeedsMerge | nodeNeedsMerge config treeFun newChild
    433   @ prunedHH        | hhDelete key mybV hitchhikers
    434   | if | not childNeedsMerge
    435     | mkNode (putVal ctx newChild) prunedHH
    436   | maybeCaseBack | rightView ctx
    437     & [rKey rChild rCtx]
    438     | mkNode
    439         | putIdx rCtx | mergeNodes config treeFun newChild rKey rChild
    440     prunedHH
    441   | maybeCaseBack | leftView ctx
    442     & [lCtx lChild lKey]
    443     | mkNode
    444         | putIdx lCtx | mergeNodes config treeFun lChild lKey newChild
    445     prunedHH
    446   | die {deleteRec: node with single child}
    447 
    448 
    449 ;;; HitchhikerMap ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    450 
    451 abstype#(HSet a)
    452 abstype#(HMap k v)
    453 abstype#(HSetMap k v)
    454 
    455 (hhDeleteKey k _ t)=(tabDel k t)
    456 
    457 = (hmEmpty config) | [config 0]
    458 
    459 = (hmCaseNode pinnedNode)
    460 @ !node (pinItem pinnedNode)
    461 | if | isTab node
    462     | LEAFNODE node
    463 @ [keys nodes hh] node
    464 | INDEXNODE [keys nodes] hh
    465 
    466 = (hmSingleton config k v)
    467 @ node | PIN | tabSing k v
    468 [config node]
    469 
    470 (tabUnionRightBiased x y)=(tabUnion y x)
    471 
    472 = hhMapTF
    473 ++ packIndexNode                 ; mkNode
    474 ++ PIN                           ; mkLeaf
    475 ++ hmCaseNode                    ; caseNode
    476 ++ tabUnionRightBiased           ; leafInsert
    477 ++ tabUnionRightBiased           ; leafMerge
    478 ++ tabLen                        ; leafLength
    479 ++ tabSplitAt                    ; leafSplitAt
    480 ++ tabMinKey                     ; leafFirstKey
    481 ++ #[]                           ; leafEmpty
    482 ++ hhDeleteKey                   ; leafDelete
    483 ++ tabUnionRightBiased           ; hhMerge
    484 ++ tabLen                        ; hhLength
    485 ++ tabSplitLT                    ; hhSplit
    486 ++ #[]                           ; hhEmpty
    487 ++ hhDeleteKey                   ; hhDelete
    488 
    489 = (hmSize [config r])
    490 | ifz r 0
    491 | sumOf tabLen
    492 | getLeafRow hhMapTF r
    493 
    494 ;; TODO: {hmKeys} returns a set in the empty case, and a row-of-rows
    495 ;; otherwise.  Is that wrong?
    496 
    497 = (hmKeys [config top])
    498 | ifz top %[]
    499 | map tabKeysRow
    500 | getLeafRow hhMapTF top
    501 
    502 = (hmInsert k v [config top])
    503 @ p | tabSing k v
    504 | ifz top | hmSingleton config k v
    505 @ !index | insertRec config hhMapTF p top
    506 @ !fixed | fixup config hhMapTF index
    507 ++ config
    508 ++ fixed
    509 
    510 = (hmInsertMany tab [config top])
    511 | if | tabIsEmpty tab
    512   [config top]
    513 @ TREE_CONFIG(..) config
    514 @ !index
    515   | ifz top
    516       | splitLeafMany hhMapTF maxLeafItems tab
    517   | insertRec config hhMapTF tab top
    518 @ !fixed | fixup config hhMapTF index
    519 ++ config
    520 ++ fixed
    521 
    522 ;; TODO: case guard statements would make this much more readable.
    523 
    524 = (hmDelete k [config r])
    525 | ifz r [config r]
    526 @ newRootNode | deleteRec config hhMapTF k NONE r
    527 # case (hmCaseNode newRootNode)
    528 - LEAFNODE leaves
    529   ++ config
    530   ++ if (tabIsEmpty leaves) NONE (SOME newRootNode)
    531 - INDEXNODE index hitchhikers
    532   @ childNode | fromSingletonIndex index
    533   | ifz childNode
    534       [config newRootNode]
    535   @ base [config childNode]
    536   | if (tabIsEmpty hitchhikers) base
    537   | hmInsertMany hitchhikers base
    538 
    539 = (hmLookup key [config r])
    540 | ifz r NONE
    541 ^ _ r
    542 ? (lookInNode node)
    543 # case (hmCaseNode node)
    544 - INDEXNODE index hitchhikers
    545   : v < maybeCase (tabLookup key hitchhikers)
    546              (lookInNode | findSubnodeByKey key index)
    547   | (SOME v)
    548 - LEAFNODE items
    549   | tabLookup key items
    550 
    551 
    552 ;;; HitchhikerSet ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    553 
    554 = (hsDeleteItem k _ c)   | setDel k c
    555 = (hsEmpty config)       | [config 0]
    556 = (hsNull [config r])    | isZero r
    557 = (hsRawNode [config r]) | r
    558 
    559 = (hsCaseNode pinnedNode)
    560 @ !node (pinItem pinnedNode)
    561 | if | isSet node
    562   | LEAFNODE node
    563 @ [keys nodes hh] node
    564 | INDEXNODE [keys nodes] hh
    565 
    566 = (hsRawSingleton v)
    567 | PIN (setSing v)
    568 
    569 (hsSingleton config v)=[config (hsRawSingleton v)]
    570 
    571 = hhSetTF
    572 ++ packIndexNode                 ; mkNode
    573 ++ PIN                           ; mkLeaf
    574 ++ hsCaseNode                    ; caseNode
    575 ++ setUnion                      ; leafInsert
    576 ++ setUnion                      ; leafMerge
    577 ++ setLen                        ; leafLength
    578 ++ setSplitAt                    ; leafSplitAt
    579 ++ setMin                        ; leafFirstKey
    580 ++ %[]                           ; leafEmpty
    581 ++ hsDeleteItem                  ; leafDelete
    582 ++ setUnion                      ; hhMerge
    583 ++ setLen                        ; hhLength
    584 ++ setSplitLT                    ; hhSplit
    585 ++ %[]                           ; hhEmpty
    586 ++ hsDeleteItem                  ; hhDelete
    587 
    588 = (hsRawInsert i config r)
    589 @ is     | setSing i
    590 | ifz r  | hsRawSingleton i
    591 @ !index | insertRec config hhSetTF is r
    592 @ !fixed | fixup config hhSetTF index
    593 | fixed
    594 
    595 (hsInsert i [config r])=[config (hsRawInsert i config r)]
    596 
    597 = (hsRawInsertMany set config r)
    598 | if (setIsEmpty set) r
    599 @ TREE_CONFIG(..) config
    600 @ !index
    601   | ifz r | splitLeafMany hhSetTF maxLeafItems set
    602   | insertRec config hhSetTF set r
    603 | fixup config hhSetTF index
    604 
    605 (hsInsertMany set [config r])=[config (hsRawInsertMany set config r)]
    606 
    607 = (hsRawFromSet config c)
    608 | if (setIsEmpty c) NONE
    609 | hsRawInsertMany c config NONE
    610 
    611 (hsFromSet config c)=[config (hsRawFromSet config c)]
    612 
    613 = (hsToSet [config r])
    614 | ifz r %[]
    615 | setCatRowAsc
    616 | getLeafRow hhSetTF r
    617 
    618 = (hsMember key [config r])
    619 | ifz r FALSE
    620 ^ _ r
    621 ? (lookInNode node)
    622 # case (hsCaseNode node)
    623 - INDEXNODE index hitchhikers
    624   | if (setHas key hitchhikers) TRUE
    625   | lookInNode | findSubnodeByKey key index
    626 - LEAFNODE items
    627   | setHas key items
    628 
    629 = (hsRawDelete key config r)
    630 | ifz r r
    631 @ newRootNode | deleteRec config hhSetTF key NONE r
    632 # case (hsCaseNode newRootNode)
    633 - LEAFNODE leaves
    634   | if (setIsEmpty leaves) 0 newRootNode
    635 - INDEXNODE index hitchhikers
    636   @ childNode | fromSingletonIndex index
    637   | ifz childNode newRootNode
    638   | if (setIsEmpty hitchhikers) childNode
    639   | hsRawInsertMany hitchhikers config childNode
    640 
    641 = (hsDelete key [config r])
    642 @ x | hsRawDelete key config r
    643 [config x]
    644 
    645 ;; TODO: Like with intersection, a real implementation of union needs
    646 ;; to be built instead of just the simplistic {hsRawUnion}.
    647 ;;
    648 ;; What would a faster union look like? This implementation goes through all
    649 ;; leaf sets and then iteratively unions them all together on each side and then
    650 ;; unions that. Then it takes that megaset and splits it back up. That seems
    651 ;; hella inefficient.
    652 ;;
    653 ;; Some ideas:
    654 ;;
    655 ;; - When one side is small, treat it as an insertRec into the other.
    656 ;;
    657 ;; - Operate on a list of set pieces. unlike intersection, you
    658 ;;   can't prevent touching the pins for every leaf so maintain using
    659 ;;   getLeafRow. But make the internal union operation work on
    660 ;;   {[Set k] -> [Set k] -> [Set k]} and then {consolidate} on that
    661 ;;   structure. That at minimum removes the megaunion at the end and
    662 ;;   then breaking that back up into smaller leaves. (It's probably
    663 ;;   faster everywhere else, but I'd have to double check...)
    664 
    665 = (hsRawUnion aconfig ar br)
    666 | ifz ar br
    667 | ifz br ar
    668 @ as | setCatRowAsc | getLeafRow hhSetTF ar
    669 @ bs | setCatRowAsc | getLeafRow hhSetTF br
    670 | hsRawFromSet aconfig
    671 | setUnion as bs
    672 
    673 = (hsUnion as bs)
    674 @ [aconfig ar] as
    675 @ [_ br] bs
    676 [aconfig (hsRawUnion aconfig ar br)]
    677 
    678 =?= 1 | hsNull | hsEmpty twoThreeConfig
    679 =?= 0 | hsNull | hsSingleton twoThreeConfig 9
    680 =?= 0 | hsNull | hsInsert 8 | hsSingleton twoThreeConfig 9
    681 =?= 0 | hsNull | hsInsert 9 | hsSingleton twoThreeConfig 9
    682 =?= 1 | hsNull | hsDelete 9 | hsSingleton twoThreeConfig 9
    683 
    684 
    685 ;;; Hitchhiker Set New Intersect ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    686 
    687 ;; {getLeafList} is like getLeafRow, but returns a list instead.
    688 
    689 ;> TreeFun > Index k v > List (Set k)
    690 = (getLeafList treeFun node)
    691 @ TREE_FUN(..) treeFun
    692 ^ _ hhEmpty node
    693 ? (go_openTreeFun hh node)
    694 # case (caseNode node)
    695 - LEAFNODE leaves
    696   @ !item (leafInsert leaves hh)
    697   | CONS item NIL
    698 - INDEXNODE [keys vals] hitchhikers
    699   @ !merged (hhMerge hitchhikers hh)
    700   @ splitHH | splitHitchhikersByKeys treeFun keys merged
    701   | listCat
    702   | listMap [hh node]&(go_openTreeFun hh node)
    703   | listFromRow
    704   | zip splitHH vals
    705 
    706 > List (Set k) > List (Set k) > List (Set k)
    707 = (setlistIntersect ao bo)
    708 | listCase ao NIL
    709 & (a as)
    710 | listCase bo NIL
    711 & (b bs)
    712 @ amin | setMin a
    713 @ amax | setMax a
    714 @ bmin | setMin b
    715 @ bmax | setMax b
    716 @ overlap | and lte-amin-bmax lte-bmin-amax
    717 @ int | setIntersect a b
    718 @ rest
    719   | switch (cmp amax bmax) 0
    720   ++ setlistIntersect as bo
    721   ++ setlistIntersect as bs
    722   ++ setlistIntersect ao bs
    723 | if | and overlap (not | setIsEmpty int)
    724   | CONS int rest
    725 rest
    726 
    727 =?= ~[]
    728     | setlistIntersect
    729     * ~[%[1 2 3] %[4 5 6]]
    730     * ~[%[7 8 9] %[10 11 12]]
    731 
    732 =?= ~[%[6] %[7]]
    733     | setlistIntersect
    734     * ~[%[4 5 6] %[7 8 9]]
    735     * ~[%[6 7]]
    736 
    737 =?= ~[%[2] %[4] %[9]]
    738     | setlistIntersect
    739     * ~[%[2] %[3] %[4 6] %[9]]
    740     * ~[%[2] %[4 5] %[7 8 9]]
    741 
    742 > Row (HSet k) > List (Set k)
    743 = (hsMultiIntersect setRow)
    744 | ifz (len setRow) NIL
    745 | if (eql 1 | len setRow)
    746     @ [_ node] | idx 0 setRow
    747     | getLeafList hhSetTF node
    748 ;
    749 @ mybNodes | map hsRawNode setRow
    750 | if (any isZero mybNodes)
    751     NIL
    752 ;
    753 @ setNodes
    754     | map (getLeafList hhSetTF) mybNodes
    755 ;
    756 ; manual foldl1 like
    757 ^ _ (idx 0 setNodes) 1 (dec (len setNodes))
    758 ? (go acc i rem)
    759 | ifNot rem acc
    760 @ rem | dec rem
    761 @ acc | setlistIntersect acc (idx i setNodes)
    762 | seq acc
    763 | go acc (inc i) rem
    764 
    765 > Nat > List (Set a) > List (Set a)
    766 = (lsDrop num sets)
    767 | ifz num sets
    768 | listCase sets NIL
    769 & (x xs)
    770 @ xl | setLen x
    771 | if | gte num xl
    772   | lsDrop (sub num xl) xs
    773 | CONS
    774 * setDrop num x
    775 * xs
    776 
    777 =?= ~[%[5]] | lsDrop 4 ~[%[1 2 3] %[4 5]]
    778 
    779 > Nat > List (Set k) > List (Set k)
    780 = (lsTake num sets)
    781 | ifz num NIL
    782 | listCase sets NIL
    783 & (x xs)
    784 @ xl | setLen x
    785 | if | lth num xl  ; num < xl
    786     | CONS (setTake num x) NIL
    787 | CONS
    788 * x
    789 * lsTake (sub num xl) xs
    790 
    791 =?= ~[%[1 2 3]]      | lsTake 3 ~[%[1 2 3] %[4 5]]
    792 =?= ~[%[1 2 3] %[4]] | lsTake 4 ~[%[1 2 3] %[4 5]]
    793 
    794 > List (Set k) > Nat
    795 = (lsLen sets)
    796 | listFoldl (i s)&(add i | setLen s) 0 sets
    797 
    798 =?= 2
    799     | lsLen (CONS %[4] (CONS %[5] NIL))
    800 
    801 > List (Set k) > List k
    802 = (lsToList ls)
    803 | listCat
    804 | listMap setToList ls
    805 
    806 
    807 ;;; HitchhikerSetMap ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    808 
    809 = (hsmEmpty mapConfig setConfig)
    810 [mapConfig setConfig 0]
    811 
    812 = (hsmCaseNode pinnedNode)
    813 @ !node (pinItem pinnedNode)
    814 | if | isTab node
    815   | LEAFNODE node
    816 @ [keys nodes hh] node
    817 | INDEXNODE [keys nodes] hh
    818 
    819 ;; In a {SetMap}, hitchhikers are a normal map of sets, but leaves are
    820 ;; maps of raw hitchhikersets.
    821 
    822 > TreeConfig > Tab k (HSet Nat) > Tab k (Set a) > Tab k (HSet Nat)
    823 = (hsmLeafInsertImpl setConfig leaf hh)
    824 @ (alt new m)
    825   | SOME
    826   | maybeCase m | hsRawFromSet setConfig new
    827   & old
    828   | hsRawInsertMany new setConfig old
    829 @ (merge items k vset)
    830   | tabAlter (alt vset) k items
    831 | tabFoldlWithKey merge leaf hh
    832 
    833 = (hsmLeafDeleteImpl setConfig k mybV hsm)
    834 | maybeCase mybV (die %cantDeleteNoValue)
    835 & v
    836 @ (update in)
    837   | maybeCase in NONE
    838   & set
    839   | SOME | hsRawDelete v setConfig set
    840 | tabAlter update k hsm
    841 
    842 = (hsmHHDeleteImpl k mybV sm)
    843 | maybeCase mybV (die %cantDeleteNoValue)
    844 & v
    845 @ (update in)
    846   | maybeCase in NONE
    847   & set
    848   | SOME | setDel v set
    849 | tabAlter update k sm
    850 
    851 ;; Since the point of hitchhikers is that they'll fit in an index node's
    852 ;; grain, {hhSetMapLength} counts the number of items across all sets
    853 ;; in the hitchhiker, not the number of tab entries.
    854 
    855 (hhSetMapLength a)=(sumOf setLen | tabValsRow a)
    856 
    857 = (hhSetMapTF setConfig)
    858 ++ packIndexNode                       ; mkNode
    859 ++ PIN                                 ; mkLeaf
    860 ++ hsmCaseNode                         ; caseNode
    861 ++ hsmLeafInsertImpl setConfig         ; leafInsert
    862 ++ tabUnionWith (hsRawUnion setConfig) ; leafMerge
    863 ++ tabLen                              ; leafLength
    864 ++ tabSplitAt                          ; leafSplitAt
    865 ++ tabMinKey                           ; leafFirstKey
    866 ++ #[]                                 ; leafEmpty
    867 ++ hsmLeafDeleteImpl setConfig         ; leafDelete
    868 ++ tabUnionWith setUnion               ; hhMerge
    869 ++ hhSetMapLength                      ; hhLength
    870 ++ tabSplitLT                          ; hhSplit
    871 ++ #[]                                 ; hhEmpty
    872 ++ hsmHHDeleteImpl                     ; hhDelete
    873 
    874 = (hsmInsert k v [mapConfig setConfig r])
    875 | ifz r
    876     @ raw  | hsRawSingleton v
    877     @ leaf | tabSing k raw
    878     ++ mapConfig
    879     ++ setConfig
    880     ++ PIN leaf
    881 @ tf | hhSetMapTF setConfig
    882 @ hh | tabSing k (setSing v)
    883 @ !index | insertRec mapConfig tf hh r
    884 @ !fixed | fixup mapConfig tf index
    885 ++ mapConfig
    886 ++ setConfig
    887 ++ fixed
    888 
    889 = (hsmInsertMany tabset [mapConfig setConfig r])
    890 | if | tabIsEmpty tabset
    891   [mapConfig setConfig r]
    892 @ tf | hhSetMapTF setConfig
    893 @ !index
    894   | ifz r
    895       @ TREE_CONFIG(..) mapConfig
    896       | splitLeafMany tf maxLeafItems
    897       | tabMapWithKey (k v)&(hsRawFromSet setConfig v) tabset
    898   | insertRec mapConfig tf tabset r
    899 @ !fixed | fixup mapConfig tf index
    900 ++ mapConfig
    901 ++ setConfig
    902 ++ fixed
    903 
    904 = (hsmDelete k v [mapConfig setConfig r])
    905 | ifz r
    906     [mapConfig setConfig r]
    907 @ newRootNode | deleteRec mapConfig (hhSetMapTF setConfig) k (SOME v) r
    908 # case (hsmCaseNode newRootNode)
    909 - LEAFNODE leaves
    910   ++ mapConfig
    911   ++ setConfig
    912   ++ if (tabIsEmpty leaves) 0 newRootNode
    913 - INDEXNODE index hitchhikers
    914   @ childNode | fromSingletonIndex index
    915   | ifz childNode
    916       [mapConfig setConfig newRootNode]
    917   @ base [mapConfig setConfig childNode]
    918   | if (tabIsEmpty hitchhikers) base
    919   | hsmInsertMany hitchhikers base
    920 
    921 ;; TODO: {hsmLookup} does a bunch of work each time which gets thrown
    922 ;; away. We are likely to lookup the same items multiple times in a row,
    923 ;; so a stateful, modifying hsmFlushAndLookup which modified itself so
    924 ;; hitchhikers for that one
    925 
    926 = (hsmLookup k [mapConfig setConfig r])
    927 @ TREE_CONFIG(..) mapConfig
    928 | ifz r
    929     | hsEmpty setConfig
    930 ^ _ %[] r
    931 ? (lookInNode !hh node)
    932 # case (hsmCaseNode node)
    933 - INDEXNODE children hitchhikers
    934   @ matched | fromSome %[] (tabLookup k hitchhikers)
    935   | lookInNode (setUnion hh matched) | findSubnodeByKey k children
    936 - LEAFNODE items
    937   : ret < **maybeCase (tabLookup k items)
    938              [setConfig (hsRawFromSet setConfig hh)]
    939   [setConfig (hsRawInsertMany hh setConfig ret)]
    940 
    941 
    942 ;;; Exports ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    943 
    944 ^-^
    945 ^-^ HSet HMap HSetMap
    946 ^-^ TreeConfig TREE_CONFIG
    947 ^-^
    948 ^-^ ; shared configuration
    949 ^-^ twoThreeConfig largeConfig
    950 ^-^
    951 ^-^ ; hitchhikermap (hm)
    952 ^-^ hmEmpty hmSingleton hmSize hmKeys hmInsert hmInsertMany hmDelete hmLookup
    953 ^-^
    954 ^-^ ; hitchhikerset (hs)
    955 ^-^ hsEmpty hsNull hsSingleton hsInsert hsInsertMany hsDelete hsToSet hsFromSet
    956 ^-^ hsMember hsUnion
    957 ^-^ hsMultiIntersect
    958 ^-^
    959 ^-^ ; listset (ls)
    960 ^-^ lsDrop lsTake lsLen lsToList
    961 ^-^
    962 ^-^ ; hitchhiersetmap (hsm)
    963 ^-^ hsmEmpty hsmInsert hsmInsertMany hsmDelete hsmLookup
    964 ^-^ tabSplitLT
    965 ^-^