demo_full_tag_site.sire (16294B)
1 ; Copyright 2023 The Plunder Authors 2 ; Use of this source code is governed by a BSD-style license that can be 3 ; found in the LICENSE file. 4 5 #### demo_full_tag_site <- prelude 6 7 :| sire 8 :| kern 9 :| hitch 10 :| json 11 :| stew 12 13 14 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 15 16 ; Design: 17 ; 18 ; Has one big HTTP server which just receives posts and gets. All POSTs perform 19 ; plunder lookups. All GETs perform lookups in a cache of file for PUT 20 ; 21 ; - POST /usecdn - sets demo to use CDN urls in the json 22 ; - POST /uselocal - sets demo to use urls we serve 23 ; 24 ; - POST /learn 'content-type: application/json' [json file] 25 ; 26 ; - POST /count -> application/json back. 27 ; 28 ; - POST /search 'tags@[x y z]' -> application/json back. 29 ; 30 ; - GET / -> frontpage 31 32 > TreeConfig 33 = imgmapConfig 34 @ minFanout 128 35 @ maxFanout | dec | mul 2 minFanout 36 | TREE_CONFIG 37 * minFanout 38 * maxFanout 39 * dec minFanout 40 * dec maxFanout 41 * minFanout 42 * maxFanout 43 * minFanout 44 45 46 ; config for just the set part of the id setmap, optimized for nat -> bigset of 47 ; nats. 48 = idsetConfig 49 @ minItems 1024 50 @ maxItems | dec | mul 2 minItems 51 @ minFanout 128 52 @ maxFanout | dec | mul 2 minFanout 53 | TREE_CONFIG 54 * minFanout 55 * maxFanout 56 * dec minFanout 57 * dec maxFanout 58 * minItems 59 * maxItems 60 * maxItems 61 62 ; derpibooru's favicon 63 > Pin Bar 64 = favicon 65 | PIN 66 | barCat 67 , x#{0000010001001010000000000000680400001600000028000000100000002000} 68 x#{000001002000000000000004000000000000000000000000000000000000ffff} 69 x#{ff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff00ffff} 70 x#{ff00edd673ffedd673ffeed97ee6ffffff00ffffff00ffffff0000000000ffff} 71 x#{ff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff00fefdfb05edd6} 72 x#{73ffedd673fffbf8e9ffedd673ffedd673ffffffff00ffffff0000000000ffff} 73 x#{ff00ffffff00ffffff00ddc194ffffffff00ffffff00edd673feedd673ffedd6} 74 x#{73ffedd673ffedd673ffedd673ffedd673fff9f2d445ffffff0000000000ffff} 75 x#{ff00ffffff00ffffff00ddc194ffddc194ffedd673ffedd673fffbf8e9ffedd6} 76 x#{73ffedd673ffedd673ffedd673ffedd673ffedd673ffffffff0000000000fdfb} 77 x#{f213edd673ffedd673ffedd673ffddc194ffedd673ffedd673ffedd673ffedd6} 78 x#{73ffedd673ffedd673fffbf8e9ffedd673ffedd673ffffffff0000000000ffff} 79 x#{ff00ffffff00edd673ffedd673ffddc194ffddc194ffedd673ffedd673ffedd6} 80 x#{73ffedd673ffedd673ffedd673ffedd673ffedd673ffefda82dd00000000ffff} 81 x#{ff00ffffff00ffffff00ffffff00fefefe00ddc194fffefefd02ffffff00ffff} 82 x#{ff00ffffff00fdfaef18edd673ffedd673ffedd673ffedd673ff00000000ffff} 83 x#{ff00ffffff00ffffff00ffffff00ffffff00ffffff00ddc194ffffffff00ffff} 84 x#{ff00ffffff00ffffff00ffffff00edd673fffbf8e9ffedd673ff00000000ffff} 85 x#{ff00ffffff00ffffff00ffffff00ffffff00ffffff00ddc194fff1e5d360ffff} 86 x#{ff00ffffff00ffffff00ffffff00edd673ffedd673ffedd673ff00000000ffff} 87 x#{ff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff00ddc194fcffff} 88 x#{ff00ffffff00ffffff00ffffff00ffffff00edd673ffedd673ff00000000ffff} 89 x#{ff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff00be85} 90 x#{23fff9efdf7cffffff00ffffff00ffffff00edd673ffedd673ff00000000ffff} 91 x#{ff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff00fdf9f428be85} 92 x#{23fff3e1c1fffdfaf61effffff00ffffff00f0dd8cffedd673ff00000000ffff} 93 x#{ff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff00f4e2c3ffddc1} 94 x#{94ffddc194ffe1c79cfff4e2c3fff7ebd6a2edd673ffefda83dc00000000ffff} 95 x#{ff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff00f4e2} 96 x#{c3ffddc194fff4e2c3ffffffff00ffffff00edd673ffffffff0000000000ffff} 97 x#{ff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff00f4e2} 98 x#{c3ffffffff00f3e1c2ffffffff00ffffff00edd673ffffffff0000000000ffff} 99 x#{ff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff00ffff} 100 x#{ff00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff0000000000ff8f} 101 x#{0000ff070000ec070000e003000080030000c0010000fbe10000fdf10000fdf1} 102 x#{0000fef90000ff790000ff390000fe010000ff1b0000ff5b0000ffff0000} 103 104 105 # typedef ContentType Bar ; TODO 106 107 > HMap Str (ContentType, Pin Bar) 108 = startingFiles 109 ; TODO: Figure out tuned config for files. 110 | hmSingleton 111 * largeConfig 112 * {/favicon.ico} 113 * [b#{image/x-icon} favicon] 114 115 #* # typedef Lock | MVar () 116 #* # typedef Url | Bar 117 #* # typedef ImgId | Nat 118 119 # record ImgRow 120 | IMG_ROW 121 * imgId : ImgId 122 * tags : Row Str 123 * url : Url 124 125 typedef#Tag#Str 126 127 # record CogState 128 | COG_STATE 129 * files : (HMap Str (ContentType, Pin Bar)) 130 * fileBytes : Nat 131 * local : Bit 132 * imgCount : Nat 133 * pairCount : Nat 134 * imgs : (HMap ImgId ImgRow) 135 * tags : (HSetMap Tag Image) 136 * servThread : ThreadId 137 * learnLock : Lock 138 139 > ThreadId > Lock > CogState 140 = (newState servThread learnLock) 141 | COG_STATE 142 * startingFiles 143 * 0 144 * FALSE 145 * 0 146 * 0 147 * hmEmpty imgmapConfig 148 * hsmEmpty largeConfig idsetConfig 149 * servThread 150 * learnLock 151 152 (mkBarPath path)=(barFlat (b#{/}, listIntersperse b#{/} (listFromRow path))) 153 154 =?= (mkBarPath [b#{a} b#{b}]) 155 b#{/a/b} 156 157 = (getFilename path) 158 # case (barElemIndexEnd {/} path) 159 - NONE | {weird filename} path 160 - SOME lastIdx | barDrop (inc lastIdx) path 161 162 =?= (getFilename b#{http://blah/blah/two.html}) 163 b#{two.html} 164 165 ; Set of images from derpibooru where there was a proper non-deleted API 166 ; response, but the thumbnail file referenced was non-existent. Handle these as 167 ; exceptions. 168 > Set ImgId 169 = missingImgs 170 %% 79135 918505 3031483 3030628 3028888 3031523 1078315 3017754 500313 171 %% 2899504 2460697 2944469 911039 1078318 543877 3032025 2999533 2662803 172 %% 2863835 2916074 2929953 3023899 891477 2918755 2971526 2460696 2824778 173 %% 3032519 2915600 918535 1772525 2905776 2871055 2983147 2937212 2825423 174 %% 3019449 3032217 2836375 2668954 2938918 2994310 89769 2922373 3020597 175 %% 2943107 176 177 178 ;;; JSON Handling ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 179 180 = (unsafeGetJsonMap json) 181 # case json 182 - JMAP m | m 183 - _ | %notAMap json 184 185 = (unsafeGetJsonNumber json) 186 # case json 187 - JNUM n | n 188 - _ | {unsafeGetJsonNumber: not a number} json 189 190 = (unsafeTabLookup key t) 191 # case (tabLookup key t) 192 - SOME v | v 193 - NONE | {unsafeTabLookup: missing key} key 194 195 = (unsafeGetJsonArray json) 196 # case json 197 - JVEC v | v 198 - _ | {unsafeGetJsonArray: not an array} json 199 200 = (unsafeGetJsonStr json) 201 # case json 202 - JSTR s | s 203 - _ | {unsafeGetJsonStr: not a string} json 204 205 = (jsonImgToRow jsonBS m) 206 @ id | unsafeGetJsonNumber | unsafeTabLookup %id m 207 | if (setHas id missingImgs) NONE 208 @ !rawTags 209 | strictRow 210 | map unsafeGetJsonStr 211 | unsafeGetJsonArray 212 | unsafeTabLookup %tags m 213 @ !thumbnailURL 214 | unsafeGetJsonStr 215 | unsafeTabLookup %thumb 216 | unsafeGetJsonMap 217 | unsafeTabLookup %representations m 218 | SOME [id rawTags thumbnailURL] 219 220 = (imgRowToTagTabSet [id tags _]) 221 | tabFromPairs 222 | map tag&(barNat tag, setSing id) tags 223 224 = (imgRowToPair imgRow@[id _ _]) 225 [id imgRow] 226 227 # record LearnData 228 | LEARN_DATA 229 * addRows : Any 230 * addImages : Tab ImgId ImgRow 231 * addTags : Any 232 233 > Bar > LearnData 234 = (parseLearn (PIN jsonBS)) 235 @ [!json _] 236 | trk %parse 237 | parseJson jsonBS 238 @ !rows 239 | catMaybes 240 | map m&(jsonImgToRow jsonBS | unsafeGetJsonMap m) 241 | unsafeGetJsonArray 242 | unsafeTabLookup %images 243 | unsafeGetJsonMap json 244 @ !addTags 245 | trk %addTags 246 | foldl (tabUnionWith setUnion) #[] 247 | map imgRowToTagTabSet rows 248 @ !addImgs 249 | tabFromPairs 250 | map imgRowToPair rows 251 | LEARN_DATA rows addImgs addTags 252 253 abstype#Image ;; TODO: fix. What is the real type? 254 255 > Pin CogState 256 > Pin LearnData 257 > (HMap ImgId ImgRow, HSetMap Tag Image, Nat, Nat) 258 = (calcLearnSt (PIN st) (PIN [rows addImgs addTags])) 259 | trk [%calcLearnSt (len rows) %rows] 260 @ COG_STATE(..) st 261 @ !nuImgs | hmInsertMany addImgs imgs 262 @ !nuTags | hsmInsertMany addTags tags 263 @ imgCount | add imgCount | len rows 264 @ pairCount | add pairCount | tabLen addTags 265 | [nuImgs nuTags imgCount pairCount] 266 267 > Any > EvalResult b > b 268 = (forceUnpackEvalResult ctx res) 269 # switch (len res) 270 * 0 (die ({eval-timeout} ctx)) 271 * 1 (idx 0 res) 272 * 2 (die ({eval-crash} [ctx res])) 273 274 ; TODO: user #data to define the json structure, and use #case here. 275 = (jsonRow jsonBS) 276 @ res@[json leftover] (parseJson jsonBS) 277 | if res NONE 278 | if (neq 0 leftover) NONE 279 # switch (if json json (idx 0 json)) 280 * vec | SOME (idx 1 json) 281 * _ | NONE 282 283 > Bar > Maybe (Tab Str Json) 284 = (jsonMap jsonBS) 285 @ res@[json leftover] (parseJson jsonBS) 286 | if (res || leftover) NONE 287 # case json 288 - JMAP m | SOME m 289 - _ | NONE 290 291 = (asJsonMap m) 292 # case m 293 - JMAP m | SOME m 294 - _ | NONE 295 296 = (asJsonRow m) 297 # case m 298 - JVEC v | SOME v 299 - _ | NONE 300 301 = (asJsonNum m) 302 # case m 303 - JNUM n | SOME n 304 - _ | NONE 305 306 = (asJsonStr m) 307 # case m 308 - JSTR s | SOME (barNat s) 309 - _ | NONE 310 311 ; List of commands we parse out of the incoming json 312 # data Cmd 313 - STATUSCMD 314 - SEARCHCMD offset/Nat tags/Any 315 316 (**bindMaybe mVal k)=(maybeCase mVal NONE k) 317 318 ; 319 ; the json here being parsed is `{tag: "Search", contents: [array of str]}`. 320 ; 321 > Bar > Maybe (Row Str) 322 = (parseSearch jsonBS) 323 : map < **bindMaybe (jsonMap jsonBS) 324 @ isSearch 325 (SOME (JSTR b#{Search}) == tabLookup %tag map) 326 | ifNot isSearch NONE 327 : jcontents < **bindMaybe (tabLookup %contents map) 328 : content < **bindMaybe (asJsonMap jcontents) 329 : joffset < **bindMaybe (tabLookup %offset content) 330 : offset < **bindMaybe (asJsonNum joffset) 331 : jr < **bindMaybe (tabLookup %tags content) 332 : r < **bindMaybe (asJsonRow jr) 333 : terms < ^ (rowTraverse _ r) 334 & (item pure) 335 # case item 336 - JSTR s | pure (barNat s) 337 - _ | NONE 338 | SOME (**SEARCHCMD offset terms) 339 340 ; todo: this is doing the quick hack of just calling the old parseSearch; I 341 ; think all of this should be rewritten when we have more user friendly json 342 ; usage components. 343 > Bar > Maybe Cmd 344 = (parseCmd jsonBS) 345 : jm < **bindMaybe (jsonMap jsonBS) 346 : jt < **bindMaybe (tabLookup %tag jm) 347 : tg < **bindMaybe (asJsonStr jt) 348 # switch tg 349 * Search | parseSearch jsonBS 350 * Status | SOME 0 ; STATUSCMD (but #data generated [0], not 0) 351 352 =?= NONE | parseCmd b#{"} 353 =?= NONE | parseCmd b#{{"x":3}} 354 =?= NONE | parseCmd b#{[]} 355 356 =?= (SOME [1 5 [%x]]) 357 | parseCmd b#{{"tag": "Search", "contents": {"offset":5, "tags":["x"]}}} 358 359 =?= NONE 360 | parseCmd b#{{"tag": "bonk", "contents": {"offset":0, "tags":["x"]}}} 361 362 =?= NONE 363 | parseCmd b#{{"tag": "Search", "contents": [234]}} 364 365 =?= SOME-[1 8 [%x %y]] 366 | parseCmd 367 | b#{{"tag": "Search", "contents": {"offset":8, "tags":["x", "y"]}}} 368 369 =?= (SOME 0) 370 | parseCmd b#{{"tag": "Status"}} 371 372 (tagToStr t)=(JSTR | natBar t) 373 374 = (buildStatus st) 375 @ COG_STATE(..) st 376 | JMAP 377 ## =bytes | JNUM fileBytes 378 ## =imgs | JNUM imgCount 379 ## =pairs | JNUM pairCount 380 381 ; [hm hsm] [cord] > bar 382 = (doSearch st [offset searchTags]) 383 @ COG_STATE(..) st 384 @ (lookupTag tag) 385 @ hs | hsmLookup tag tags 386 | if | hsNull hs 387 | LEFT tag 388 | RIGHT hs 389 @ [missing idsets] | partition | map lookupTag searchTags 390 ;| trk [%results missing idsets] 391 | ifNonZero (len missing) 392 ; send back the missing tags 393 | JMAP 394 ## =tag | JSTR b#{BadTag} 395 ## =contents | JVEC (map tagToStr missing) 396 ## =status | **buildStatus st 397 ; 398 @ ids 399 | hsMultiIntersect idsets 400 ; 401 ;| trk [%ids ids] 402 @ targetIds 403 | lsToList 404 | lsTake 25 405 | lsDrop offset ids 406 @ records 407 | listToRow 408 ^ (listMapMaybe _ targetIds) 409 & id 410 | **fmapMaybe (hmLookup id imgs) 411 & [_ _ url] 412 @ path 413 | ifNot local url 414 ; generate a local server path from the id and the filename 415 @ filename | getFilename url 416 | mkBarPath [b#{img} (natBar | showNat id) filename] 417 | JVEC 418 ++ JNUM id 419 ++ JSTR path 420 | JMAP 421 ## =tag | JSTR b#{OK} 422 ## =contents | JMAP 423 ## =total | JNUM | lsLen ids 424 ## =offset | JNUM | offset 425 ## =query | JVEC | map tagToStr searchTags 426 ## =slice | JVEC | records 427 ## =status | **buildStatus st 428 429 = (doStatus st) 430 | JMAP 431 ## =tag | JSTR b#{Status} 432 ## =contents | **buildStatus st 433 434 435 ;;; Website ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 436 437 (emptyFileServer req)=NONE 438 439 = (fileServer (PIN st) [method path headers (PIN body)]) 440 # switch method 441 * _ | NONE 442 * POST 443 # switch path 444 * b#{/search} 445 # case (parseCmd body) 446 - NONE | SOME [400 b#bad [] b#{}] 447 - SOME q 448 # case q 449 - STATUSCMD 450 @ statusJson | **doStatus st 451 @ statusBS | printJson statusJson 452 | SOME [200 b#gotcha [] statusBS] 453 - SEARCHCMD offset tags 454 @ searchJson | doSearch st [offset tags] 455 @ searchBS | printJson searchJson 456 | SOME [200 b#gotcha [] searchBS] 457 * GET 458 | **fmapMaybe | hmLookup (barNat path) (**getFiles st) 459 & [type (PIN content)] 460 @ head | [(b#{content-type}, type)] 461 [200 b#gotcha head content] 462 463 ; `modifyState` modifies the vSt ref and reboots the static server 464 ; in-place (keeping the same ThreadId). 465 466 > Ref CogState 467 > (CogState > CogState) 468 > Cog () 469 = (modifyState vSt fun return) 470 : (PIN old) < readRef vSt 471 @ srv | **getServThread old 472 @ pNew | PIN (fun old) 473 : _ < writeRef vSt pNew 474 : _ < cancelFork srv (syscall (**HTTP_SERV | fileServer pNew)) 475 | return () 476 477 ; `addLearningsToState` inserts the new images and tags into the state. 478 ; We do this in an EVAL to free up the main loop, but it needs to be 479 ; transactional so we take the `learnLock` lock to prevent multiple 480 ; simultaneous insertions. 481 482 > Ref CogState > Pin LearnData > Cog () 483 = (addLearningsToState vSt pBundle return) 484 ; 485 ; Take the lock, and inject the stuff 486 : (PIN tmp) < readRef vSt 487 @ learnLock (**getLearnLock tmp) 488 ; 489 : ??(alts_locked _) < takeMVar learnLock 490 : pOld < readRef vSt 491 : ??(alts_evaled res) < syscall (**EVAL 1000 [calcLearnSt pOld pBundle]) 492 ; 493 @ [nuImgs nuTags nuImgCount nuPairCount] 494 (forceUnpackEvalResult %learn res) 495 ; 496 : _ < modifyState vSt & st 497 | setImgCount nuImgCount 498 | setPairCount nuPairCount 499 | setImgs nuImgs 500 | setTags nuTags 501 | st 502 ; Release the lock (This wont block, since there are no other writers) 503 | putMVar learnLock () return 504 505 = (addToState vSt rid jsonPin return) 506 : ??(ats_evaled evalResult) < syscall (EVAL1 1000 parseLearn jsonPin) 507 # switch (len evalResult) 508 * 0 | trk b#{TIMEOUT in /learn} 509 | syscall (**HTTP_ECHO rid 408 b#timeout [] b#{}) 510 | return 511 * 2 | trk [b#{CRASH in /learn} (idx 0 evalResult) (idx 1 evalResult)] 512 | syscall (**HTTP_ECHO rid 500 b#error [] b#{}) 513 | return 514 * 1 515 ; 516 @ parsed (idx 0 evalResult) 517 : ??(ats_learned _) < addLearningsToState vSt (PIN parsed) 518 | syscall (**HTTP_ECHO rid 200 b#ok [] b#{}) 519 | return 520 521 > Ref CogState > HttpReq > Cog () 522 = (handleReq vSt request return) 523 @ [rid method path headers pBody@(PIN body)] request 524 # switch method 525 * POST 526 # switch path 527 * b#{/usecdn} 528 : _ < fork (syscall (**HTTP_ECHO rid 200 b#ok [] b#{})) 529 : _ < modifyRef vSt (@(PIN st))&(PIN (setLocal FALSE st)) 530 | return () 531 * b#{/uselocal} 532 : _ < fork (syscall (**HTTP_ECHO rid 200 b#ok [] b#{})) 533 : _ < modifyRef vSt (@(PIN st))&(| PIN | setLocal TRUE st) 534 | return () 535 * b#{/learn} 536 : _ < fork (addToState vSt rid pBody) 537 | return () 538 * PUT 539 @ barType | **fromSome b#{text/plain} 540 | tabLookup b#{content-type} 541 | tabFromPairs headers 542 : (PIN st) < readRef vSt 543 @ newTotalSize | add (barLen body) | getFileBytes st 544 ; TODO: Currently inserting paths as nats instead of bars because bars put in 545 ; don't get looked up later despite showing up with hmKeys? Somehow? wat? 546 @ files (**getFiles st) 547 @ files | hmInsert (barNat path) [barType pBody] files 548 : _ < fork (syscall (**HTTP_ECHO rid 201 b#done [] b#{})) 549 : _ < modifyState vSt & st 550 | setFileBytes newTotalSize 551 | setFiles files 552 | st 553 | return () 554 * _ 555 : _ < fork (syscall (**HTTP_ECHO rid 400 b#bad [] b#{})) 556 | return () 557 558 559 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 560 561 > Ref CogState > Cog Void 562 = (runHttpServer vSt return) 563 : ??(rhs_heard req) < syscall HTTP_HEAR 564 : _ < handleReq vSt req 565 | runHttpServer vSt return 566 567 > Cog () 568 = (launchFullTagDemo return) 569 : servThread < fork (syscall (**HTTP_SERV emptyFileServer)) 570 : learnLock < newMVar () 571 : vSt < newRef (PIN | newState servThread learnLock) 572 : httpThread1 < fork (runHttpServer vSt) 573 : httpThread2 < fork (runHttpServer vSt) 574 | return () 575 576 > PausedCog 577 main=(runCog launchFullTagDemo)