json.sire (13915B)
1 ; Copyright 2023 The Plunder Authors 2 ; Use of this source code is governed by a BSD-style license that can be 3 ; found in the LICENSE file. 4 5 #### json <- hitch 6 7 :| sire 8 :| sire_25_datatype [natTag barTag appTag rowTag tabTag] 9 :| mutrec [{#mutrec}] 10 :| stew 11 12 13 ;;; Lexemes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 14 15 abstype#(ResultPair a b) ; type (ResultPair a b) = (Or Str (a, b)) 16 abstype#(MaybePair a b) ; type (MaybePair a b) = (Or Zero (a, b)) 17 18 typedef#Deci#(MaybePair Nat Nat) 19 typedef#Expo#(MaybePair Nat Nat) 20 21 # data JLexeme 22 * ( TSPACE = space ) 23 * ( TNULL = null ) 24 * ( TTRUE = true ) 25 * ( TFALSE = false ) 26 * ( TCOMMA = comma ) 27 * ( TCOLON = colon ) 28 * ( TLBRAK = l_brak ) 29 * ( TRBRAK = r_brak ) 30 * ( TLCURL = l_curl ) 31 * ( TRCURL = r_curl ) 32 * ( TSTR = str ) str/Bar 33 * ( TNUM = num ) neg/Bit num/Nat dec/Deci exp/Expo 34 * ( TERR = err ) off/Nat why/Str 35 36 37 ;;; Lexing Whitespace ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 38 39 = (chomp input off) 40 # switch (barIdx off input) 41 * 32 | chomp input (inc off) 42 * 9 | chomp input (inc off) 43 * 10 | chomp input (inc off) 44 * 13 | chomp input (inc off) 45 * _ | off 46 47 =?= 7 (chomp b{ xyz} 0) 48 49 50 ;;; Lexing Strings ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 51 52 = (lexStringLoop input top off) 53 @ off (barElemIndexOff {"} off input) 54 | if (eql off barLen-input) 55 | {Unexpected EOF (string)} 56 @ (countSlashes off count) 57 | if (eql {\} | barIdx off input) 58 | countSlashes (dec off) (inc count) 59 | count 60 @ numSlashes | **countSlashes (dec off) 0 61 | if (and (gth numSlashes 0) (mod numSlashes 2)) 62 | lexStringLoop input top (inc off) 63 | else 64 @ tok (**TSTR (barSlice top (sub off top) input)) 65 | (tok, inc off) 66 67 (lexString input off)=(lexStringLoop input off off) 68 69 =?= (TSTR b{fdsafdsa}, 10) 70 | lexString b{"fdsafdsa"} 1 71 72 =?= (TSTR b{fdsafdsa\"more}, 16) 73 | lexString b{"fdsafdsa\"more"} 1 74 75 =?= (TSTR b{fdsafdsa\\}, 12) 76 | lexString b{"fdsafdsa\\"} 1 77 78 =?= {Unexpected EOF (string)} 79 | lexString b{"fdsafdsa\"more} 1 80 81 82 ;;; Lexing Digits ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 83 84 = (consumeDigits input startOff) 85 ^ _ 0 startOff 86 ? (loop acc off) 87 @ chr (barIdx off input) 88 | ifNot (isDigit chr) [acc off] 89 @ !acc (add (sub chr 48) (mul 10 acc)) 90 | loop acc (inc off) 91 92 =?= [1234 5] (consumeDigits b{+1234+} 1) 93 =?= [0 0] (consumeDigits b{+1234+} 0) 94 95 96 ;;; Lexing Strings of Zeros ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 97 98 = (consumeZeros input startOff) 99 ^ _ 0 startOff 100 ? (go !count off) 101 | if ({0} == barIdx off input) 102 | go (inc count) (inc off) 103 | [count off] 104 105 =?= [0 0] | consumeZeros b{+0000+} 0 106 =?= [4 5] | consumeZeros b{+0000+} 1 107 =?= [3 5] | consumeZeros b{+0000+} 2 108 109 110 ;;; Lexing Decimal Sections ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 111 112 = noDig | {No digits after dot (.)} 113 114 ; This can't be turned into inlined-continuation-passing-style because 115 ; we have two places where we return successfully, and the short-circult 116 ; case is the common case, so we don't want to just do the whole thing 117 ; each time. 118 ; 119 ; Inlining this anyways doesn't make things faster, because it's used 120 ; in the body of a let-binding, so all of the branches become calls to 121 ; `if` instead of being a part of the law's code. 122 ; 123 > Bar > Nat > ResultPair Deci Nat 124 = (lexDecimal input off) 125 | ifNot ({.} == barIdx off input) [0 off] 126 ; 127 @ off | inc off 128 @ [numZeros zof] | **consumeZeros input off 129 @ [nat dof] | **consumeDigits input zof 130 ; 131 | if (dof == off) noDig 132 | [[numZeros nat] dof] 133 134 =?= [[2 33] 6] | lexDecimal b{#.0033#} 1 135 =?= [[1 0 ] 3] | lexDecimal b{#.0#} 1 136 =?= [[0 1 ] 3] | lexDecimal b{#.1#} 1 137 =?= [0 1] | lexDecimal b{##} 1 138 =?= [0 1] | lexDecimal b{#e2#} 1 139 =?= noDig | lexDecimal b{#.#} 1 140 141 142 ;;; Lexing Exponent Sections ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 143 144 ; 145 ; optional exponent. {e,E}{-,+,[nothing]}[digits] 146 ; 147 148 = noDig | {No digits in exponent} 149 150 ; 151 ; This can't be turned into inlined-continuation-passing-style because 152 ; we have two places where we return successfully, and the short-circult 153 ; case is the common case, so we don't want to just do the whole thing 154 ; each time. 155 ; 156 ; if we were to just inline this anyways, that doesn't make things faster, 157 ; because it's used in the body of a let-binding, so all of the branches 158 ; become calls to `if` instead of being a part of the law's code. 159 ; 160 > Bar > Nat > ResultPair Expo Nat 161 = (lexExponent input off) 162 ; 163 @ chr (barIdx off input) 164 @ hasExp ((chr == {e}) || (chr == {E})) 165 ; 166 | ifNot hasExp [0 off] 167 @ off (inc off) 168 ; 169 @ chr (barIdx off input) 170 @ nega (chr == {-}) 171 @ posi (chr == {+}) 172 @ signed (nega || posi) 173 @ off (add off signed) 174 ; 175 @ [nat dof] (**consumeDigits input off) 176 ; 177 | if (dof == off) noDig 178 ; 179 | [[nega nat] dof] 180 181 =?= [[0 0] 3] | lexExponent b{#e0#} 1 182 =?= [[0 0] 4] | lexExponent b{#e00#} 1 183 =?= [[0 0] 5] | lexExponent b{#e+00#} 1 184 =?= [[1 0] 5] | lexExponent b{#e-00#} 1 185 =?= [[1 1] 5] | lexExponent b{#e-01#} 1 186 =?= [[1 10] 5] | lexExponent b{#e-10#} 1 187 =?= [[1 11] 5] | lexExponent b{#e-11#} 1 188 =?= noDig | lexExponent b{#e-#} 1 189 =?= noDig | lexExponent b{#e+#} 1 190 =?= noDig | lexExponent b{#e#} 1 191 =?= [0 0] | lexExponent b{#e00#} 0 192 =?= [0 1] | lexExponent b{#g00#} 1 193 194 195 ;;; Lexing Json ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 196 197 = (lexJson input) 198 ^ _ 0 199 ? (loop off) 200 # switch (barIdx off input) 201 * 0 202 | NIL 203 * n 204 | ifNot (b#null == barSlice off 4 input) 0 205 | (TNULL, loop (add 4 off)) 206 * t 207 | ifNot (b#true == barSlice off 4 input) 0 208 | (TTRUE, loop (add 4 off)) 209 * f 210 | ifNot (b#false == barSlice off 5 input) 0 211 | (TFALSE, loop (add 5 off)) 212 ; 213 * }_,_ (TCOMMA, loop (inc off)) 214 * }_:_ (TCOLON, loop (inc off)) 215 * }_[_ (TLBRAK, loop (inc off)) 216 * }_]_ (TRBRAK, loop (inc off)) 217 * }_{_ (TLCURL, loop (inc off)) 218 * }_}_ (TRCURL, loop (inc off)) 219 ; 220 * }_"_ 221 @ res (**lexString input (inc off)) 222 | if res (**TERR off res, 0) 223 @ [tok off] res 224 [tok (loop off)] 225 ; 226 * 32 (TSPACE, loop (chomp input | inc off)) 227 * 9 (TSPACE, loop (chomp input | inc off)) 228 * 10 (TSPACE, loop (chomp input | inc off)) 229 * 13 (TSPACE, loop (chomp input | inc off)) 230 * _ 231 @ fst | barIdx off input 232 @ neg | eql {-} fst 233 @ off | add neg off ; add bit+number to avoid branching. 234 @ chr | barIdx off input 235 @ [nat dof] | **consumeDigits input off 236 | if (dof == off) 237 | if neg (**TERR off {Bad number, no digits}, 0) 238 | (**TERR off {Unexpected Character}, 0) 239 ; 240 @ off | dof 241 @ [deci off]@decRes | lexDecimal input off 242 | if decRes (**TERR off decRes, 0) 243 @ [expi off]@expRes | lexExponent input off 244 | if expRes (**TERR off expRes, 0) 245 ; 246 | (**TNUM neg nat deci expi, loop off) 247 248 =?= (listToRow | lexJson b{{"x":999}}) 249 ++ TLCURL 250 ++ TSTR b#x 251 ++ TCOLON 252 ++ TNUM FALSE 999 0 0 253 ++ TRCURL 254 255 =?= (listToRow | lexJson b{{"x":-999}}) 256 ++ TLCURL 257 ++ TSTR b#x 258 ++ TCOLON 259 ++ TNUM TRUE 999 0 0 260 ++ TRCURL 261 262 =?= (listToRow | lexJson b{{"x":9.9}}) 263 ++ TLCURL 264 ++ TSTR b#x 265 ++ TCOLON 266 ++ TNUM FALSE 9 [0 9] 0 267 ++ TRCURL 268 269 =?= (listToRow | lexJson b{{"x":9e9}}) 270 ++ TLCURL 271 ++ TSTR b#x 272 ++ TCOLON 273 ++ TNUM FALSE 9 0 [0 9] 274 ++ TRCURL 275 276 =?= (listToRow | lexJson b{{"x":09.09e9}}) 277 ++ TLCURL 278 ++ TSTR b#x 279 ++ TCOLON 280 ++ TNUM FALSE 9 [1 9] [0 9] 281 ++ TRCURL 282 283 =?= (listToRow | lexJson b{ { "x" : 999 } }) 284 ++ TSPACE 285 ++ TLCURL 286 ++ TSPACE 287 ++ TSTR b#x 288 ++ TSPACE 289 ++ TCOLON 290 ++ TSPACE 291 ++ TNUM FALSE 999 0 0 292 ++ TSPACE 293 ++ TRCURL 294 ++ TSPACE 295 296 297 ;;; Representation ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 298 299 abstype#Json 300 301 > Json > Nat 302 = (jsonTag x) 303 @ t (typeTag x) 304 | if (t == natTag) x t 305 306 ; JNUM.conHas=0 because fetching with idx-0 works on (0 _) 307 ; JNUM.conRaw=0 because wrapper in (0 _) 308 309 = (dataProps tag ari fun has raw) 310 | bt [{conTag},tag {conAri},ari {conFun},fun {conHas},has {conRaw},raw] 311 312 = jnullProps | dataProps {null} 0 jsonTag 0 0 313 = jtrueProps | dataProps {true} 0 jsonTag 0 0 314 = jfalseProps | dataProps {false} 0 jsonTag 0 0 315 = jstrProps | dataProps barTag 1 jsonTag 0 1 316 = jnumProps | dataProps appTag 1 jsonTag 0 0 317 = jvecProps | dataProps rowTag 1 jsonTag 0 1 318 = jmapProps | dataProps tabTag 1 jsonTag 0 1 319 320 #= 0 jnullProps JNULL {null} 321 #= 0 jtrueProps JTRUE {true} 322 #= 0 jfalseProps JFALSE {false} 323 #= 0 jstrProps JSTR (JSTR x ?? x) 324 #= 0 jnumProps JNUM (JNUM n ?? 0 n) 325 #= 0 jvecProps JVEC (JVEC xs ?? xs) 326 #= 0 jmapProps JMAP (JMAP kv ?? kv) 327 328 = (jsonWithType j) 329 # case j 330 - JNULL [{n} j] 331 - JTRUE [{t} j] 332 - JFALSE [{f} j] 333 - JSTR x [{s} x] 334 - JNUM n [{u} n] 335 - JVEC xs [{v} xs] 336 - JMAP kv [{m} kv] 337 338 =?= | map jsonWithType 339 | [ {null} {true} {false} b{lol} 0-9 [0-3 0-4 0-5] #[foo={null}] ] 340 ++ [%n %null] 341 ++ [%t %true] 342 ++ [%f %false] 343 ++ [%s b#lol] 344 ++ [%u 9] 345 ++ [%v [0-3 0-4 0-5]] 346 ++ [%m [foo={null}]] 347 348 349 ;;; Parser ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 350 351 = (eatSpace stream) 352 : t ts < listCase stream stream 353 | if (TSPACE == t) ts stream 354 355 = (parseLexerStream inputStream topFail topOkay) 356 # mutrec %loop 357 | val inputStream topFail topOkay 358 * (val stream fail ok) 359 @ stream | eatSpace stream 360 : t ts < listCase stream (fail {value} stream) 361 # switch (dataTag t) 362 * null | ok JNULL ts 363 * true | ok JTRUE ts 364 * false | ok JFALSE ts 365 * l_brak | arr NIL ts fail ok 366 * l_curl | obj 0 NIL ts fail ok 367 * str | ok (JSTR | idx 1 t) ts 368 * num | ok (JNUM | idx 2 t) ts ; todo: more data. 369 * err @ [_ off why] t 370 @ msg (strCat [{lex error @} showNat-off {: } why]) 371 | fail msg stream 372 * _ | fail {value} stream 373 ; 374 * (obj count pairs stream fail ok) 375 ; 376 @ stream (eatSpace stream) 377 ; 378 : t@[k v] ts < listCase stream (fail }@'}', or key@ stream) 379 ; 380 | if (t == TRCURL) 381 @ !row (sizedListToRow count pairs) 382 @ !tab (tabFromPairs row) 383 | ok (JMAP tab) ts 384 ; 385 | if (isNat t || neq {str} k) 386 | trk #[=t =k =v] 387 | fail }@'}', or key@ stream 388 ; 389 @ newKey | barNat v 390 @ stream | eatSpace ts 391 @ complain | fail {Expecting :} stream 392 : t ts < listCase stream complain 393 ; 394 | if (TCOLON /= t) complain 395 ; 396 @ stream | eatSpace ts 397 : newVal stream < val stream fail 398 @ !pairs | CONS (newKey, newVal) pairs 399 @ !count | inc count 400 @ stream | eatSpace stream 401 @ complain | fail }@Expecting , or }@ stream 402 : t ts < listCase stream complain 403 ; 404 | if (t == TRCURL) | obj count pairs stream fail ok 405 | if (t == TCOMMA) | obj count pairs ts fail ok 406 ; 407 | complain 408 ; 409 * (arr acc stream fail ok) 410 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [end] 411 @ stream | eatSpace stream 412 @ complain | fail {space, ']', or value} stream 413 : t ts < listCase stream complain 414 ; 415 | if (t == TRBRAK) 416 @ res (JVEC | listToRowRev acc) 417 | ok res ts 418 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [val] 419 : val more < val stream fail 420 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [more] 421 @ more | eatSpace more 422 @ complain | fail {space, ']', or value} stream 423 : m ms < listCase more complain 424 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [final] 425 | if (TRBRAK == m) 426 @ acc (CONS val acc) 427 @ !res (JVEC | listToRowRev acc) ; TODO: use a sized operation here 428 | ok res ms 429 | if (TCOMMA == m) 430 @ acc (CONS val acc) 431 | arr acc ms fail ok 432 | fail {space or ',' or ']'} more 433 434 = (parseJson input) 435 @ tokStream | lexJson input 436 @ (fail reason _) | reason 437 @ (okay v extra) | [v extra] 438 | parseLexerStream tokStream fail okay 439 440 =?= (parseJson b{"}) 441 | {lex error @0: Unexpected EOF (string)} 442 443 =?= (parseJson b{[null]}) 444 ^ (JVEC [_], NIL) 445 | JNULL 446 447 =?= (parseJson b{ [null,null,true,false] }) 448 ^ (JVEC _, [TSPACE 0]) 449 ++ JNULL 450 ++ JNULL 451 ++ JTRUE 452 ++ JFALSE 453 454 =?= (parseJson b{[[]]}) 455 ^ (JVEC [_], NIL) 456 | JVEC [] 457 458 =?= (parseJson b{[[null,null]]}) 459 ^ (JVEC [_], NIL) 460 | (JVEC (JNULL, JNULL)) 461 462 =?= (parseJson b{[[0,0],[0, 0],[0 ,0],[0 , 0]]}) 463 ^ (JVEC _, NIL) 464 ++ JVEC (JNUM 0, JNUM 0) 465 ++ JVEC (JNUM 0, JNUM 0) 466 ++ JVEC (JNUM 0, JNUM 0) 467 ++ JVEC (JNUM 0, JNUM 0) 468 469 =?= (parseJson b{[0, ",", 1]}) 470 ^ (JVEC _, NIL) 471 | (JNUM 0, JSTR b{,}, JNUM 1) 472 473 =?= (parseJson b{{}}) (JMAP #[], NIL) 474 =?= (parseJson b{ { }}) (JMAP #[], NIL) 475 =?= (parseJson b{ {"str":"x", "null":null}}) 476 ^ (JMAP _, NIL) 477 ## =str | JSTR b#x 478 ## =null | JNULL 479 480 481 ;;; Printer ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 482 483 ;;; TODO: Substitue escape characters. 484 ;;; TODO: Handle edge-cases with "\u1234" escapes. 485 486 = (printVec go vec) 487 ^ [b{[} _ b{]}] 488 | listIntersperse b{,} 489 | listFromRow 490 | map go vec 491 492 = (printMap go map) 493 ^ [b#}_{_ _ b#}_}_] 494 | listIntersperse b{,} 495 : [k v] < listForEach (tabToPairList map) 496 | (go (JSTR | natBar k), b{:}, go v) 497 498 = (printJson input) 499 ^ barFlat (_ input) 500 ? (go json) 501 # case json 502 - JNULL | b#null 503 - JTRUE | b#true 504 - JFALSE | b#false 505 - JSTR s | [b{"} s b{"}] 506 - JNUM n | natBar (showNat n) 507 - JVEC v | printVec go v 508 - JMAP t | printMap go t 509 - _ | {bad json} [json [tag=(jsonTag json)]] 510 511 =?= b{["hi",null,true,false,99]} 512 ^ printJson (JVEC _) 513 ++ JSTR b#hi 514 ++ JNULL 515 ++ JTRUE 516 ++ JFALSE 517 ++ JNUM 99 518 519 =?= b{{"null":null,"bools":[true,false]}} 520 ^ printJson (JMAP _) 521 ## =null | JNULL 522 ## =bools | JVEC [JTRUE JFALSE] 523 524 =?= ++ b{{}} 525 ++ b{{}} 526 ++ b{{"str":"x","null":null}} 527 ++ printJson | idx 0 | parseJson b{{}} 528 ++ printJson | idx 0 | parseJson b{ { }} 529 ++ printJson | idx 0 | parseJson b{ {"str":"x", "null":null}} 530 531 532 ;;; Exports ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 533 534 ^-^ 535 ^-^ JLexeme 536 ^-^ TSPACE TNULL TTRUE TFALSE 537 ^-^ TCOMMA TCOLON 538 ^-^ TLBRAK TRBRAK TLCURL TRCURL 539 ^-^ TSTR TNUM 540 ^-^ 541 ^-^ 542 ^-^ Json 543 ^-^ JNULL JTRUE JFALSE JSTR JNUM JVEC JMAP 544 ^-^ 545 ^-^ lexJson parseJson printJson 546 ^-^