plunder

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

json.sire (13915B)


      1 ; Copyright 2023 The Plunder Authors
      2 ; Use of this source code is governed by a BSD-style license that can be
      3 ; found in the LICENSE file.
      4 
      5 #### json <- hitch
      6 
      7 :| sire
      8 :| sire_25_datatype [natTag barTag appTag rowTag tabTag]
      9 :| mutrec           [{#mutrec}]
     10 :| stew
     11 
     12 
     13 ;;; Lexemes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     14 
     15 abstype#(ResultPair a b) ; type (ResultPair a b) = (Or Str (a, b))
     16 abstype#(MaybePair a b)  ; type (MaybePair a b)  = (Or Zero (a, b))
     17 
     18 typedef#Deci#(MaybePair Nat Nat)
     19 typedef#Expo#(MaybePair Nat Nat)
     20 
     21 # data JLexeme
     22 * ( TSPACE = space  )
     23 * ( TNULL  = null   )
     24 * ( TTRUE  = true   )
     25 * ( TFALSE = false  )
     26 * ( TCOMMA = comma  )
     27 * ( TCOLON = colon  )
     28 * ( TLBRAK = l_brak )
     29 * ( TRBRAK = r_brak )
     30 * ( TLCURL = l_curl )
     31 * ( TRCURL = r_curl )
     32 * ( TSTR   = str    ) str/Bar
     33 * ( TNUM   = num    ) neg/Bit num/Nat dec/Deci exp/Expo
     34 * ( TERR   = err    ) off/Nat why/Str
     35 
     36 
     37 ;;; Lexing Whitespace  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     38 
     39 = (chomp input off)
     40 # switch (barIdx off input)
     41 * 32 | chomp input (inc off)
     42 * 9  | chomp input (inc off)
     43 * 10 | chomp input (inc off)
     44 * 13 | chomp input (inc off)
     45 * _  | off
     46 
     47 =?= 7 (chomp b{	  	   xyz} 0)
     48 
     49 
     50 ;;; Lexing Strings ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     51 
     52 = (lexStringLoop input top off)
     53 @ off (barElemIndexOff {"} off input)
     54 | if (eql off barLen-input)
     55     | {Unexpected EOF (string)}
     56 @ (countSlashes off count)
     57     | if (eql {\} | barIdx off input)
     58         | countSlashes (dec off) (inc count)
     59     | count
     60 @ numSlashes | **countSlashes (dec off) 0
     61 | if (and (gth numSlashes 0) (mod numSlashes 2))
     62     | lexStringLoop input top (inc off)
     63 | else
     64     @ tok (**TSTR (barSlice top (sub off top) input))
     65     | (tok, inc off)
     66 
     67 (lexString input off)=(lexStringLoop input off off)
     68 
     69 =?= (TSTR b{fdsafdsa}, 10)
     70   | lexString b{"fdsafdsa"} 1
     71 
     72 =?= (TSTR b{fdsafdsa\"more}, 16)
     73   | lexString b{"fdsafdsa\"more"} 1
     74 
     75 =?= (TSTR b{fdsafdsa\\}, 12)
     76   | lexString b{"fdsafdsa\\"} 1
     77 
     78 =?= {Unexpected EOF (string)}
     79   | lexString b{"fdsafdsa\"more} 1
     80 
     81 
     82 ;;; Lexing Digits ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     83 
     84 = (consumeDigits input startOff)
     85 ^ _ 0 startOff
     86 ? (loop acc off)
     87 @ chr (barIdx off input)
     88 | ifNot (isDigit chr) [acc off]
     89 @ !acc (add (sub chr 48) (mul 10 acc))
     90 | loop acc (inc off)
     91 
     92 =?= [1234 5] (consumeDigits b{+1234+} 1)
     93 =?= [0 0]    (consumeDigits b{+1234+} 0)
     94 
     95 
     96 ;;; Lexing Strings of Zeros ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     97 
     98 = (consumeZeros input startOff)
     99 ^ _ 0 startOff
    100 ? (go !count off)
    101 | if ({0} == barIdx off input)
    102     | go (inc count) (inc off)
    103 | [count off]
    104 
    105 =?= [0 0] | consumeZeros b{+0000+} 0
    106 =?= [4 5] | consumeZeros b{+0000+} 1
    107 =?= [3 5] | consumeZeros b{+0000+} 2
    108 
    109 
    110 ;;; Lexing Decimal Sections ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    111 
    112 = noDig | {No digits after dot (.)}
    113 
    114 ; This can't be turned into inlined-continuation-passing-style because
    115 ; we have two places where we return successfully, and the short-circult
    116 ; case is the common case, so we don't want to just do the whole thing
    117 ; each time.
    118 ;
    119 ; Inlining this anyways doesn't make things faster, because it's used
    120 ; in the body of a let-binding, so all of the branches become calls to
    121 ; `if` instead of being a part of the law's code.
    122 ;
    123 > Bar > Nat > ResultPair Deci Nat
    124 = (lexDecimal input off)
    125 | ifNot ({.} == barIdx off input) [0 off]
    126 ;
    127 @ off            | inc off
    128 @ [numZeros zof] | **consumeZeros input off
    129 @ [nat dof]      | **consumeDigits input zof
    130 ;
    131 | if (dof == off) noDig
    132 | [[numZeros nat] dof]
    133 
    134 =?= [[2 33] 6] | lexDecimal b{#.0033#} 1
    135 =?= [[1 0 ] 3] | lexDecimal b{#.0#}    1
    136 =?= [[0 1 ] 3] | lexDecimal b{#.1#}    1
    137 =?= [0      1] | lexDecimal b{##}      1
    138 =?= [0      1] | lexDecimal b{#e2#}    1
    139 =?= noDig      | lexDecimal b{#.#}     1
    140 
    141 
    142 ;;; Lexing Exponent Sections ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    143 
    144 ;
    145 ; optional exponent. {e,E}{-,+,[nothing]}[digits]
    146 ;
    147 
    148 = noDig | {No digits in exponent}
    149 
    150 ;
    151 ; This can't be turned into inlined-continuation-passing-style because
    152 ; we have two places where we return successfully, and the short-circult
    153 ; case is the common case, so we don't want to just do the whole thing
    154 ; each time.
    155 ;
    156 ; if we were to just inline this anyways, that doesn't make things faster,
    157 ; because it's used in the body of a let-binding, so all of the branches
    158 ; become calls to `if` instead of being a part of the law's code.
    159 ;
    160 > Bar > Nat > ResultPair Expo Nat
    161 = (lexExponent input off)
    162 ;
    163 @ chr    (barIdx off input)
    164 @ hasExp ((chr == {e}) || (chr == {E}))
    165 ;
    166 | ifNot hasExp [0 off]
    167 @ off    (inc off)
    168 ;
    169 @ chr    (barIdx off input)
    170 @ nega   (chr == {-})
    171 @ posi   (chr == {+})
    172 @ signed (nega || posi)
    173 @ off    (add off signed)
    174 ;
    175 @ [nat dof] (**consumeDigits input off)
    176 ;
    177 | if (dof == off) noDig
    178 ;
    179 | [[nega nat] dof]
    180 
    181 =?= [[0  0] 3] | lexExponent b{#e0#}   1
    182 =?= [[0  0] 4] | lexExponent b{#e00#}  1
    183 =?= [[0  0] 5] | lexExponent b{#e+00#} 1
    184 =?= [[1  0] 5] | lexExponent b{#e-00#} 1
    185 =?= [[1  1] 5] | lexExponent b{#e-01#} 1
    186 =?= [[1 10] 5] | lexExponent b{#e-10#} 1
    187 =?= [[1 11] 5] | lexExponent b{#e-11#} 1
    188 =?= noDig      | lexExponent b{#e-#}   1
    189 =?= noDig      | lexExponent b{#e+#}   1
    190 =?= noDig      | lexExponent b{#e#}    1
    191 =?= [0 0]      | lexExponent b{#e00#}  0
    192 =?= [0 1]      | lexExponent b{#g00#}  1
    193 
    194 
    195 ;;; Lexing Json ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    196 
    197 = (lexJson input)
    198 ^ _ 0
    199 ? (loop off)
    200 # switch (barIdx off input)
    201 * 0
    202     | NIL
    203 * n
    204     | ifNot (b#null == barSlice off 4 input) 0
    205     | (TNULL, loop (add 4 off))
    206 * t
    207     | ifNot (b#true == barSlice off 4 input) 0
    208     | (TTRUE, loop (add 4 off))
    209 * f
    210     | ifNot (b#false == barSlice off 5 input) 0
    211     | (TFALSE, loop (add 5 off))
    212 ;
    213 * }_,_ (TCOMMA, loop (inc off))
    214 * }_:_ (TCOLON, loop (inc off))
    215 * }_[_ (TLBRAK, loop (inc off))
    216 * }_]_ (TRBRAK, loop (inc off))
    217 * }_{_ (TLCURL, loop (inc off))
    218 * }_}_ (TRCURL, loop (inc off))
    219 ;
    220 * }_"_
    221     @ res (**lexString input (inc off))
    222     | if res (**TERR off res, 0)
    223     @ [tok off] res
    224     [tok (loop off)]
    225 ;
    226 * 32 (TSPACE, loop (chomp input | inc off))
    227 * 9  (TSPACE, loop (chomp input | inc off))
    228 * 10 (TSPACE, loop (chomp input | inc off))
    229 * 13 (TSPACE, loop (chomp input | inc off))
    230 * _
    231     @ fst       | barIdx off input
    232     @ neg       | eql {-} fst
    233     @ off       | add neg off ; add bit+number to avoid branching.
    234     @ chr       | barIdx off input
    235     @ [nat dof] | **consumeDigits input off
    236     | if (dof == off)
    237         | if neg (**TERR off {Bad number, no digits}, 0)
    238         | (**TERR off {Unexpected Character}, 0)
    239     ;
    240     @ off               | dof
    241     @ [deci off]@decRes | lexDecimal input off
    242     | if decRes (**TERR off decRes, 0)
    243     @ [expi off]@expRes | lexExponent input off
    244     | if expRes (**TERR off expRes, 0)
    245     ;
    246     | (**TNUM neg nat deci expi, loop off)
    247 
    248 =?= (listToRow | lexJson b{{"x":999}})
    249  ++ TLCURL
    250  ++ TSTR b#x
    251  ++ TCOLON
    252  ++ TNUM FALSE 999 0 0
    253  ++ TRCURL
    254 
    255 =?= (listToRow | lexJson b{{"x":-999}})
    256  ++ TLCURL
    257  ++ TSTR b#x
    258  ++ TCOLON
    259  ++ TNUM TRUE 999 0 0
    260  ++ TRCURL
    261 
    262 =?= (listToRow | lexJson b{{"x":9.9}})
    263  ++ TLCURL
    264  ++ TSTR b#x
    265  ++ TCOLON
    266  ++ TNUM FALSE 9 [0 9] 0
    267  ++ TRCURL
    268 
    269 =?= (listToRow | lexJson b{{"x":9e9}})
    270  ++ TLCURL
    271  ++ TSTR b#x
    272  ++ TCOLON
    273  ++ TNUM FALSE 9 0 [0 9]
    274  ++ TRCURL
    275 
    276 =?= (listToRow | lexJson b{{"x":09.09e9}})
    277  ++ TLCURL
    278  ++ TSTR b#x
    279  ++ TCOLON
    280  ++ TNUM FALSE 9 [1 9] [0 9]
    281  ++ TRCURL
    282 
    283 =?= (listToRow | lexJson b{ { "x" : 999 } })
    284  ++ TSPACE
    285  ++ TLCURL
    286  ++ TSPACE
    287  ++ TSTR b#x
    288  ++ TSPACE
    289  ++ TCOLON
    290  ++ TSPACE
    291  ++ TNUM FALSE 999 0 0
    292  ++ TSPACE
    293  ++ TRCURL
    294  ++ TSPACE
    295 
    296 
    297 ;;; Representation ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    298 
    299 abstype#Json
    300 
    301 > Json > Nat
    302 = (jsonTag x)
    303 @ t (typeTag x)
    304 | if (t == natTag) x t
    305 
    306 ; JNUM.conHas=0 because fetching with idx-0 works on (0 _)
    307 ; JNUM.conRaw=0 because wrapper in (0 _)
    308 
    309 = (dataProps tag ari fun has raw)
    310 | bt [{conTag},tag {conAri},ari {conFun},fun {conHas},has {conRaw},raw]
    311 
    312 = jnullProps  | dataProps {null}  0 jsonTag 0 0
    313 = jtrueProps  | dataProps {true}  0 jsonTag 0 0
    314 = jfalseProps | dataProps {false} 0 jsonTag 0 0
    315 = jstrProps   | dataProps barTag  1 jsonTag 0 1
    316 = jnumProps   | dataProps appTag  1 jsonTag 0 0
    317 = jvecProps   | dataProps rowTag  1 jsonTag 0 1
    318 = jmapProps   | dataProps tabTag  1 jsonTag 0 1
    319 
    320 #= 0 jnullProps  JNULL  {null}
    321 #= 0 jtrueProps  JTRUE  {true}
    322 #= 0 jfalseProps JFALSE {false}
    323 #= 0 jstrProps   JSTR   (JSTR x ?? x)
    324 #= 0 jnumProps   JNUM   (JNUM n ?? 0 n)
    325 #= 0 jvecProps   JVEC   (JVEC xs ?? xs)
    326 #= 0 jmapProps   JMAP   (JMAP kv ?? kv)
    327 
    328 = (jsonWithType j)
    329 # case j
    330 - JNULL   [{n} j]
    331 - JTRUE   [{t} j]
    332 - JFALSE  [{f} j]
    333 - JSTR x  [{s} x]
    334 - JNUM n  [{u} n]
    335 - JVEC xs [{v} xs]
    336 - JMAP kv [{m} kv]
    337 
    338 =?=   | map jsonWithType
    339       | [ {null} {true} {false} b{lol} 0-9 [0-3 0-4 0-5] #[foo={null}] ]
    340  ++ [%n %null]
    341  ++ [%t %true]
    342  ++ [%f %false]
    343  ++ [%s b#lol]
    344  ++ [%u 9]
    345  ++ [%v [0-3 0-4 0-5]]
    346  ++ [%m [foo={null}]]
    347 
    348 
    349 ;;; Parser ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    350 
    351 = (eatSpace stream)
    352 : t ts < listCase stream stream
    353 | if (TSPACE == t) ts stream
    354 
    355 = (parseLexerStream inputStream topFail topOkay)
    356 # mutrec %loop
    357    | val inputStream topFail topOkay
    358 * (val stream fail ok)
    359    @ stream | eatSpace stream
    360    : t ts   < listCase stream (fail {value} stream)
    361    # switch (dataTag t)
    362    * null   | ok JNULL ts
    363    * true   | ok JTRUE ts
    364    * false  | ok JFALSE ts
    365    * l_brak | arr NIL ts fail ok
    366    * l_curl | obj 0 NIL ts fail ok
    367    * str    | ok (JSTR | idx 1 t) ts
    368    * num    | ok (JNUM | idx 2 t) ts  ; todo: more data.
    369    * err    @ [_ off why] t
    370             @ msg (strCat [{lex error @} showNat-off {: } why])
    371             | fail msg stream
    372    * _      | fail {value} stream
    373 ;
    374 * (obj count pairs stream fail ok)
    375    ;
    376    @ stream (eatSpace stream)
    377    ;
    378    : t@[k v] ts < listCase stream (fail }@'}', or key@ stream)
    379    ;
    380    | if (t == TRCURL)
    381        @ !row (sizedListToRow count pairs)
    382        @ !tab (tabFromPairs row)
    383        | ok (JMAP tab) ts
    384    ;
    385    | if (isNat t || neq {str} k)
    386        | trk #[=t =k =v]
    387        | fail }@'}', or key@ stream
    388    ;
    389    @ newKey   | barNat v
    390    @ stream   | eatSpace ts
    391    @ complain | fail {Expecting :} stream
    392    : t ts     < listCase stream complain
    393    ;
    394    | if (TCOLON /= t) complain
    395    ;
    396    @ stream        | eatSpace ts
    397    : newVal stream < val stream fail
    398    @ !pairs        | CONS (newKey, newVal) pairs
    399    @ !count        | inc count
    400    @ stream        | eatSpace stream
    401    @ complain      | fail }@Expecting , or }@ stream
    402    : t ts          < listCase stream complain
    403    ;
    404    | if (t == TRCURL) | obj count pairs stream fail ok
    405    | if (t == TCOMMA) | obj count pairs ts fail ok
    406    ;
    407    | complain
    408 ;
    409 * (arr acc stream fail ok)
    410    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [end]
    411    @ stream   | eatSpace stream
    412    @ complain | fail {space, ']', or value} stream
    413    : t ts     < listCase stream complain
    414    ;
    415    | if (t == TRBRAK)
    416        @ res (JVEC | listToRowRev acc)
    417        | ok res ts
    418    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [val]
    419    : val more < val stream fail
    420    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [more]
    421    @ more     | eatSpace more
    422    @ complain | fail {space, ']', or value} stream
    423    : m ms     < listCase more complain
    424    ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [final]
    425    | if (TRBRAK == m)
    426        @ acc (CONS val acc)
    427        @ !res (JVEC | listToRowRev acc) ; TODO: use a sized operation here
    428        | ok res ms
    429    | if (TCOMMA == m)
    430        @ acc (CONS val acc)
    431        | arr acc ms fail ok
    432    | fail {space or ',' or ']'} more
    433 
    434 = (parseJson input)
    435 @ tokStream       | lexJson input
    436 @ (fail reason _) | reason
    437 @ (okay v extra)  | [v extra]
    438 | parseLexerStream tokStream fail okay
    439 
    440 =?= (parseJson b{"})
    441   | {lex error @0: Unexpected EOF (string)}
    442 
    443 =?= (parseJson b{[null]})
    444   ^ (JVEC [_], NIL)
    445   | JNULL
    446 
    447 =?= (parseJson b{ [null,null,true,false] })
    448   ^ (JVEC _, [TSPACE 0])
    449  ++ JNULL
    450  ++ JNULL
    451  ++ JTRUE
    452  ++ JFALSE
    453 
    454 =?= (parseJson b{[[]]})
    455   ^ (JVEC [_], NIL)
    456   | JVEC []
    457 
    458 =?= (parseJson b{[[null,null]]})
    459   ^ (JVEC [_], NIL)
    460   | (JVEC (JNULL, JNULL))
    461 
    462 =?= (parseJson b{[[0,0],[0, 0],[0 ,0],[0 , 0]]})
    463   ^ (JVEC _, NIL)
    464  ++ JVEC (JNUM 0, JNUM 0)
    465  ++ JVEC (JNUM 0, JNUM 0)
    466  ++ JVEC (JNUM 0, JNUM 0)
    467  ++ JVEC (JNUM 0, JNUM 0)
    468 
    469 =?= (parseJson b{[0, ",", 1]})
    470   ^ (JVEC _, NIL)
    471   | (JNUM 0, JSTR b{,}, JNUM 1)
    472 
    473 =?= (parseJson b{{}})   (JMAP #[], NIL)
    474 =?= (parseJson b{ { }}) (JMAP #[], NIL)
    475 =?= (parseJson b{ {"str":"x", "null":null}})
    476   ^ (JMAP _, NIL)
    477  ## =str  | JSTR b#x
    478  ## =null | JNULL
    479 
    480 
    481 ;;; Printer ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    482 
    483 ;;; TODO: Substitue escape characters.
    484 ;;; TODO: Handle edge-cases with "\u1234" escapes.
    485 
    486 = (printVec go vec)
    487 ^ [b{[} _ b{]}]
    488 | listIntersperse b{,}
    489 | listFromRow
    490 | map go vec
    491 
    492 = (printMap go map)
    493 ^ [b#}_{_ _ b#}_}_]
    494 | listIntersperse b{,}
    495 : [k v] < listForEach (tabToPairList map)
    496 | (go (JSTR | natBar k), b{:}, go v)
    497 
    498 = (printJson input)
    499 ^ barFlat (_ input)
    500 ? (go json)
    501 # case json
    502 - JNULL  | b#null
    503 - JTRUE  | b#true
    504 - JFALSE | b#false
    505 - JSTR s | [b{"} s b{"}]
    506 - JNUM n | natBar (showNat n)
    507 - JVEC v | printVec go v
    508 - JMAP t | printMap go t
    509 - _      | {bad json} [json [tag=(jsonTag json)]]
    510 
    511 =?= b{["hi",null,true,false,99]}
    512   ^ printJson (JVEC _)
    513  ++ JSTR b#hi
    514  ++ JNULL
    515  ++ JTRUE
    516  ++ JFALSE
    517  ++ JNUM 99
    518 
    519 =?= b{{"null":null,"bools":[true,false]}}
    520   ^ printJson (JMAP _)
    521  ## =null  | JNULL
    522  ## =bools | JVEC [JTRUE JFALSE]
    523 
    524 =?=  ++ b{{}}
    525      ++ b{{}}
    526      ++ b{{"str":"x","null":null}}
    527  ++ printJson | idx 0 | parseJson b{{}}
    528  ++ printJson | idx 0 | parseJson b{ { }}
    529  ++ printJson | idx 0 | parseJson b{ {"str":"x", "null":null}}
    530 
    531 
    532 ;;; Exports ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    533 
    534 ^-^
    535 ^-^ JLexeme
    536 ^-^ TSPACE TNULL TTRUE TFALSE
    537 ^-^ TCOMMA TCOLON
    538 ^-^ TLBRAK TRBRAK TLCURL TRCURL
    539 ^-^ TSTR TNUM
    540 ^-^
    541 ^-^
    542 ^-^ Json
    543 ^-^ JNULL JTRUE JFALSE JSTR JNUM JVEC JMAP
    544 ^-^
    545 ^-^ lexJson parseJson printJson
    546 ^-^