1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
(** Copyright 2025-2026, Georgiy Belyanin, Ignat Sergeev *)

(** SPDX-License-Identifier: LGPL-3.0-or-later *)

open Angstrom
module Map = Base.Map.Poly

let is_whitespace = function
  | ' ' | '\t' | '\n' | '\r' -> true
  | _ -> false
;;

let whitespace = take_while is_whitespace
let whitespace1 = take_while1 is_whitespace
let token f = whitespace *> f <* whitespace

let is_digit = function
  | '0' .. '9' -> true
  | _ -> false
;;

let is_idschar = function
  | 'a' .. 'z' | 'A' .. 'Z' | '_' -> true
  | _ -> false
;;

let is_idchar = function
  | 'a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9' -> true
  | _ -> false
;;

let is_fbinopchar = function
  | '$' | '&' | '*' | '+' | '-' | '/' | '=' | '>' | '@' | '^' | '|' | '%' | '<' | '#' ->
    true
  | _ -> false
;;

let is_sbinopchar = function
  | '$'
  | '&'
  | '*'
  | '+'
  | '-'
  | '/'
  | '='
  | '>'
  | '@'
  | '^'
  | '|'
  | '%'
  | '<'
  | '!'
  | '.'
  | ':'
  | '?'
  | '~' -> true
  | _ -> false
;;

let is_funopchar = function
  | '?' | '~' | '!' -> true
  | _ -> false
;;

let is_sunopchar = is_sbinopchar

let kws =
  Map.of_alist_exn
    [ "rec", ()
    ; "let", ()
    ; "in", ()
    ; "type", ()
    ; "fun", ()
    ; "if", ()
    ; "then", ()
    ; "else", ()
    ]
;;

let spf = Format.asprintf
let pp_sep_space ppf () = Format.fprintf ppf " "
let pp_sep_newline ppf () = Format.fprintf ppf "\n"
let failf fmt = Format.kasprintf fail fmt
let parens f = token (char '(') *> f <* token (char ')')

let chainl1 e op =
  let rec go acc = lift2 (fun f x -> f acc x) op e >>= go <|> return acc in
  e >>= go
;;

let kw =
  let* kw = take_while1 is_idchar |> token in
  if Map.mem kws kw then return kw else fail (spf "expected keyword, but found %s" kw)
;;

let ident =
  let un_ident =
    let* hd = satisfy is_funopchar in
    let* tl = take_while is_sunopchar in
    String.cat (String.make 1 hd) tl |> return
  in
  let binop_ident =
    let* hd = satisfy is_fbinopchar in
    let* tl = take_while is_sbinopchar in
    String.cat (String.make 1 hd) tl |> return
  in
  let* ident =
    take_while1 is_idchar |> token <|> parens un_ident <|> parens binop_ident
  in
  if Map.find kws ident |> Option.is_none
  then ident |> return
  else failf "expected ident, but found keyword %s" ident
;;

let punit = (string "()" |> token) *> return Ast.punit
let plug = (string "_" |> token) *> return Ast.plug

let ptuple pattern =
  let tuple =
    let* fpattern = pattern in
    let* patterns = many (token (char ',') *> pattern) in
    return (Ast.ptuple (fpattern :: patterns))
  in
  parens tuple
;;

let pattern =
  fix (fun pattern ->
    punit <|> plug <|> ptuple pattern <|> (ident >>= fun ident -> return (Ast.ident ident)))
;;

let const =
  (string "()" |> token) *> return (Ast.cunit |> Ast.const)
  <|>
  let* v = take_while1 is_digit |> token in
  v |> int_of_string |> Ast.cint |> Ast.const |> return
;;

let var =
  let* ident = ident in
  Ast.var ident |> return
;;

let rec_flag =
  let rec_ = token (string "rec") *> return Ast.rec_ in
  rec_ <|> return Ast.nonrec_
;;

let let_ expr =
  let* rec_flag = rec_flag in
  let* pattern = pattern <?> "expected pattern after 'let'" in
  let* _ = token (char '=') in
  let* body =
    expr <?> spf "expected expression after 'let %a ='" Ast.pp_pattern pattern
  in
  let* _ = token (string "in") <?> "expected \'in\' after introducing new let binding" in
  let* expr = expr <?> "expected expression after 'let ... = ... in'" in
  Ast.let_ rec_flag pattern body expr |> return
;;

let fun_ expr =
  let* patterns = many1 pattern <?> "expected one or more function argument names" in
  let* _ = token (string "->") in
  let* expr =
    expr
    <?> spf
          "expected expression after 'fun %a ' "
          (Format.pp_print_list ~pp_sep:pp_sep_space Ast.pp_pattern)
          patterns
  in
  Ast.fun_ patterns expr |> return
;;

let app expr =
  let* f = expr in
  let* args = many1 expr in
  match args with
  | hd :: tl -> List.fold_left Ast.app (Ast.app f hd) tl |> return
  | _ -> assert false
;;

let binops expr =
  let binop op expr =
    let op = token op in
    chainl1 expr op
  in
  let combine op =
    string op *> return (fun lhs rhs -> Ast.app (Ast.app (Ast.var op) lhs) rhs)
  in
  (* Stolen from https://ocaml.org/manual/5.1/api/Ocaml_operators.html. *)
  expr
  |> binop (combine "*" <|> combine "/")
  |> binop (combine "+" <|> combine "-")
  |> binop
       (combine "<="
        <|> combine "=="
        <|> combine "<>"
        <|> combine "="
        <|> combine "<"
        <|> combine ">")
  |> binop (combine ">=" <|> combine ">")
  |> binop (combine "&&")
  |> binop (combine "||")
;;

let ite expr =
  let* cond = expr <?> spf "expected expression inside 'if' condition" in
  let* _ = token (string "then") <?> spf "expected 'then'" in
  let* then_ = expr <?> spf "expected expression inside 'then' clause" in
  let* _ = token (string "else") <?> spf "expected 'else'" in
  let* else_ = expr <?> spf "expected expression inside 'else' clause" in
  Ast.ite cond then_ else_ |> return
;;

let tuple expr =
  let tuple =
    let* fexpr = expr in
    let* exprs = many (token (char ',') *> expr) in
    return (Ast.tuple (fexpr :: exprs))
  in
  parens tuple
;;

let expr =
  fix (fun expr ->
    let expr' =
      let* c = whitespace *> peek_char in
      match c with
      | Some '0' .. '9' -> const
      | Some '(' ->
        let* r = const <|> parens expr <|> tuple expr <|> var in
        r |> return
      | _ -> var
    in
    let expr' = app expr' <|> expr' in
    let expr' = binops expr' in
    let expr' =
      (let* kw = kw in
       match kw with
       | "let" -> let_ expr
       | "fun" -> fun_ expr
       | "if" -> ite expr
       | _ -> fail "")
      <|> expr'
    in
    expr')
;;

let ty = fail "types and ADTs are not implemented yet"

let let_decl =
  let* rec_flag = rec_flag in
  let* pattern = pattern <?> "expected pattern after 'let'" in
  let* _ = token (char '=') in
  let* body =
    expr <?> spf "expected expression after 'let %a ='" Ast.pp_pattern pattern
  in
  let* _ = token (string ";;") in
  Ast.letdecl rec_flag pattern body |> return
;;

let top_level =
  let* kw = kw in
  match kw with
  | "let" ->
    let* () = commit in
    let_decl
  | "type" ->
    let* () = commit in
    ty
  | _ -> fail "expected top level declaration"
;;

let parse = parse_string ~consume:Consume.All (many1 top_level)

let parse_and_print code =
  match parse_string ~consume:Consume.All (many1 top_level) code with
  | Ok res ->
    Format.printf "%a" (Format.pp_print_list ~pp_sep:pp_sep_newline Ast.pp_top_level) res
  | Error res ->
    Format.printf
      "%s"
      (res
       |> String.split_on_char '>'
       |> List.rev
       |> List.map String.trim
       |> String.concat "\n")
;;

let%expect_test "const definition to 15" =
  parse_and_print "let const_15 = fun () -> 15;;";
  [%expect {| let const_15 = fun () -> 15;; |}]
;;

let%expect_test "simple algebraic expression" =
  parse_and_print "let sub = fun a b c -> a + b + c;;";
  [%expect {| let sub = fun a b c -> (+) ((+) a b) c;; |}]
;;

let%expect_test "find stddev" =
  parse_and_print "let stddev = fun a b c -> a * a + ((b * b) + (c * c));;";
  [%expect {| let stddev = fun a b c -> (+) ((*) a a) ((+) ((*) b b) ((*) c c));; |}]
;;

let%expect_test "use let ins for test" =
  parse_and_print
    {|
    let some_test = fun a b c ->
      let a_plus_b = a + b in
      let b_plus_c = b + c in
      let c_plus_a = c + a in
      5
    ;;
  |};
  [%expect
    {|
    let some_test =
      fun a b c ->
        let a_plus_b =
          (+) a b
        in
        let b_plus_c =
          (+) b c
        in
        let c_plus_a =
          (+) c a
        in
        5
    ;;
    |}]
;;

let%expect_test "wrong let ins" =
  parse_and_print
    {|
    let some_test = fun () ->
      let % = 5 in 10
    ;;
  |};
  [%expect
    {| expected expression after 'let some_test =': expected ident, but found keyword fun |}]
;;

let%expect_test "simple call" =
  parse_and_print "let swap = fun a b k -> k b a;;";
  [%expect {| let swap = fun a b k -> k b a;; |}]
;;

let%expect_test "simple call with parens" =
  parse_and_print "let wrong_swap = fun a b k -> k (b a);;";
  [%expect {| let wrong_swap = fun a b k -> k (b a);; |}]
;;

let%expect_test "factorial" =
  parse_and_print
    {|
    let rec fac = fun n ->
      if n = 0 then
        1
      else
        n * (fac (n - 1))
    ;;
  |};
  [%expect {| let rec fac = fun n -> if (=) n 0 then 1 else (*) n (fac ((-) n 1));; |}]
;;

let%expect_test "factorial" =
  parse_and_print
    {|
    let f = 15;;
    let g = f + 3;;
    let h = g * 15;;
  |};
  [%expect
    {|
    let f = 15;;

    let g = (+) f 3;;

    let h = (*) g 15;;
    |}]
;;

let%expect_test "ite in ite" =
  parse_and_print
    {|
    let f = if (if 1 then 0 else 0) then 1 else 0;;
  |};
  [%expect {| let f = if if 1 then 0 else 0 then 1 else 0;; |}]
;;

let%expect_test "tuples and plugs" =
  parse_and_print
    {|
    let () = _;;
    let _ = _;;
    let (a, b) = _;;
  |};
  [%expect
    {|
    let () = _;;

    let _ = _;;

    let (a, b) = _;;
    |}]
;;

let%expect_test "quick check" =
  let top_levels =
    parse
      {|
    let rec fac = fun n ->
      if n = 0 then
        1
      else
        n * (fac (n - 1))
    ;;
  |}
    |> Result.get_ok
  in
  let top_levels' =
    Format.asprintf
      "%a"
      (Format.pp_print_list ~pp_sep:pp_sep_newline Ast.pp_top_level)
      top_levels
    |> parse
    |> Result.map_error (fun err ->
      Format.printf "%s" err;
      err)
    |> Result.get_ok
  in
  Format.printf "%b" (top_levels = top_levels');
  [%expect {| true |}]
;;