1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
(** Copyright 2021-2023, Kakadu and contributors *)

(** SPDX-License-Identifier: LGPL-3.0-or-later *)

open Angstrom

let is_space = function
  | ' ' | '\t' | '\n' | '\r' -> true
  | _ -> false
;;

let spaces = skip_while is_space

(* Keyword filtering *)
let is_keyword = function
  | "let" | "rec" | "if" | "then" | "else" | "in" | "fun" -> true
  | _ -> false
;;

let is_ident_char = function
  | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' | '\'' -> true
  | _ -> false
;;

(* Multi-character identifier parser *)
let identifier =
  let first_char_parser =
    satisfy (function
      | 'a' .. 'z' | '_' -> true
      | _ -> false)
  in
  lift2
    (fun fc rest -> String.make 1 fc ^ rest)
    first_char_parser
    (take_while is_ident_char)
  >>= fun ident ->
  if is_keyword ident
  then fail ("Keyword '" ^ ident ^ "' cannot be used as identifier")
  else return ident
;;

type error = [ `Parsing_error of string ]

let pp_error ppf = function
  | `Parsing_error s -> Format.fprintf ppf "%s" s
;;

(* Integer literal parser *)
let pinteger =
  take_while1 (function
    | '0' .. '9' -> true
    | _ -> false)
  >>| int_of_string
  >>| fun n -> Ast.Int n
;;

(* Chainl1: left-associative infix operator parser *)
let chainl1 p op =
  let rec go acc = op >>= (fun f -> p >>= fun x -> go (f acc x)) <|> return acc in
  p >>= go
;;

(* Main parser using single fix point *)
let pexpr =
  fix (fun pexpr ->
    (* Atomic expressions: integers, variables and parenthesized expressions *)
    let patom =
      spaces
      *> choice
           [ char '(' *> pexpr <* char ')' <* spaces <?> "Parentheses expected"
           ; pinteger <* spaces
           ; (identifier <* spaces >>| fun name -> Ast.Var name)
           ]
    in
    (* Lambda abstractions *)
    let plambda =
      choice
        [ (* fun x y z -> body syntax sugar (multi-parameter) *)
          (string "fun" *> spaces *> many1 (identifier <* spaces)
           <* string "->"
           <* spaces
           >>= fun params ->
           pexpr
           >>| fun body ->
           (* Desugar: fun x y z -> body becomes fun x -> fun y -> fun z -> body *)
           List.fold_right (fun param acc -> Ast.Abs (param, acc)) params body)
        ; (* λ x y z -> body syntax sugar (multi-parameter) *)
          ((string "λ" <|> string "\\") *> spaces *> many1 (identifier <* spaces)
           <* string "->"
           <* spaces
           >>= fun params ->
           pexpr
           >>| fun body ->
           (* Desugar: λ x y z -> body becomes λx.λy.λz.body *)
           List.fold_right (fun param acc -> Ast.Abs (param, acc)) params body)
        ; (* λx. body or \x. body (single parameter with dot) *)
          ((string "λ" <|> string "\\") *> spaces *> identifier
           <* spaces
           <* char '.'
           <* spaces
           >>= fun param -> pexpr >>| fun body -> Ast.Abs (param, body))
        ]
    in
    (* If-then-else expression *)
    let pif =
      string "if" *> spaces *> pexpr
      >>= fun cond ->
      spaces *> string "then" *> spaces *> pexpr
      >>= fun then_branch ->
      option None (spaces *> string "else" *> spaces *> pexpr >>| Option.some)
      >>| fun else_branch -> Ast.If (cond, then_branch, else_branch)
    in
    (* Let expression *)
    let plet =
      string "let" *> spaces *> option false (string "rec" *> spaces *> return true)
      >>= fun is_rec ->
      identifier
      <* spaces
      <* char '='
      <* spaces
      >>= fun name ->
      pexpr
      >>= fun binding ->
      spaces *> string "in" *> spaces *> pexpr
      >>| fun body -> Ast.Let (is_rec, name, binding, body)
    in
    (* Application: sequence of atoms *)
    let papp =
      patom
      >>= fun first ->
      many patom >>| fun rest -> List.fold_left (fun acc e -> Ast.App (acc, e)) first rest
    in
    (* Binary operators with precedence *)
    (* Comparison operators: =, <>, <, >, <=, >= (lowest precedence) *)
    let pcmp_op =
      spaces
      *> choice
           [ string "<=" *> return (fun l r -> Ast.BinOp (Ast.Leq, l, r))
           ; string ">=" *> return (fun l r -> Ast.BinOp (Ast.Geq, l, r))
           ; string "<>" *> return (fun l r -> Ast.BinOp (Ast.Neq, l, r))
           ; char '<' *> return (fun l r -> Ast.BinOp (Ast.Lt, l, r))
           ; char '>' *> return (fun l r -> Ast.BinOp (Ast.Gt, l, r))
           ; char '=' *> return (fun l r -> Ast.BinOp (Ast.Eq, l, r))
           ]
      <* spaces
    in
    (* Additive operators: +, - *)
    let padd_op =
      spaces
      *> choice
           [ char '+' *> return (fun l r -> Ast.BinOp (Ast.Add, l, r))
           ; char '-' *> return (fun l r -> Ast.BinOp (Ast.Sub, l, r))
           ]
      <* spaces
    in
    (* Multiplicative operators: *, /, % (higher precedence) *)
    let pmul_op =
      spaces
      *> choice
           [ char '*' *> return (fun l r -> Ast.BinOp (Ast.Mul, l, r))
           ; char '/' *> return (fun l r -> Ast.BinOp (Ast.Div, l, r))
           ; char '%' *> return (fun l r -> Ast.BinOp (Ast.Mod, l, r))
           ]
      <* spaces
    in
    (* Precedence levels: mul > add > cmp > app > lambda *)
    let pmul = chainl1 papp pmul_op in
    let padd = chainl1 pmul padd_op in
    let pcmp = chainl1 padd pcmp_op in
    (* Top level: try let, if, lambda first, then comparison *)
    spaces *> choice [ plet; pif; plambda; pcmp ] <* spaces)
;;

let parse str =
  match Angstrom.parse_string ~consume:Angstrom.Consume.All pexpr str with
  | Result.Ok x -> Result.Ok x
  | Error er -> Result.Error (`Parsing_error er)
;;