ocaml: record parsing

This commit is contained in:
Alain Zscheile 2024-02-11 14:24:29 +01:00
parent 0b206bcdbc
commit 65c82283fc
4 changed files with 110 additions and 39 deletions

View file

@ -4,7 +4,7 @@ open Format
open Yanais_syntax.Tys
module type LocaleT = sig
val fmt_loc_span : loc_span -> string
val fmt_loc_span : loc_span_full -> string
val fmt_error_ctx : error_ctx -> string
val pp_error : formatter -> error -> unit
end
@ -14,10 +14,10 @@ end
*)
module Bare : LocaleT = struct
let fmt_loc_span (s : loc_span) =
let fmt_loc_span ((file, s) : loc_span_full) =
let b = Buffer.create 64 in
Buffer.add_string b "file=";
Buffer.add_string b s.file;
Buffer.add_string b file;
Buffer.add_string b "; start=";
Buffer.add_string b (string_of_int s.start);
Buffer.add_string b "; len=";
@ -32,16 +32,17 @@ module Bare : LocaleT = struct
| XLet -> "let-expresssion"
| XLiteral -> "literal"
| XParens -> "parentheses"
| XSemiColon -> "semi-colon"
| XPattern -> "pattern"
| XRecord -> "record"
| XSelect -> "selection"
| XString -> "string"
| XIdent -> "identifier"
let pp_error fmtp (k,s) =
let pp_error fmtp (k,file,s) =
pp_open_box fmtp 0;
pp_print_string fmtp "error; ";
pp_print_string fmtp (fmt_loc_span s);
fmt_loc_span (file, s) |> pp_print_string fmtp;
(match k with
| EExpected x ->
pp_print_string fmtp "#expected; ctx=";
@ -71,10 +72,10 @@ module Bare : LocaleT = struct
end
module En : LocaleT = struct
let fmt_loc_span (s : loc_span) =
let fmt_loc_span ((file, s) : loc_span_full) =
let b = Buffer.create 64 in
Buffer.add_string b "File ";
Buffer.add_string b s.file;
Buffer.add_string b file;
Buffer.add_string b ": Span ";
Buffer.add_string b (string_of_int s.start);
Buffer.add_string b "..+";
@ -89,16 +90,17 @@ module En : LocaleT = struct
| XLet -> "let expresssion"
| XLiteral -> "literal"
| XParens -> "parentheses"
| XSemiColon -> "semi-colon"
| XPattern -> "pattern"
| XRecord -> "record"
| XSelect -> "selection"
| XString -> "string"
| XIdent -> "identifier"
let pp_error fmtp (k,s) =
let pp_error fmtp (k,file,s) =
pp_open_box fmtp 0;
pp_print_string fmtp "Error: ";
pp_print_string fmtp (fmt_loc_span s);
fmt_loc_span (file, s) |> pp_print_string fmtp;
(match k with
| EExpected x ->
pp_print_string fmtp "expected ";

View file

@ -3,8 +3,8 @@
open Tys
type lexer =
{ buf : Sedlexing.lexbuf;
file : string
{ buf : Sedlexing.lexbuf
; file : string
}
let l_digit = [%sedlex.regexp? '0' .. '9']
@ -12,12 +12,13 @@ let l_number = [%sedlex.regexp? Plus l_digit]
let l_ident = [%sedlex.regexp? xid_start, Star xid_continue]
let l_encap lx res offs = (res,
{ file = lx.file
; start = (Sedlexing.lexeme_start lx.buf) - offs
{ start = (Sedlexing.lexeme_start lx.buf) - offs
; length = Unsigned.UInt32.of_int (Sedlexing.lexeme_length lx.buf)
})
let l_raise lx xc = Yanais_syntax_err (l_encap lx xc 0) |> raise
let l_raise lx xc =
let ((), sp) = l_encap lx () 0 in
Yanais_syntax_err (xc, lx.file, sp) |> raise
let l_unhandled lx = l_raise lx (EUnhandled (Sedlexing.Utf8.lexeme lx.buf))
(* this should be equal to Uunf_string.normalize_utf_8, but that function is unavailable with some builds *)
@ -122,10 +123,7 @@ let tokens lx () =
let start = Sedlexing.lexeme_start lx.buf in
let s = handle_string lx [] in
let endp = Sedlexing.lexeme_end lx.buf in
Some (TString s, { file = lx.file
; start
; length = Unsigned.UInt32.of_int (endp - start)
})
Some (TString s, { start; length = Unsigned.UInt32.of_int (endp - start) })
| eof -> None
(* note: white space is already handled by handle_comments *)
| any -> l_unhandled lx

View file

@ -2,18 +2,20 @@
type stack_ref = int
open Tys
type pattern =
| PatIgnore of Tys.loc_span
| PatName of Tys.tagged_ident
| PatIgnore of loc_span_full
| PatName of tagged_ident
| PatRecord of pattern record
and pattern_mt = pattern * ((Tys.loc_span * expr) option)
and pattern_mt = pattern * ((loc_span_full * expr) option)
and 'a record = (Tys.loc_span * (string option) * 'a) list
and 'a record = (loc_span_full * (string option) * 'a) list
and lambda =
{ pat : pattern_mt
; body_span : Tys.loc_span
; body_span : loc_span_full
; body : expr
}
@ -23,7 +25,7 @@ and expr =
| Use of stack_ref
| Lambda of lambda
| TyLambda of expr * lambda
| Apply of expr * Tys.loc_span * expr
| Apply of expr * loc_span_full * expr
| RefOf of expr
| RefTy of stack_ref * expr
| Record of expr record
@ -32,20 +34,84 @@ and expr =
(* parser *)
type parser_env =
{ lg : Tys.token_ann Gen.t
; names : string list
{ lt : token_ann list
; file : string
; offset : int
; names : string list
}
type parser_err = unit
let zero_len = Unsigned.UInt32.zero
type 'suc parser_result =
| POk of 'suc
| PNone
| PErr of parser_err
let parser_init file lt = { lt; file; offset = 0; names = [] }
let parser_init lg = { lg; names = [] }
let parse_one ps =
match ps.lt with
| [] -> None
| (k, s) :: xs -> Some (k, s, { ps with lt = xs; offset = loc_span_end s; })
let got ps tok =
match parse_one ps with
| Some (k, s, ps) when k == tok -> Some (s, ps)
| _ -> None
let when_got ps tok thn = Option.map thn (got ps tok)
let next_in_noeof ps (ctx : error_ctx) =
match parse_one ps with
| Some (k, s, ps) -> (k, s, ps)
| None ->
let sp = { start = ps.offset; length = zero_len } in
Yanais_syntax_err (EUnexpectedEof ctx, ps.file, sp) |> raise
module FieldsReg = Set.Make(String)
let parse_record (type a) parse_inner ps = when_got ps (Brace Open) (fun (_, ps) -> (
(* note: argument accu is reversed *)
let rec inner (accu: a record) (reg: FieldsReg.t) (ps: parser_env) =
let unexp_eof ps = (
let sp = { start = ps.offset; length = zero_len } in
Yanais_syntax_err (EUnexpectedEof XRecord, ps.file, sp) |> raise
) in
(* after we maybe got a name *)
let rest ps start mbname =
let (xp, ps) = parse_inner ps in
let full_sp = { start; length = Unsigned.UInt32.of_int (ps.offset - start) } in
match ps.lt with
| [] -> unexp_eof ps
| (SemiColon, sp) :: lt -> (
(* continue *)
let reg = match mbname with
| Some i -> FieldsReg.add i reg
| None -> reg
in
inner (((ps.file, full_sp), mbname, xp)::accu) reg
{ ps with lt; offset = loc_span_end sp; }
)
| (_, sp) :: _ -> Yanais_syntax_err (EExpected XSemiColon, ps.file, sp) |> raise
in
(* recognize a name *)
match ps.lt with
| (Brace Close, s) :: lt -> (List.rev accu, { ps with lt; offset = loc_span_end s; })
| (DotIdent i, s) :: (Assign, _) :: lt -> (
if FieldsReg.find_opt i reg |> Option.is_some then (
Yanais_syntax_err (ERecordDupIdent i, ps.file, s) |> raise
) else
let ps = { ps with lt; offset = loc_span_end s; } in
rest ps s.start (Some i)
)
| [] -> unexp_eof ps
| _ -> rest ps ps.offset None
in
inner [] FieldsReg.empty ps
))
(*
let parse_expr ps =
match Gen.next ps.lg with
match parse_one ps with
| None -> None
| Some _ -> Some ()
| Some (k, s, ps) -> (
)
*)

View file

@ -2,12 +2,15 @@
(* it is not necessary to be able to deal with source files >= 4GiB *)
type loc_span =
{ file : string;
start : int;
length : Unsigned.UInt32.t;
{ start : int
; length : Unsigned.UInt32.t
}
type tagged_ident = TaggedIdent of loc_span * string
type loc_span_full = string * loc_span
let loc_span_end ls = ls.start + Unsigned.UInt32.to_int ls.length
type tagged_ident = TaggedIdent of loc_span_full * string
type grp_state = Open | Close
@ -72,6 +75,7 @@ type error_ctx =
| XLet
| XLiteral
| XParens
| XSemiColon
| XPattern
| XRecord
| XSelect
@ -85,6 +89,7 @@ let str_of_ectx = function
| XLet -> "let expresssion"
| XLiteral -> "literal"
| XParens -> "parentheses"
| XSemiColon -> "semi-colon"
| XPattern -> "pattern"
| XRecord -> "record"
| XSelect -> "selection"
@ -100,6 +105,6 @@ type error_kind =
| EPatternDupIdent of string
| EUnknownIdent of string
type error = error_kind * loc_span
type error = error_kind * string * loc_span
exception Yanais_syntax_err of error