ocaml: the parser should use Seq.t instead of a list to avoid expensive list mangling

This commit is contained in:
Alain Emilia Anna Zscheile 2024-02-26 23:45:31 +01:00
parent f283c1c4e7
commit 55ce72ed75
10 changed files with 108 additions and 70 deletions

View file

@ -63,6 +63,9 @@ module Bare : LocaleT = struct
| EPatternDupIdent x ->
pp_print_string fmtp "#pattern_dup_ident; ident=";
pp_print_string fmtp x
| ETagGroupDupNat x ->
pp_print_string fmtp "#tag_group_dup_nat; ident=";
pp_print_int fmtp x
| EUnknownIdent (x, known) ->
pp_print_string fmtp "#unknown_ident; ident=";
pp_print_string fmtp x;
@ -135,6 +138,9 @@ module En : LocaleT = struct
| EPatternDupIdent x ->
pp_print_string fmtp "duplicated pattern field identifier: ";
pp_print_string fmtp x
| ETagGroupDupNat x ->
pp_print_string fmtp "duplicated tag group field nat repr: ";
pp_print_int fmtp x
| EUnknownIdent (x, _) ->
pp_print_string fmtp "unknown identifier: ";
pp_print_string fmtp x

View file

@ -80,8 +80,8 @@ and parse_ty_annot ps = when_got DubColon (fun (_, ps) ->
(span, ptyx, ps)
) ps
and parse_letbinds ps acc = match ps.lt with
| (Let, s)::lt ->
and parse_letbinds ps acc = match ps.lt () with
| Seq.Cons ((Let, s), lt) ->
let (pat, ps) = Pattern.parse { ps with lt; offset = loc_span_end s; } in
let (pty, ps) = match parse_ty_annot ps with
| Some (x, y, ps) -> (Some (x, y), ps)
@ -96,8 +96,8 @@ and parse_letbinds ps acc = match ps.lt with
parse_letbinds ps ((pat, pty, (value_span, value)) :: acc)
| _ -> (acc, ps)
and parse_expr ps = match ps.lt with
| (Lambda, s)::lt -> (
and parse_expr ps = match ps.lt () with
| Seq.Cons ((Lambda, s), lt) -> (
let ps = { ps with lt; offset = loc_span_end s; } in
let (pat, ps) = Pattern.parse ps in
let (pty, ps) = match parse_ty_annot ps with
@ -115,7 +115,7 @@ and parse_expr ps = match ps.lt with
let lam = { pat; pty; body_span = Asai.Range.make (body_span_start, ps.offset); body; } in
(Lambda (msr_of_lambda lam, lam), ps)
)
| (TyLambda, s)::lt -> (
| Seq.Cons ((TyLambda, s), lt) -> (
let ps = { ps with lt; offset = loc_span_end s; } in
let (_, ps) = require_token XLambda (Paren Open) ps in
let (bind_layout, ps) = parse_expr ps in
@ -136,21 +136,29 @@ and parse_expr ps = match ps.lt with
let lam = { pat; pty; body_span = Asai.Range.make (body_span_start, ps.offset); body; } in
(TyLambda (Int.max (msr_of_lambda lam) (msr_of_expr bind_layout), bind_layout, lam), ps)
)
| (Include, s1)::(Paren Open, _)::(TString fname, fnsp)::(Paren Close, s2)::lt -> (
| Seq.Cons ((Include, s1), lt) -> (
let ps = { ps with lt; offset = loc_span_end s1; } in
let (_, ps) = require_token XLambda (Paren Open) ps in
let (fname, fnsp, ps) = match ps.lt () with
| Seq.Cons ((TString fname, s), lt) -> (fname, s, { ps with lt; offset = loc_span_end s; })
| Seq.Cons ((_, s), _) -> Yanais_syntax_err (EExpected XString, s) |> raise
| Seq.Nil -> Yanais_syntax_err (EUnexpectedEof XString, s1) |> raise
in
let (s2, ps) = require_token XLambda (Paren Close) ps in
let span_end = loc_span_end s2 in
let fname = match Asai.Range.source s1 with
| `File forig -> (Filename.dirname forig) ^ "/" ^ fname
| `String _ -> fname
in
let subsource = `File fname in
let sublt = try Lex.run subsource with
let subrunner sublt =
let subparser = parser_init subsource sublt in
let (xp, subparser) = parse_expr subparser in
let ps = { ps with lt; offset = span_end; } in
match subparser.lt () with
| Seq.Nil -> (xp, ps)
| Seq.Cons ((_, posc), _) -> Yanais_syntax_err (EExpected XEof, posc) |> raise
in try Lex.run subsource subrunner with
Sys_error e -> Yanais_syntax_err (ESys e, fnsp) |> raise
in
let subparser = parser_init subsource sublt in
let (xp, subparser) = parse_expr subparser in
let ps = { ps with lt; offset = span_end; } in
if subparser.lt = []
then (xp, ps)
else Yanais_syntax_err (EExpected XEof, Asai.Range.make (subparser.offset, subparser.offset)) |> raise
)
| _ -> parse_minexpr ps

View file

@ -129,14 +129,17 @@ let tokens lx () =
| any -> l_raise lx (EUnhandled (Sedlexing.Utf8.lexeme lx.buf))
| _ -> failwith "Unexpected character"
let run source = match source with
let run source inner =
let run_inner buf fname =
Sedlexing.set_filename buf fname;
let lx = { source; buf; } in
tokens lx |> Seq.of_dispenser |> Seq.memoize |> inner
in match source with
| `File fname ->
let fh = open_in fname in
Fun.protect ~finally:(fun () -> close_in fh) (fun () ->
let buf = Sedlexing.Utf8.from_channel fh in
Sedlexing.set_filename buf fname;
Gen.to_list (tokens { source; buf; }))
run_inner buf fname)
| `String sts ->
let buf = Sedlexing.Utf8.from_string sts.content in
Option.value sts.title ~default:"<none>" |> Sedlexing.set_filename buf;
Gen.to_list (tokens { source; buf; })
run_inner buf (Option.value sts.title ~default:"<none>")

View file

@ -1,3 +1,6 @@
(* A lexer for yanais *)
val run : Asai.Range.source -> Tys.token_ann list
type lexer
(** run a lexer *)
val run : Asai.Range.source -> (Tys.token_ann Seq.t -> 'a) -> 'a

View file

@ -14,12 +14,12 @@ let rec exports stk p = match p with
| PatRecord rcd -> List.fold_left (fun stk (_, _, p) -> exports stk p) stk rcd
let parse_opt ps =
let rec inner ps = match ps.lt with
| (PatOut "", s)::lt ->
let rec inner ps = match ps.lt () with
| Seq.Cons ((PatOut "", s), lt) ->
Some (PatIgnore s, { ps with lt; offset = loc_span_end s; })
| (PatOut name, s)::lt ->
| Seq.Cons ((PatOut name, s), lt) ->
Some (PatName {ident = name; span = s; }, { ps with lt; offset = loc_span_end s; })
| (Brace Open, _)::_ ->
| Seq.Cons ((Brace Open, _), _) ->
let rqp_pattern = require XPattern inner in
Some (Record.parse rqp_pattern ps |> (fun (x, ps) -> (PatRecord x, ps)))
| _ -> None

View file

@ -14,9 +14,9 @@ let parse_opt (type a) parse_inner = when_got (Brace Open) (fun (_, ps) ->
let rest ps start mbname =
let (xp, ps) = parse_inner ps in
let full_sp = loc_span_of_start_end start ps.offset in
match ps.lt with
| [] -> unexp_eof ps
| (SemiColon, sp) :: lt -> (
match ps.lt () with
| Seq.Nil -> unexp_eof ps
| Seq.Cons ((SemiColon, sp), lt) -> (
(* continue *)
let reg = match mbname with
| Some i -> FieldsReg.add i reg
@ -25,20 +25,24 @@ let parse_opt (type a) parse_inner = when_got (Brace Open) (fun (_, ps) ->
inner ((full_sp, mbname, xp)::accu) reg
{ ps with lt; offset = loc_span_end sp; }
)
| (_, sp) :: _ -> Yanais_syntax_err (EExpected XSemiColon, sp) |> raise
| Seq.Cons ((_, sp), _) -> Yanais_syntax_err (EExpected XSemiColon, sp) |> raise
in
(* recognize a name *)
match ps.lt with
| (Brace Close, s) :: lt -> (List.rev accu, { ps with lt; offset = loc_span_end s; })
| (DotIdent i, s) :: (Assign, _) :: lt -> (
if FieldsReg.find_opt i reg |> Option.is_some then (
Yanais_syntax_err (ERecordDupIdent i, s) |> raise
) else
rest { ps with lt; offset = loc_span_end s; } (loc_span_start s) (Some i)
match ps.lt () with
| Seq.Nil -> unexp_eof ps
| Seq.Cons ((Brace Close, s), lt) -> (List.rev accu, { ps with lt; offset = loc_span_end s; })
| Seq.Cons ((DotIdent i, s), lt) -> (
match lt () with
| Seq.Cons ((Assign, s2), lt) -> (
if FieldsReg.find_opt i reg |> Option.is_some then (
Yanais_syntax_err (ERecordDupIdent i, s) |> raise
) else
rest { ps with lt; offset = loc_span_end s2; } (loc_span_start s) (Some i)
)
| _ -> rest ps ps.offset None
)
| [] -> unexp_eof ps
| _ -> rest ps ps.offset None
| Seq.Cons _ -> rest ps ps.offset None
in inner [] FieldsReg.empty ps)
let parse parse_inner = require XRecord (parse_opt parse_inner)

View file

@ -15,6 +15,7 @@ module Message = struct
| Error (EUnknownIdent _ ) -> "YNS-E0003"
| Error (ERecordDupIdent _ ) -> "YNS-E0004"
| Error (EPatternDupIdent _ ) -> "YNS-E0005"
| Error (ETagGroupDupNat _ ) -> "YNS-E0006"
end
include Asai.Reporter.Make(Message)

View file

@ -19,23 +19,36 @@ let conforms (type u) (vrrcd: u FieldsReg.t) ((_, _, tfrg): t) =
let parse_opt ps =
let inner ps ((osp, oisz): loc_span * Literal.int_size) =
let rec inner2 ps frg = match ps.lt with
| (DotIdent nam, s)::(LLiteral Literal.LNatural nat, _)::(SemiColon, s3)::lt ->
let ps = { ps with lt; offset = loc_span_end s3; } in
(if FieldsReg.mem nam frg then
Yanais_syntax_err (ERecordDupIdent nam, s) |> raise
else FieldsReg.add nam nat frg |> inner2 ps)
| (Brace Close, s)::lt ->
let rec inner2 ps frg = match ps.lt () with
| Seq.Cons ((DotIdent nam, s), lt) ->
(match lt () with
| Seq.Cons ((LLiteral Literal.LNatural nat, s2), lt) ->
let frg = (if FieldsReg.mem nam frg then
Yanais_syntax_err (ERecordDupIdent nam, s) |> raise
else if FieldsReg.exists (fun _ -> fun mbnat -> mbnat = nat) frg then
Yanais_syntax_err (ETagGroupDupNat nat, s2) |> raise
else FieldsReg.add nam nat frg) in
(match lt () with
| Seq.Cons ((SemiColon, s3), lt) -> inner2 { ps with lt; offset = loc_span_end s3; } frg
| Seq.Cons ((_, s3), _) -> Yanais_syntax_err (EExpected XTagGroupItem, s3) |> raise
| Seq.Nil -> Yanais_syntax_err (EUnexpectedEof XTagGroupItem, s2) |> raise)
| Seq.Cons ((_, s2), _) -> Yanais_syntax_err (EExpected XTagGroupItem, s2) |> raise
| Seq.Nil -> Yanais_syntax_err (EUnexpectedEof XTagGroupItem, s) |> raise)
| Seq.Cons ((Brace Close, s), lt) ->
let ps = { ps with lt; offset = loc_span_end s; } in
((osp, oisz, frg), ps)
| (_, s)::_ -> Yanais_syntax_err (EExpected XTagGroupItem, s) |> raise
| [] ->
Yanais_syntax_err (EUnexpectedEof XTagGroupItem, osp) |> raise
| Seq.Cons ((_, s), _) -> Yanais_syntax_err (EExpected XTagGroupItem, s) |> raise
| Seq.Nil -> Yanais_syntax_err (EUnexpectedEof XTagGroupItem, osp) |> raise
in inner2 ps FieldsReg.empty
in match ps.lt with
| (KTagGroup, s1)::(LLiteral Literal.LIntSize isz, _)::(Brace Open, s3)::lt ->
let ps = { ps with lt; offset = loc_span_end s3; } in
Some (inner ps (s1, isz))
in match ps.lt () with
| Seq.Cons ((KTagGroup, s1), lt) ->
(match lt () with
| Seq.Cons ((LLiteral Literal.LIntSize isz, s2), lt) ->
(match lt () with
| Seq.Cons ((Brace Open, s3), lt) ->
Some (inner { ps with lt; offset = loc_span_end s3; } (s1, isz))
| _ -> Yanais_syntax_err (EExpected XTagGroup, s2) |> raise)
| _ -> Yanais_syntax_err (EExpected XTagGroup, s1) |> raise)
| _ -> None
let parse ps = require XTagGroup parse_opt ps

View file

@ -144,6 +144,7 @@ type error_kind =
| EUnhandled of string
| ERecordDupIdent of string
| EPatternDupIdent of string
| ETagGroupDupNat of int
| EUnknownIdent of string * (tagged_ident list)
| ESys of string
@ -152,7 +153,7 @@ type error = error_kind * loc_span
exception Yanais_syntax_err of error
type parser_env =
{ lt : token_ann list
{ lt : token_ann Seq.t
; offset : Asai.Range.position
; names : tagged_ident list
}
@ -160,9 +161,9 @@ type parser_env =
let parser_init (source_: Asai.Range.source) lt = { lt; offset = Asai.Range.({ source = source_; offset = 0; start_of_line = 0; line_num = 1; }); names = [] }
let parse_one ps =
match ps.lt with
| [] -> None
| (k, s) :: xs -> Some (k, s, { ps with lt = xs; offset = loc_span_end s; })
match ps.lt () with
| Seq.Nil -> None
| Seq.Cons ((k, s), xs) -> Some (k, s, { ps with lt = xs; offset = loc_span_end s; })
let got tok ps =
match parse_one ps with
@ -182,9 +183,9 @@ let next_in_noeof ps (ctx : error_ctx) =
let require ctx parse_inner ps = match parse_inner ps with
| Some x -> x
| None ->
let sp = match ps.lt with
| (_, sp)::_ -> sp
| [] -> Asai.Range.eof ps.offset
let sp = match ps.lt () with
| Seq.Nil -> Asai.Range.eof ps.offset
| Seq.Cons ((_, sp), _) -> sp
in Yanais_syntax_err (EExpected ctx, sp) |> raise
let require_token ctx tok ps = match parse_one ps with

View file

@ -6,20 +6,19 @@ let runner filename =
let source = `File filename in
let locale = Yanais_core.Locale.get "bare" in
let module Locale = (val locale : Yanais_core.Locale.LocaleT) in
try
let lt = Yanais_syntax.Lex.run source in
let inner lt =
let parser = Yanais_syntax.Tys.parser_init source lt in
let (xp, parser) = Yanais_syntax.Expr.parse_expr parser in
if parser.lt = []
then Format.printf
"got expr: %a\n"
Yanais_syntax.Expr.pp_expr
xp
else Format.printf
"unparsed expression after %a\n"
Asai.Range.dump_position
parser.offset
with
match parser.lt () with
| Seq.Nil -> Format.printf
"got expr: %a\n"
Yanais_syntax.Expr.pp_expr
xp
| Seq.Cons _ -> Format.printf
"unparsed expression after %a\n"
Asai.Range.dump_position
parser.offset
in try Yanais_syntax.Lex.run source inner with
Yanais_syntax.Tys.Yanais_syntax_err e ->
Format.printf "%a\n" Locale.pp_error e