From 55ce72ed75dfe25ebdfcbda448defb1faa077997 Mon Sep 17 00:00:00 2001 From: Alain Emilia Anna Zscheile Date: Mon, 26 Feb 2024 23:45:31 +0100 Subject: [PATCH] ocaml: the parser should use Seq.t instead of a list to avoid expensive list mangling --- ocaml/lib/locale.ml | 6 ++++++ ocaml/libsyntax/expr.ml | 36 +++++++++++++++++++------------- ocaml/libsyntax/lex.ml | 13 +++++++----- ocaml/libsyntax/lex.mli | 5 ++++- ocaml/libsyntax/pattern.ml | 8 ++++---- ocaml/libsyntax/record.ml | 30 +++++++++++++++------------ ocaml/libsyntax/reporter.ml | 1 + ocaml/libsyntax/tagGroup.ml | 41 ++++++++++++++++++++++++------------- ocaml/libsyntax/tys.ml | 15 +++++++------- ocaml/yns.ml | 23 ++++++++++----------- 10 files changed, 108 insertions(+), 70 deletions(-) diff --git a/ocaml/lib/locale.ml b/ocaml/lib/locale.ml index 8aed9f9..7c3c93a 100644 --- a/ocaml/lib/locale.ml +++ b/ocaml/lib/locale.ml @@ -63,6 +63,9 @@ module Bare : LocaleT = struct | EPatternDupIdent x -> pp_print_string fmtp "#pattern_dup_ident; ident="; pp_print_string fmtp x + | ETagGroupDupNat x -> + pp_print_string fmtp "#tag_group_dup_nat; ident="; + pp_print_int fmtp x | EUnknownIdent (x, known) -> pp_print_string fmtp "#unknown_ident; ident="; pp_print_string fmtp x; @@ -135,6 +138,9 @@ module En : LocaleT = struct | EPatternDupIdent x -> pp_print_string fmtp "duplicated pattern field identifier: "; pp_print_string fmtp x + | ETagGroupDupNat x -> + pp_print_string fmtp "duplicated tag group field nat repr: "; + pp_print_int fmtp x | EUnknownIdent (x, _) -> pp_print_string fmtp "unknown identifier: "; pp_print_string fmtp x diff --git a/ocaml/libsyntax/expr.ml b/ocaml/libsyntax/expr.ml index 99538e4..4564819 100644 --- a/ocaml/libsyntax/expr.ml +++ b/ocaml/libsyntax/expr.ml @@ -80,8 +80,8 @@ and parse_ty_annot ps = when_got DubColon (fun (_, ps) -> (span, ptyx, ps) ) ps -and parse_letbinds ps acc = match ps.lt with - | (Let, s)::lt -> +and parse_letbinds ps acc = match ps.lt () with + | Seq.Cons ((Let, s), lt) -> let (pat, ps) = Pattern.parse { ps with lt; offset = loc_span_end s; } in let (pty, ps) = match parse_ty_annot ps with | Some (x, y, ps) -> (Some (x, y), ps) @@ -96,8 +96,8 @@ and parse_letbinds ps acc = match ps.lt with parse_letbinds ps ((pat, pty, (value_span, value)) :: acc) | _ -> (acc, ps) -and parse_expr ps = match ps.lt with - | (Lambda, s)::lt -> ( +and parse_expr ps = match ps.lt () with + | Seq.Cons ((Lambda, s), lt) -> ( let ps = { ps with lt; offset = loc_span_end s; } in let (pat, ps) = Pattern.parse ps in let (pty, ps) = match parse_ty_annot ps with @@ -115,7 +115,7 @@ and parse_expr ps = match ps.lt with let lam = { pat; pty; body_span = Asai.Range.make (body_span_start, ps.offset); body; } in (Lambda (msr_of_lambda lam, lam), ps) ) - | (TyLambda, s)::lt -> ( + | Seq.Cons ((TyLambda, s), lt) -> ( let ps = { ps with lt; offset = loc_span_end s; } in let (_, ps) = require_token XLambda (Paren Open) ps in let (bind_layout, ps) = parse_expr ps in @@ -136,21 +136,29 @@ and parse_expr ps = match ps.lt with let lam = { pat; pty; body_span = Asai.Range.make (body_span_start, ps.offset); body; } in (TyLambda (Int.max (msr_of_lambda lam) (msr_of_expr bind_layout), bind_layout, lam), ps) ) - | (Include, s1)::(Paren Open, _)::(TString fname, fnsp)::(Paren Close, s2)::lt -> ( + | Seq.Cons ((Include, s1), lt) -> ( + let ps = { ps with lt; offset = loc_span_end s1; } in + let (_, ps) = require_token XLambda (Paren Open) ps in + let (fname, fnsp, ps) = match ps.lt () with + | Seq.Cons ((TString fname, s), lt) -> (fname, s, { ps with lt; offset = loc_span_end s; }) + | Seq.Cons ((_, s), _) -> Yanais_syntax_err (EExpected XString, s) |> raise + | Seq.Nil -> Yanais_syntax_err (EUnexpectedEof XString, s1) |> raise + in + let (s2, ps) = require_token XLambda (Paren Close) ps in let span_end = loc_span_end s2 in let fname = match Asai.Range.source s1 with | `File forig -> (Filename.dirname forig) ^ "/" ^ fname | `String _ -> fname in let subsource = `File fname in - let sublt = try Lex.run subsource with + let subrunner sublt = + let subparser = parser_init subsource sublt in + let (xp, subparser) = parse_expr subparser in + let ps = { ps with lt; offset = span_end; } in + match subparser.lt () with + | Seq.Nil -> (xp, ps) + | Seq.Cons ((_, posc), _) -> Yanais_syntax_err (EExpected XEof, posc) |> raise + in try Lex.run subsource subrunner with Sys_error e -> Yanais_syntax_err (ESys e, fnsp) |> raise - in - let subparser = parser_init subsource sublt in - let (xp, subparser) = parse_expr subparser in - let ps = { ps with lt; offset = span_end; } in - if subparser.lt = [] - then (xp, ps) - else Yanais_syntax_err (EExpected XEof, Asai.Range.make (subparser.offset, subparser.offset)) |> raise ) | _ -> parse_minexpr ps diff --git a/ocaml/libsyntax/lex.ml b/ocaml/libsyntax/lex.ml index 03159e5..b13ce9e 100644 --- a/ocaml/libsyntax/lex.ml +++ b/ocaml/libsyntax/lex.ml @@ -129,14 +129,17 @@ let tokens lx () = | any -> l_raise lx (EUnhandled (Sedlexing.Utf8.lexeme lx.buf)) | _ -> failwith "Unexpected character" -let run source = match source with +let run source inner = + let run_inner buf fname = + Sedlexing.set_filename buf fname; + let lx = { source; buf; } in + tokens lx |> Seq.of_dispenser |> Seq.memoize |> inner + in match source with | `File fname -> let fh = open_in fname in Fun.protect ~finally:(fun () -> close_in fh) (fun () -> let buf = Sedlexing.Utf8.from_channel fh in - Sedlexing.set_filename buf fname; - Gen.to_list (tokens { source; buf; })) + run_inner buf fname) | `String sts -> let buf = Sedlexing.Utf8.from_string sts.content in - Option.value sts.title ~default:"" |> Sedlexing.set_filename buf; - Gen.to_list (tokens { source; buf; }) + run_inner buf (Option.value sts.title ~default:"") diff --git a/ocaml/libsyntax/lex.mli b/ocaml/libsyntax/lex.mli index eaa6d82..59787c8 100644 --- a/ocaml/libsyntax/lex.mli +++ b/ocaml/libsyntax/lex.mli @@ -1,3 +1,6 @@ (* A lexer for yanais *) -val run : Asai.Range.source -> Tys.token_ann list +type lexer + +(** run a lexer *) +val run : Asai.Range.source -> (Tys.token_ann Seq.t -> 'a) -> 'a diff --git a/ocaml/libsyntax/pattern.ml b/ocaml/libsyntax/pattern.ml index 40a0f5c..b82c4e0 100644 --- a/ocaml/libsyntax/pattern.ml +++ b/ocaml/libsyntax/pattern.ml @@ -14,12 +14,12 @@ let rec exports stk p = match p with | PatRecord rcd -> List.fold_left (fun stk (_, _, p) -> exports stk p) stk rcd let parse_opt ps = - let rec inner ps = match ps.lt with - | (PatOut "", s)::lt -> + let rec inner ps = match ps.lt () with + | Seq.Cons ((PatOut "", s), lt) -> Some (PatIgnore s, { ps with lt; offset = loc_span_end s; }) - | (PatOut name, s)::lt -> + | Seq.Cons ((PatOut name, s), lt) -> Some (PatName {ident = name; span = s; }, { ps with lt; offset = loc_span_end s; }) - | (Brace Open, _)::_ -> + | Seq.Cons ((Brace Open, _), _) -> let rqp_pattern = require XPattern inner in Some (Record.parse rqp_pattern ps |> (fun (x, ps) -> (PatRecord x, ps))) | _ -> None diff --git a/ocaml/libsyntax/record.ml b/ocaml/libsyntax/record.ml index 92ed0b7..2df146f 100644 --- a/ocaml/libsyntax/record.ml +++ b/ocaml/libsyntax/record.ml @@ -14,9 +14,9 @@ let parse_opt (type a) parse_inner = when_got (Brace Open) (fun (_, ps) -> let rest ps start mbname = let (xp, ps) = parse_inner ps in let full_sp = loc_span_of_start_end start ps.offset in - match ps.lt with - | [] -> unexp_eof ps - | (SemiColon, sp) :: lt -> ( + match ps.lt () with + | Seq.Nil -> unexp_eof ps + | Seq.Cons ((SemiColon, sp), lt) -> ( (* continue *) let reg = match mbname with | Some i -> FieldsReg.add i reg @@ -25,20 +25,24 @@ let parse_opt (type a) parse_inner = when_got (Brace Open) (fun (_, ps) -> inner ((full_sp, mbname, xp)::accu) reg { ps with lt; offset = loc_span_end sp; } ) - | (_, sp) :: _ -> Yanais_syntax_err (EExpected XSemiColon, sp) |> raise + | Seq.Cons ((_, sp), _) -> Yanais_syntax_err (EExpected XSemiColon, sp) |> raise in (* recognize a name *) - match ps.lt with - | (Brace Close, s) :: lt -> (List.rev accu, { ps with lt; offset = loc_span_end s; }) - | (DotIdent i, s) :: (Assign, _) :: lt -> ( - if FieldsReg.find_opt i reg |> Option.is_some then ( - Yanais_syntax_err (ERecordDupIdent i, s) |> raise - ) else - rest { ps with lt; offset = loc_span_end s; } (loc_span_start s) (Some i) + match ps.lt () with + | Seq.Nil -> unexp_eof ps + | Seq.Cons ((Brace Close, s), lt) -> (List.rev accu, { ps with lt; offset = loc_span_end s; }) + | Seq.Cons ((DotIdent i, s), lt) -> ( + match lt () with + | Seq.Cons ((Assign, s2), lt) -> ( + if FieldsReg.find_opt i reg |> Option.is_some then ( + Yanais_syntax_err (ERecordDupIdent i, s) |> raise + ) else + rest { ps with lt; offset = loc_span_end s2; } (loc_span_start s) (Some i) + ) + | _ -> rest ps ps.offset None ) - | [] -> unexp_eof ps - | _ -> rest ps ps.offset None + | Seq.Cons _ -> rest ps ps.offset None in inner [] FieldsReg.empty ps) let parse parse_inner = require XRecord (parse_opt parse_inner) diff --git a/ocaml/libsyntax/reporter.ml b/ocaml/libsyntax/reporter.ml index 7e2db41..da95c70 100644 --- a/ocaml/libsyntax/reporter.ml +++ b/ocaml/libsyntax/reporter.ml @@ -15,6 +15,7 @@ module Message = struct | Error (EUnknownIdent _ ) -> "YNS-E0003" | Error (ERecordDupIdent _ ) -> "YNS-E0004" | Error (EPatternDupIdent _ ) -> "YNS-E0005" + | Error (ETagGroupDupNat _ ) -> "YNS-E0006" end include Asai.Reporter.Make(Message) diff --git a/ocaml/libsyntax/tagGroup.ml b/ocaml/libsyntax/tagGroup.ml index 0e319bd..26dd6e8 100644 --- a/ocaml/libsyntax/tagGroup.ml +++ b/ocaml/libsyntax/tagGroup.ml @@ -19,23 +19,36 @@ let conforms (type u) (vrrcd: u FieldsReg.t) ((_, _, tfrg): t) = let parse_opt ps = let inner ps ((osp, oisz): loc_span * Literal.int_size) = - let rec inner2 ps frg = match ps.lt with - | (DotIdent nam, s)::(LLiteral Literal.LNatural nat, _)::(SemiColon, s3)::lt -> - let ps = { ps with lt; offset = loc_span_end s3; } in - (if FieldsReg.mem nam frg then - Yanais_syntax_err (ERecordDupIdent nam, s) |> raise - else FieldsReg.add nam nat frg |> inner2 ps) - | (Brace Close, s)::lt -> + let rec inner2 ps frg = match ps.lt () with + | Seq.Cons ((DotIdent nam, s), lt) -> + (match lt () with + | Seq.Cons ((LLiteral Literal.LNatural nat, s2), lt) -> + let frg = (if FieldsReg.mem nam frg then + Yanais_syntax_err (ERecordDupIdent nam, s) |> raise + else if FieldsReg.exists (fun _ -> fun mbnat -> mbnat = nat) frg then + Yanais_syntax_err (ETagGroupDupNat nat, s2) |> raise + else FieldsReg.add nam nat frg) in + (match lt () with + | Seq.Cons ((SemiColon, s3), lt) -> inner2 { ps with lt; offset = loc_span_end s3; } frg + | Seq.Cons ((_, s3), _) -> Yanais_syntax_err (EExpected XTagGroupItem, s3) |> raise + | Seq.Nil -> Yanais_syntax_err (EUnexpectedEof XTagGroupItem, s2) |> raise) + | Seq.Cons ((_, s2), _) -> Yanais_syntax_err (EExpected XTagGroupItem, s2) |> raise + | Seq.Nil -> Yanais_syntax_err (EUnexpectedEof XTagGroupItem, s) |> raise) + | Seq.Cons ((Brace Close, s), lt) -> let ps = { ps with lt; offset = loc_span_end s; } in ((osp, oisz, frg), ps) - | (_, s)::_ -> Yanais_syntax_err (EExpected XTagGroupItem, s) |> raise - | [] -> - Yanais_syntax_err (EUnexpectedEof XTagGroupItem, osp) |> raise + | Seq.Cons ((_, s), _) -> Yanais_syntax_err (EExpected XTagGroupItem, s) |> raise + | Seq.Nil -> Yanais_syntax_err (EUnexpectedEof XTagGroupItem, osp) |> raise in inner2 ps FieldsReg.empty - in match ps.lt with - | (KTagGroup, s1)::(LLiteral Literal.LIntSize isz, _)::(Brace Open, s3)::lt -> - let ps = { ps with lt; offset = loc_span_end s3; } in - Some (inner ps (s1, isz)) + in match ps.lt () with + | Seq.Cons ((KTagGroup, s1), lt) -> + (match lt () with + | Seq.Cons ((LLiteral Literal.LIntSize isz, s2), lt) -> + (match lt () with + | Seq.Cons ((Brace Open, s3), lt) -> + Some (inner { ps with lt; offset = loc_span_end s3; } (s1, isz)) + | _ -> Yanais_syntax_err (EExpected XTagGroup, s2) |> raise) + | _ -> Yanais_syntax_err (EExpected XTagGroup, s1) |> raise) | _ -> None let parse ps = require XTagGroup parse_opt ps diff --git a/ocaml/libsyntax/tys.ml b/ocaml/libsyntax/tys.ml index 8e98ff1..ce3b4a2 100644 --- a/ocaml/libsyntax/tys.ml +++ b/ocaml/libsyntax/tys.ml @@ -144,6 +144,7 @@ type error_kind = | EUnhandled of string | ERecordDupIdent of string | EPatternDupIdent of string + | ETagGroupDupNat of int | EUnknownIdent of string * (tagged_ident list) | ESys of string @@ -152,7 +153,7 @@ type error = error_kind * loc_span exception Yanais_syntax_err of error type parser_env = - { lt : token_ann list + { lt : token_ann Seq.t ; offset : Asai.Range.position ; names : tagged_ident list } @@ -160,9 +161,9 @@ type parser_env = let parser_init (source_: Asai.Range.source) lt = { lt; offset = Asai.Range.({ source = source_; offset = 0; start_of_line = 0; line_num = 1; }); names = [] } let parse_one ps = - match ps.lt with - | [] -> None - | (k, s) :: xs -> Some (k, s, { ps with lt = xs; offset = loc_span_end s; }) + match ps.lt () with + | Seq.Nil -> None + | Seq.Cons ((k, s), xs) -> Some (k, s, { ps with lt = xs; offset = loc_span_end s; }) let got tok ps = match parse_one ps with @@ -182,9 +183,9 @@ let next_in_noeof ps (ctx : error_ctx) = let require ctx parse_inner ps = match parse_inner ps with | Some x -> x | None -> - let sp = match ps.lt with - | (_, sp)::_ -> sp - | [] -> Asai.Range.eof ps.offset + let sp = match ps.lt () with + | Seq.Nil -> Asai.Range.eof ps.offset + | Seq.Cons ((_, sp), _) -> sp in Yanais_syntax_err (EExpected ctx, sp) |> raise let require_token ctx tok ps = match parse_one ps with diff --git a/ocaml/yns.ml b/ocaml/yns.ml index 5f0f162..c7b6777 100644 --- a/ocaml/yns.ml +++ b/ocaml/yns.ml @@ -6,20 +6,19 @@ let runner filename = let source = `File filename in let locale = Yanais_core.Locale.get "bare" in let module Locale = (val locale : Yanais_core.Locale.LocaleT) in - try - let lt = Yanais_syntax.Lex.run source in + let inner lt = let parser = Yanais_syntax.Tys.parser_init source lt in let (xp, parser) = Yanais_syntax.Expr.parse_expr parser in - if parser.lt = [] - then Format.printf - "got expr: %a\n" - Yanais_syntax.Expr.pp_expr - xp - else Format.printf - "unparsed expression after %a\n" - Asai.Range.dump_position - parser.offset - with + match parser.lt () with + | Seq.Nil -> Format.printf + "got expr: %a\n" + Yanais_syntax.Expr.pp_expr + xp + | Seq.Cons _ -> Format.printf + "unparsed expression after %a\n" + Asai.Range.dump_position + parser.offset + in try Yanais_syntax.Lex.run source inner with Yanais_syntax.Tys.Yanais_syntax_err e -> Format.printf "%a\n" Locale.pp_error e