ocaml: port loc_span to Asai.Range.t

This commit is contained in:
Alain Emilia Anna Zscheile 2024-02-25 15:08:43 +01:00
parent 5e1f6979ea
commit 2bd7b27100
11 changed files with 102 additions and 114 deletions

View file

@ -1,5 +1,4 @@
(lang dune 2.7)
(using menhir 2.0)
(name yanais)
(version 0.0.1)
@ -13,12 +12,11 @@
(description "The compiler for Yanais")
(depends
(alcotest (>= 1.7 :with-test))
(asai (>= 0.3))
(bwd (>= 2.3))
(cmdliner (>= 1.1))
(fmt (>= 0.8.7))
(gen (>= 0.5))
(integers (>= 0.2))
(menhir (>= 20220210))
(sedlex (>= 3.0))
(uunf (>= 15.0))
)

View file

@ -2,6 +2,6 @@
(name yanais_core)
(public_name yanais.yanais_core)
(synopsis "The compiler for Yanais")
(libraries integers yanais_syntax yanaijepeux))
(libraries yanais_syntax yanaijepeux))
(documentation)

View file

@ -4,7 +4,7 @@ open Format
open Yanais_syntax.Tys
module type LocaleT = sig
val fmt_loc_span : loc_span_full -> string
val pp_loc_span : formatter -> loc_span -> unit
val fmt_error_ctx : error_ctx -> string
val pp_error : formatter -> error -> unit
end
@ -14,16 +14,9 @@ end
*)
module Bare : LocaleT = struct
let fmt_loc_span ((file, s) : loc_span_full) =
let b = Buffer.create 64 in
Buffer.add_string b "file=";
Buffer.add_string b file;
Buffer.add_string b "; start=";
Buffer.add_string b (string_of_int s.start);
Buffer.add_string b "; len=";
Buffer.add_string b (Unsigned.UInt32.to_int s.length |> string_of_int);
Buffer.add_string b "; ";
Buffer.contents b
let pp_loc_span (fmtp : formatter) (s : loc_span) =
Asai.Range.dump fmtp s;
pp_print_string fmtp "; "
let fmt_error_ctx = function
| XComment -> "comment"
@ -42,10 +35,10 @@ module Bare : LocaleT = struct
| XTagGroupItem -> "tag-group-item"
| XIdent -> "identifier"
let pp_error fmtp (k,file,s) =
let pp_error fmtp (k,s) =
pp_open_box fmtp 0;
pp_print_string fmtp "error; ";
fmt_loc_span (file, s) |> pp_print_string fmtp;
pp_loc_span fmtp s;
(match k with
| EExpected x ->
pp_print_string fmtp "#expected; ctx=";
@ -75,16 +68,21 @@ module Bare : LocaleT = struct
end
module En : LocaleT = struct
let fmt_loc_span ((file, s) : loc_span_full) =
let b = Buffer.create 64 in
Buffer.add_string b "File ";
Buffer.add_string b file;
Buffer.add_string b ": Span ";
Buffer.add_string b (string_of_int s.start);
Buffer.add_string b "..+";
Buffer.add_string b (Unsigned.UInt32.to_int s.length |> string_of_int);
Buffer.add_string b ": ";
Buffer.contents b
let pp_loc_span (fmtp : formatter) (s : loc_span) =
let source_name (src : Asai.Range.source) = match src with
| `File s -> s
| `String s -> Option.value s.title ~default:"<none>"
in let tr_pos (p : Asai.Range.position) = (p.line_num - 1, p.offset - p.start_of_line)
in match Asai.Range.view s with
| `Range (startp, endp) ->
let (startl, startc) = tr_pos startp in
let (endl, endc) = tr_pos endp in
Format.fprintf fmtp "File: @,%s %@@ (line %d col %d@ - line %d col %d)"
(source_name startp.source) startl startc endl endc
| `End_of_file p ->
let (pl, pc) = tr_pos p in
Format.fprintf fmtp "File: @,%s, EOF %@@ (line %d col %d)"
(source_name p.source) pl pc
let fmt_error_ctx = function
| XComment -> "comment"
@ -103,10 +101,10 @@ module En : LocaleT = struct
| XTagGroupItem -> "tag group item"
| XIdent -> "identifier"
let pp_error fmtp (k,file,s) =
let pp_error fmtp (k,s) =
pp_open_box fmtp 0;
pp_print_string fmtp "Error: ";
fmt_loc_span (file, s) |> pp_print_string fmtp;
pp_loc_span fmtp s;
(match k with
| EExpected x ->
pp_print_string fmtp "expected ";

View file

@ -2,7 +2,7 @@
(name yanais_syntax)
(public_name yanais.yanais_syntax)
(synopsis "Syntax+Parser for Yanais")
(libraries integers sedlex uunf)
(libraries asai sedlex uunf)
(preprocess
(pps sedlex.ppx)))

View file

@ -4,15 +4,15 @@
open Tys
type stack_ref = int
type sel_ident = loc_span_full * stack_ref
type sel_ident = loc_span * stack_ref
type max_stack_ref = stack_ref
type 'a record = 'a Record.t
type pattern_mt = Pattern.t * ((loc_span_full * expr) option)
type pattern_mt = Pattern.t * ((loc_span * expr) option)
and lambda =
{ pat : pattern_mt
; body_span : loc_span_full
; body_span : loc_span
; body : expr
}
@ -23,7 +23,7 @@ and expr =
| RefOf of sel_ident
| Lambda of max_stack_ref * lambda
| TyLambda of max_stack_ref * expr * lambda (* note that the first element is the layout of the binders *)
| Apply of max_stack_ref * expr * loc_span_full * expr
| Apply of max_stack_ref * expr * loc_span * expr
| RefTy of max_stack_ref * stack_ref * expr
| Record of max_stack_ref * expr record
| TyRecord of max_stack_ref * expr record
@ -49,12 +49,12 @@ let msr_of_expr = function
let rec parse_minexpr ps =
let (k, s, ps) = next_in_noeof ps XExpression in
match k with
| Ident i -> (Use ((ps.file, s), env_lookup i s ps), ps)
| Ident i -> (Use (s, env_lookup i s ps), ps)
| RefOf -> (
let (k, s, ps) = next_in_noeof ps XRefOf in
match k with
| Ident i -> (RefOf ((ps.file, s), env_lookup i s ps), ps)
| _ -> Yanais_syntax_err (EUnexpectedToken (XRefOf, k), ps.file, s) |> raise
| Ident i -> (RefOf (s, env_lookup i s ps), ps)
| _ -> Yanais_syntax_err (EUnexpectedToken (XRefOf, k), s) |> raise
)
| LLiteral l -> (ELiteral l, ps)
| Paren Open -> (
@ -73,13 +73,13 @@ let rec parse_minexpr ps =
let (body, ps) = parse_minexpr
)
*)
| _ -> Yanais_syntax_err (EUnexpectedToken (XExpression, k), ps.file, s) |> raise
| _ -> Yanais_syntax_err (EUnexpectedToken (XExpression, k), s) |> raise
and parse_ty_annot ps = when_got DubColon (fun (_, ps) ->
let span_start = ps.offset in
let (ptyx, ps) = parse_expr ps in
let span = loc_span_of_start_end span_start ps.offset in
((ps.file, span), ptyx, ps)
(span, ptyx, ps)
) ps
and parse_letbinds ps acc = match ps.lt with

View file

@ -4,21 +4,19 @@ open Tys
type lexer =
{ buf : Sedlexing.lexbuf
; file : string
; source : Asai.Range.source
}
let l_digit = [%sedlex.regexp? '0' .. '9']
let l_number = [%sedlex.regexp? Plus l_digit]
let l_ident = [%sedlex.regexp? xid_start, Star xid_continue]
let l_encap lx res offs = (res,
{ start = (Sedlexing.lexeme_start lx.buf) - offs
; length = Unsigned.UInt32.of_int (Sedlexing.lexeme_length lx.buf)
})
let l_encap_curr lx res = (res, Asai.Range.of_lex_range ~source:lx.source (Sedlexing.lexing_bytes_positions lx.buf))
let l_raise lx xc =
let ((), sp) = l_encap lx () 0 in
Yanais_syntax_err (xc, lx.file, sp) |> raise
let l_encap lx res start = (res, Asai.Range.of_lex_range ~source:lx.source
(start, let (_, curr_pos) = Sedlexing.lexing_bytes_positions lx.buf in curr_pos))
let l_raise lx xc = Yanais_syntax_err (l_encap_curr lx xc) |> raise
let l_unhandled lx = l_raise lx (EUnhandled (Sedlexing.Utf8.lexeme lx.buf))
(* this should be equal to Uunf_string.normalize_utf_8, but that function is unavailable with some builds *)
@ -80,10 +78,11 @@ let rec handle_string lx s =
let tokens lx () =
handle_comments lx ();
let buf = lx.buf in
let (_, old_offset) = Sedlexing.lexing_bytes_positions lx.buf in
match%sedlex buf with
| l_number ->
let res = LLiteral (Sedlexing.Utf8.lexeme buf |> fun x -> Literal.LNatural (int_of_string x)) in
Some (l_encap lx res 0)
Some (l_encap_curr lx res)
| l_ident ->
let cur = Sedlexing.Utf8.lexeme buf |> normalize_utf_8 `NFC in
let res = match Literal.of_string cur with
@ -99,42 +98,45 @@ let tokens lx () =
| "\u{03BC}" -> Mu
| _ -> Ident cur
in
Some (l_encap lx res 0)
Some (l_encap_curr lx res)
| '$' ->
let res = l_ow_ident lx |> (fun x -> PatOut x) in
Some (l_encap lx res 1)
Some (l_encap lx res old_offset)
| '.' ->
let res = l_ow_ident lx |> (fun x -> DotIdent x) in
Some (l_encap lx res 1)
Some (l_encap lx res old_offset)
| '^' -> Some (l_encap lx Caret 0)
| ':' -> Some (l_encap lx DubColon 0)
| ';' -> Some (l_encap lx SemiColon 0)
| '=' -> Some (l_encap lx Assign 0)
| '&' -> Some (l_encap lx RefOf 0)
| '(' -> Some (l_encap lx (Paren Open) 0)
| ')' -> Some (l_encap lx (Paren Close) 0)
| '{' -> Some (l_encap lx (Brace Open) 0)
| '}' -> Some (l_encap lx (Brace Close) 0)
| "\u{2190}" -> Some (l_encap lx LArr 0)
| "\u{2192}" -> Some (l_encap lx RArr 0)
| "\u{21D0}" -> Some (l_encap lx LDubArr 0)
| "\u{21D2}" -> Some (l_encap lx RDubArr 0)
| '^' -> Some (l_encap_curr lx Caret)
| ':' -> Some (l_encap_curr lx DubColon)
| ';' -> Some (l_encap_curr lx SemiColon)
| '=' -> Some (l_encap_curr lx Assign)
| '&' -> Some (l_encap_curr lx RefOf)
| '(' -> Some (l_encap_curr lx (Paren Open))
| ')' -> Some (l_encap_curr lx (Paren Close))
| '{' -> Some (l_encap_curr lx (Brace Open))
| '}' -> Some (l_encap_curr lx (Brace Close))
| "\u{2190}" -> Some (l_encap_curr lx LArr)
| "\u{2192}" -> Some (l_encap_curr lx RArr)
| "\u{21D0}" -> Some (l_encap_curr lx LDubArr)
| "\u{21D2}" -> Some (l_encap_curr lx RDubArr)
| "\"" ->
let start = Sedlexing.lexeme_start lx.buf in
let s = handle_string lx [] in
let endp = Sedlexing.lexeme_end lx.buf in
Some (TString s, { start; length = Unsigned.UInt32.of_int (endp - start) })
let (_, new_offset) = Sedlexing.lexing_bytes_positions lx.buf in
Some (TString s, Asai.Range.of_lex_range ~source:lx.source (old_offset, new_offset))
| eof -> None
(* note: white space is already handled by handle_comments *)
| any -> l_unhandled lx
| _ -> failwith "Unexpected character"
let run file dat =
let run source dat =
let lx =
{ buf = Sedlexing.Utf8.from_gen dat
; file = file
; source = source
} in
let file = match source with
| `File f -> f
| `String sts -> Option.value sts.title ~default:"<none>"
in
Sedlexing.set_filename lx.buf file;
Gen.to_list (tokens lx)

View file

@ -1,3 +1,3 @@
(* A lexer for yanais *)
val run : string -> char Gen.t -> Tys.token_ann list
val run : Asai.Range.source -> char Gen.t -> Tys.token_ann list

View file

@ -1,7 +1,7 @@
open Tys
type t =
| PatIgnore of loc_span_full
| PatIgnore of loc_span
| PatName of tagged_ident
| PatRecord of t Record.t
@ -15,9 +15,9 @@ let rec exports stk p = match p with
let parse_opt ps =
let rec inner ps = match ps.lt with
| (PatOut "", s)::lt ->
Some (PatIgnore (ps.file, s), { ps with lt; offset = loc_span_end s; })
Some (PatIgnore s, { ps with lt; offset = loc_span_end s; })
| (PatOut name, s)::lt ->
Some (PatName {ident = name; file = ps.file; span = s; }, { ps with lt; offset = loc_span_end s; })
Some (PatName {ident = name; span = s; }, { ps with lt; offset = loc_span_end s; })
| (Brace Open, _)::_ ->
let rqp_pattern = require XPattern inner in
Some (Record.parse rqp_pattern ps |> (fun (x, ps) -> (PatRecord x, ps)))
@ -36,6 +36,6 @@ let parse_opt ps =
let ptexports = exports [] ret in
match chk4dups ptexports FieldsReg.empty with
| None -> Some (ret, ps)
| Some { ident; file; span; } -> Yanais_syntax_err (EPatternDupIdent ident, file, span) |> raise
| Some { ident; span; } -> Yanais_syntax_err (EPatternDupIdent ident, span) |> raise
let parse ps = require XPattern parse_opt ps

View file

@ -1,16 +1,14 @@
open Tys
type 'a t = (loc_span_full * (string option) * 'a) list
type 'a t = (loc_span * (string option) * 'a) list
module FieldsReg = Set.Make(String)
let parse_opt (type a) parse_inner = when_got (Brace Open) (fun (_, ps) ->
(* note: argument accu is reversed *)
let rec inner (accu: a t) (reg: FieldsReg.t) (ps: parser_env) =
let unexp_eof ps = (
let sp = { start = ps.offset; length = zero_len } in
Yanais_syntax_err (EUnexpectedEof XRecord, ps.file, sp) |> raise
) in
let unexp_eof ps =
(Yanais_syntax_err (EUnexpectedEof XRecord, Asai.Range.make (ps.offset, ps.offset)) |> raise) in
(* after we maybe got a name *)
let rest ps start mbname =
@ -24,10 +22,10 @@ let parse_opt (type a) parse_inner = when_got (Brace Open) (fun (_, ps) ->
| Some i -> FieldsReg.add i reg
| None -> reg
in
inner (((ps.file, full_sp), mbname, xp)::accu) reg
inner ((full_sp, mbname, xp)::accu) reg
{ ps with lt; offset = loc_span_end sp; }
)
| (_, sp) :: _ -> Yanais_syntax_err (EExpected XSemiColon, ps.file, sp) |> raise
| (_, sp) :: _ -> Yanais_syntax_err (EExpected XSemiColon, sp) |> raise
in
(* recognize a name *)
@ -35,10 +33,9 @@ let parse_opt (type a) parse_inner = when_got (Brace Open) (fun (_, ps) ->
| (Brace Close, s) :: lt -> (List.rev accu, { ps with lt; offset = loc_span_end s; })
| (DotIdent i, s) :: (Assign, _) :: lt -> (
if FieldsReg.find_opt i reg |> Option.is_some then (
Yanais_syntax_err (ERecordDupIdent i, ps.file, s) |> raise
Yanais_syntax_err (ERecordDupIdent i, s) |> raise
) else
let ps = { ps with lt; offset = loc_span_end s; } in
rest ps s.start (Some i)
rest { ps with lt; offset = loc_span_end s; } (loc_span_start s) (Some i)
)
| [] -> unexp_eof ps
| _ -> rest ps ps.offset None

View file

@ -4,7 +4,7 @@ module FieldsReg = Map.Make(String)
module FinReg = Map.Make(Int)
(* a tag group, which has an underlying type, and a bunch of key-index pairs *)
type t = loc_span_full * Literal.int_size * (int FieldsReg.t)
type t = loc_span * Literal.int_size * (int FieldsReg.t)
let rev_lookup ival ((_, _, frg): t) =
FieldsReg.filter (fun _ x -> x == ival) frg |> FieldsReg.choose
@ -18,25 +18,24 @@ let conforms (type u) (vrrcd: u FieldsReg.t) ((_, _, tfrg): t) =
in FieldsReg.fold inner vrrcd (Some FinReg.empty)
let parse_opt ps =
let inner ps ((osp, oisz): loc_span_full * Literal.int_size) =
let inner ps ((osp, oisz): loc_span * Literal.int_size) =
let rec inner2 ps frg = match ps.lt with
| (DotIdent nam, s)::(LLiteral Literal.LNatural nat, _)::(SemiColon, s3)::lt ->
let ps = { ps with lt; offset = loc_span_end s3; } in
(if FieldsReg.mem nam frg then
Yanais_syntax_err (ERecordDupIdent nam, ps.file, s) |> raise
Yanais_syntax_err (ERecordDupIdent nam, s) |> raise
else FieldsReg.add nam nat frg |> inner2 ps)
| (Brace Close, s)::lt ->
let ps = { ps with lt; offset = loc_span_end s; } in
((osp, oisz, frg), ps)
| (k, s)::_ -> Yanais_syntax_err (EUnexpectedToken (XTagGroupItem, k), ps.file, s) |> raise
| (k, s)::_ -> Yanais_syntax_err (EUnexpectedToken (XTagGroupItem, k), s) |> raise
| [] ->
let (fl, sp) = osp in
Yanais_syntax_err (EUnexpectedEof XTagGroupItem, fl, sp) |> raise
Yanais_syntax_err (EUnexpectedEof XTagGroupItem, osp) |> raise
in inner2 ps FieldsReg.empty
in match ps.lt with
| (KTagGroup, s1)::(LLiteral Literal.LIntSize isz, _)::(Brace Open, s3)::lt ->
let ps = { ps with lt; offset = loc_span_end s3; } in
Some (inner ps ((ps.file, s1), isz))
Some (inner ps (s1, isz))
| _ -> None
let parse ps = require XTagGroup parse_opt ps

View file

@ -1,21 +1,18 @@
(* this exists to prevent cyclic dependencies between files *)
(* it is not necessary to be able to deal with source files >= 4GiB *)
type loc_span =
{ start : int
; length : Unsigned.UInt32.t
}
type loc_span = Asai.Range.t
let loc_span_end rng = match Asai.Range.view rng with
| `Range (_, spe) -> spe
| `End_of_file spe -> spe
type loc_span_full = string * loc_span
let loc_span_of_start_end start endx = Asai.Range.make (start, endx)
let loc_span_end ls = ls.start + Unsigned.UInt32.to_int ls.length
let loc_span_of_start_end start endx =
{ start; length = Unsigned.UInt32.of_int (endx - start); }
let loc_span_start rng = match Asai.Range.view rng with
| `Range (sps, _) -> sps
| `End_of_file sps -> sps
type tagged_ident =
{ ident: string
; file : string
; span : loc_span
}
@ -122,20 +119,17 @@ type error_kind =
| EPatternDupIdent of string
| EUnknownIdent of string
type error = error_kind * string * loc_span
type error = error_kind * loc_span
exception Yanais_syntax_err of error
type parser_env =
{ lt : token_ann list
; file : string
; offset : int
; offset : Asai.Range.position
; names : tagged_ident list
}
let zero_len = Unsigned.UInt32.zero
let parser_init file lt = { lt; file; offset = 0; names = [] }
let parser_init (source_: Asai.Range.source) lt = { lt; offset = Asai.Range.({ source = source_; offset = 0; start_of_line = 0; line_num = 1; }); names = [] }
let parse_one ps =
match ps.lt with
@ -153,17 +147,17 @@ let next_in_noeof ps (ctx : error_ctx) =
match parse_one ps with
| Some (k, s, ps) -> (k, s, ps)
| None ->
let sp = { start = ps.offset; length = zero_len } in
Yanais_syntax_err (EUnexpectedEof ctx, ps.file, sp) |> raise
let sp = Asai.Range.make (ps.offset, ps.offset) in
Yanais_syntax_err (EUnexpectedEof ctx, sp) |> raise
(* make an optional parser required *)
(** make an optional parser required *)
let require ctx parse_inner ps = match parse_inner ps with
| Some x -> x
| None ->
let sp = match ps.lt with
| (_, sp)::_ -> sp
| [] -> { start = ps.offset; length = zero_len }
in Yanais_syntax_err (EExpected ctx, ps.file, sp) |> raise
| [] -> Asai.Range.eof ps.offset
in Yanais_syntax_err (EExpected ctx, sp) |> raise
let env_lookup_opt name ps =
let rec inner names count = match names with
@ -172,5 +166,5 @@ let env_lookup_opt name ps =
in inner ps.names 0
let env_lookup name span ps = match env_lookup_opt name ps with
| None -> Yanais_syntax_err (EUnknownIdent name, ps.file, span) |> raise
| None -> Yanais_syntax_err (EUnknownIdent name, span) |> raise
| Some y -> y