diff --git a/ocaml/dune-project b/ocaml/dune-project index 2c6159d..69dc51b 100644 --- a/ocaml/dune-project +++ b/ocaml/dune-project @@ -10,9 +10,10 @@ (description "The compiler for Yanais") (depends (cmdliner (>= 1.1)) + (gen (>= 0.5)) (integers (>= 0.2)) (menhir (>= 20220210)) - (uucp (>= 15.0)) + (sedlex (>= 3.0)) ) ) diff --git a/ocaml/lib/dune b/ocaml/lib/dune index 4661737..fea2956 100644 --- a/ocaml/lib/dune +++ b/ocaml/lib/dune @@ -2,6 +2,6 @@ (name yanais_core) (public_name yanais.yanais_core) (synopsis "The compiler for Yanais") - (libraries integers uucp)) + (libraries integers yanais_parse)) (documentation) diff --git a/ocaml/lib/syntax.ml b/ocaml/lib/syntax.ml new file mode 100644 index 0000000..cbd9d21 --- /dev/null +++ b/ocaml/lib/syntax.ml @@ -0,0 +1,33 @@ +(* unevaluated expressions with no runtime variables *) + +(* de-Bruijn index onto the stack *) +type stack_ref = int + +open Yanais_parse + +type pattern = + | PatIgnore of Lex.loc_span + | PatName of Lex.tagged_ident + | PatRecord of pattern record + +and pattern_mt = pattern * ((Lex.loc_span * expr) option) + +and 'a record = (Lex.loc_span * (string option) * 'a) list + +and lambda = + { pat : pattern_mt + ; body_span : Lex.loc_span + ; body : expr + } + +and expr = + | Infer + | ELiteral of Literal.lit + | Use of stack_ref + | Lambda of lambda + | TyLambda of expr * lambda + | Apply of expr * Lex.loc_span * expr + | RefOf of expr + | RefTy of stack_ref * expr + | Record of expr record + | TyRecord of expr record diff --git a/ocaml/libparse/dune b/ocaml/libparse/dune new file mode 100644 index 0000000..ee94d07 --- /dev/null +++ b/ocaml/libparse/dune @@ -0,0 +1,9 @@ +(library + (name yanais_parse) + (public_name yanais.yanais_parse) + (synopsis "Parser for Yanais") + (libraries integers sedlex) + (preprocess + (pps sedlex.ppx))) + +(documentation) diff --git a/ocaml/libparse/lex.ml b/ocaml/libparse/lex.ml new file mode 100644 index 0000000..7630136 --- /dev/null +++ b/ocaml/libparse/lex.ml @@ -0,0 +1,86 @@ +(* A lexer for yanais *) + +(* it is not necessary to be able to deal with source files >= 4GiB *) +type loc_span = + { file : string; + start : int; + length : Unsigned.UInt32.t; + } + +type tagged_ident = TaggedIdent of loc_span * string + +type grp_state = Open | Close + +type token = + | Paren of grp_state + | Brace of grp_state + | LArr + | RArr + | LDubArr + | RDubArr + | Caret + | Dot + | DubColon + | SemiColon + | Assign + | LLiteral of Literal.lit + | Ident of string + | PatOut of string + | DotIdent of string + | Symbol + +type token_ann = token * loc_span + +type lexer = + { buf : Sedlexing.lexbuf; + file : string + } + +let l_digit = [%sedlex.regexp? '0' .. '9'] +let l_number = [%sedlex.regexp? Plus l_digit] +let l_ident = [%sedlex.regexp? xid_start, Star xid_continue] + +let l_encap lx res = (res, + { file = lx.file + ; start = Sedlexing.lexeme_start lx.buf + ; length = Unsigned.UInt32.of_int (Sedlexing.lexeme_length lx.buf) + }) + +let l_ow_ident lx wrap = + let buf = lx.buf in + match%sedlex buf with + | ("" | l_ident | eof) -> Sedlexing.Utf8.lexeme buf |> wrap |> l_encap lx + | _ -> failwith "Unexpected character (internal error)" + +let rec tokens lx = + let buf = lx.buf in + match%sedlex buf with + | l_number -> + let res = LLiteral (Sedlexing.Utf8.lexeme buf |> fun x -> Literal.LNatural (int_of_string x)) in + let res = l_encap lx res in + res :: (tokens lx) + | l_ident -> + let cur = Sedlexing.Utf8.lexeme buf in + let res = match Literal.of_str cur with + | None -> Ident cur + | Some x -> LLiteral x + in + let res = l_encap lx res in + res :: (tokens lx) + | '$' -> + let res = l_ow_ident lx (fun x -> PatOut x) in + res :: (tokens lx) + | '.' -> + let res = l_ow_ident lx (fun x -> DotIdent x) in + res :: (tokens lx) + | white_space -> tokens lx + | eof -> [] + | _ -> failwith "Unexpected character" + +let run file dat = + let lx = + { buf = Sedlexing.Utf8.from_gen dat + ; file = file + } in + Sedlexing.set_filename lx.buf file; + tokens lx diff --git a/ocaml/lib/lex.ml b/ocaml/libparse/lex.mli similarity index 52% rename from ocaml/lib/lex.ml rename to ocaml/libparse/lex.mli index 5316797..b899a21 100644 --- a/ocaml/lib/lex.ml +++ b/ocaml/libparse/lex.mli @@ -1,5 +1,13 @@ (* A lexer for yanais *) +type loc_span = { + file : string; + start : int; + length : Unsigned.UInt32.t; + } + +type tagged_ident = TaggedIdent of loc_span * string + type grp_state = Open | Close type token = @@ -14,7 +22,12 @@ type token = | DubColon | SemiColon | Assign + | LLiteral of Literal.lit | Ident of string | PatOut of string | DotIdent of string | Symbol + +type token_ann = token * loc_span + +val run : string -> char Gen.t -> token_ann list diff --git a/ocaml/lib/lit.ml b/ocaml/libparse/literal.ml similarity index 100% rename from ocaml/lib/lit.ml rename to ocaml/libparse/literal.ml diff --git a/ocaml/lib/lit.mli b/ocaml/libparse/literal.mli similarity index 100% rename from ocaml/lib/lit.mli rename to ocaml/libparse/literal.mli