ocaml: a lexer
This commit is contained in:
parent
d7354de720
commit
ef3977ddca
8 changed files with 144 additions and 2 deletions
|
@ -10,9 +10,10 @@
|
||||||
(description "The compiler for Yanais")
|
(description "The compiler for Yanais")
|
||||||
(depends
|
(depends
|
||||||
(cmdliner (>= 1.1))
|
(cmdliner (>= 1.1))
|
||||||
|
(gen (>= 0.5))
|
||||||
(integers (>= 0.2))
|
(integers (>= 0.2))
|
||||||
(menhir (>= 20220210))
|
(menhir (>= 20220210))
|
||||||
(uucp (>= 15.0))
|
(sedlex (>= 3.0))
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,6 @@
|
||||||
(name yanais_core)
|
(name yanais_core)
|
||||||
(public_name yanais.yanais_core)
|
(public_name yanais.yanais_core)
|
||||||
(synopsis "The compiler for Yanais")
|
(synopsis "The compiler for Yanais")
|
||||||
(libraries integers uucp))
|
(libraries integers yanais_parse))
|
||||||
|
|
||||||
(documentation)
|
(documentation)
|
||||||
|
|
33
ocaml/lib/syntax.ml
Normal file
33
ocaml/lib/syntax.ml
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
(* unevaluated expressions with no runtime variables *)
|
||||||
|
|
||||||
|
(* de-Bruijn index onto the stack *)
|
||||||
|
type stack_ref = int
|
||||||
|
|
||||||
|
open Yanais_parse
|
||||||
|
|
||||||
|
type pattern =
|
||||||
|
| PatIgnore of Lex.loc_span
|
||||||
|
| PatName of Lex.tagged_ident
|
||||||
|
| PatRecord of pattern record
|
||||||
|
|
||||||
|
and pattern_mt = pattern * ((Lex.loc_span * expr) option)
|
||||||
|
|
||||||
|
and 'a record = (Lex.loc_span * (string option) * 'a) list
|
||||||
|
|
||||||
|
and lambda =
|
||||||
|
{ pat : pattern_mt
|
||||||
|
; body_span : Lex.loc_span
|
||||||
|
; body : expr
|
||||||
|
}
|
||||||
|
|
||||||
|
and expr =
|
||||||
|
| Infer
|
||||||
|
| ELiteral of Literal.lit
|
||||||
|
| Use of stack_ref
|
||||||
|
| Lambda of lambda
|
||||||
|
| TyLambda of expr * lambda
|
||||||
|
| Apply of expr * Lex.loc_span * expr
|
||||||
|
| RefOf of expr
|
||||||
|
| RefTy of stack_ref * expr
|
||||||
|
| Record of expr record
|
||||||
|
| TyRecord of expr record
|
9
ocaml/libparse/dune
Normal file
9
ocaml/libparse/dune
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
(library
|
||||||
|
(name yanais_parse)
|
||||||
|
(public_name yanais.yanais_parse)
|
||||||
|
(synopsis "Parser for Yanais")
|
||||||
|
(libraries integers sedlex)
|
||||||
|
(preprocess
|
||||||
|
(pps sedlex.ppx)))
|
||||||
|
|
||||||
|
(documentation)
|
86
ocaml/libparse/lex.ml
Normal file
86
ocaml/libparse/lex.ml
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
(* A lexer for yanais *)
|
||||||
|
|
||||||
|
(* it is not necessary to be able to deal with source files >= 4GiB *)
|
||||||
|
type loc_span =
|
||||||
|
{ file : string;
|
||||||
|
start : int;
|
||||||
|
length : Unsigned.UInt32.t;
|
||||||
|
}
|
||||||
|
|
||||||
|
type tagged_ident = TaggedIdent of loc_span * string
|
||||||
|
|
||||||
|
type grp_state = Open | Close
|
||||||
|
|
||||||
|
type token =
|
||||||
|
| Paren of grp_state
|
||||||
|
| Brace of grp_state
|
||||||
|
| LArr
|
||||||
|
| RArr
|
||||||
|
| LDubArr
|
||||||
|
| RDubArr
|
||||||
|
| Caret
|
||||||
|
| Dot
|
||||||
|
| DubColon
|
||||||
|
| SemiColon
|
||||||
|
| Assign
|
||||||
|
| LLiteral of Literal.lit
|
||||||
|
| Ident of string
|
||||||
|
| PatOut of string
|
||||||
|
| DotIdent of string
|
||||||
|
| Symbol
|
||||||
|
|
||||||
|
type token_ann = token * loc_span
|
||||||
|
|
||||||
|
type lexer =
|
||||||
|
{ buf : Sedlexing.lexbuf;
|
||||||
|
file : string
|
||||||
|
}
|
||||||
|
|
||||||
|
let l_digit = [%sedlex.regexp? '0' .. '9']
|
||||||
|
let l_number = [%sedlex.regexp? Plus l_digit]
|
||||||
|
let l_ident = [%sedlex.regexp? xid_start, Star xid_continue]
|
||||||
|
|
||||||
|
let l_encap lx res = (res,
|
||||||
|
{ file = lx.file
|
||||||
|
; start = Sedlexing.lexeme_start lx.buf
|
||||||
|
; length = Unsigned.UInt32.of_int (Sedlexing.lexeme_length lx.buf)
|
||||||
|
})
|
||||||
|
|
||||||
|
let l_ow_ident lx wrap =
|
||||||
|
let buf = lx.buf in
|
||||||
|
match%sedlex buf with
|
||||||
|
| ("" | l_ident | eof) -> Sedlexing.Utf8.lexeme buf |> wrap |> l_encap lx
|
||||||
|
| _ -> failwith "Unexpected character (internal error)"
|
||||||
|
|
||||||
|
let rec tokens lx =
|
||||||
|
let buf = lx.buf in
|
||||||
|
match%sedlex buf with
|
||||||
|
| l_number ->
|
||||||
|
let res = LLiteral (Sedlexing.Utf8.lexeme buf |> fun x -> Literal.LNatural (int_of_string x)) in
|
||||||
|
let res = l_encap lx res in
|
||||||
|
res :: (tokens lx)
|
||||||
|
| l_ident ->
|
||||||
|
let cur = Sedlexing.Utf8.lexeme buf in
|
||||||
|
let res = match Literal.of_str cur with
|
||||||
|
| None -> Ident cur
|
||||||
|
| Some x -> LLiteral x
|
||||||
|
in
|
||||||
|
let res = l_encap lx res in
|
||||||
|
res :: (tokens lx)
|
||||||
|
| '$' ->
|
||||||
|
let res = l_ow_ident lx (fun x -> PatOut x) in
|
||||||
|
res :: (tokens lx)
|
||||||
|
| '.' ->
|
||||||
|
let res = l_ow_ident lx (fun x -> DotIdent x) in
|
||||||
|
res :: (tokens lx)
|
||||||
|
| white_space -> tokens lx
|
||||||
|
| eof -> []
|
||||||
|
| _ -> failwith "Unexpected character"
|
||||||
|
|
||||||
|
let run file dat =
|
||||||
|
let lx =
|
||||||
|
{ buf = Sedlexing.Utf8.from_gen dat
|
||||||
|
; file = file
|
||||||
|
} in
|
||||||
|
Sedlexing.set_filename lx.buf file;
|
||||||
|
tokens lx
|
|
@ -1,5 +1,13 @@
|
||||||
(* A lexer for yanais *)
|
(* A lexer for yanais *)
|
||||||
|
|
||||||
|
type loc_span = {
|
||||||
|
file : string;
|
||||||
|
start : int;
|
||||||
|
length : Unsigned.UInt32.t;
|
||||||
|
}
|
||||||
|
|
||||||
|
type tagged_ident = TaggedIdent of loc_span * string
|
||||||
|
|
||||||
type grp_state = Open | Close
|
type grp_state = Open | Close
|
||||||
|
|
||||||
type token =
|
type token =
|
||||||
|
@ -14,7 +22,12 @@ type token =
|
||||||
| DubColon
|
| DubColon
|
||||||
| SemiColon
|
| SemiColon
|
||||||
| Assign
|
| Assign
|
||||||
|
| LLiteral of Literal.lit
|
||||||
| Ident of string
|
| Ident of string
|
||||||
| PatOut of string
|
| PatOut of string
|
||||||
| DotIdent of string
|
| DotIdent of string
|
||||||
| Symbol
|
| Symbol
|
||||||
|
|
||||||
|
type token_ann = token * loc_span
|
||||||
|
|
||||||
|
val run : string -> char Gen.t -> token_ann list
|
Loading…
Reference in a new issue