ocaml: a lexer
This commit is contained in:
parent
d7354de720
commit
ef3977ddca
|
@ -10,9 +10,10 @@
|
|||
(description "The compiler for Yanais")
|
||||
(depends
|
||||
(cmdliner (>= 1.1))
|
||||
(gen (>= 0.5))
|
||||
(integers (>= 0.2))
|
||||
(menhir (>= 20220210))
|
||||
(uucp (>= 15.0))
|
||||
(sedlex (>= 3.0))
|
||||
)
|
||||
)
|
||||
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
(name yanais_core)
|
||||
(public_name yanais.yanais_core)
|
||||
(synopsis "The compiler for Yanais")
|
||||
(libraries integers uucp))
|
||||
(libraries integers yanais_parse))
|
||||
|
||||
(documentation)
|
||||
|
|
33
ocaml/lib/syntax.ml
Normal file
33
ocaml/lib/syntax.ml
Normal file
|
@ -0,0 +1,33 @@
|
|||
(* unevaluated expressions with no runtime variables *)
|
||||
|
||||
(* de-Bruijn index onto the stack *)
|
||||
type stack_ref = int
|
||||
|
||||
open Yanais_parse
|
||||
|
||||
type pattern =
|
||||
| PatIgnore of Lex.loc_span
|
||||
| PatName of Lex.tagged_ident
|
||||
| PatRecord of pattern record
|
||||
|
||||
and pattern_mt = pattern * ((Lex.loc_span * expr) option)
|
||||
|
||||
and 'a record = (Lex.loc_span * (string option) * 'a) list
|
||||
|
||||
and lambda =
|
||||
{ pat : pattern_mt
|
||||
; body_span : Lex.loc_span
|
||||
; body : expr
|
||||
}
|
||||
|
||||
and expr =
|
||||
| Infer
|
||||
| ELiteral of Literal.lit
|
||||
| Use of stack_ref
|
||||
| Lambda of lambda
|
||||
| TyLambda of expr * lambda
|
||||
| Apply of expr * Lex.loc_span * expr
|
||||
| RefOf of expr
|
||||
| RefTy of stack_ref * expr
|
||||
| Record of expr record
|
||||
| TyRecord of expr record
|
9
ocaml/libparse/dune
Normal file
9
ocaml/libparse/dune
Normal file
|
@ -0,0 +1,9 @@
|
|||
(library
|
||||
(name yanais_parse)
|
||||
(public_name yanais.yanais_parse)
|
||||
(synopsis "Parser for Yanais")
|
||||
(libraries integers sedlex)
|
||||
(preprocess
|
||||
(pps sedlex.ppx)))
|
||||
|
||||
(documentation)
|
86
ocaml/libparse/lex.ml
Normal file
86
ocaml/libparse/lex.ml
Normal file
|
@ -0,0 +1,86 @@
|
|||
(* A lexer for yanais *)
|
||||
|
||||
(* it is not necessary to be able to deal with source files >= 4GiB *)
|
||||
type loc_span =
|
||||
{ file : string;
|
||||
start : int;
|
||||
length : Unsigned.UInt32.t;
|
||||
}
|
||||
|
||||
type tagged_ident = TaggedIdent of loc_span * string
|
||||
|
||||
type grp_state = Open | Close
|
||||
|
||||
type token =
|
||||
| Paren of grp_state
|
||||
| Brace of grp_state
|
||||
| LArr
|
||||
| RArr
|
||||
| LDubArr
|
||||
| RDubArr
|
||||
| Caret
|
||||
| Dot
|
||||
| DubColon
|
||||
| SemiColon
|
||||
| Assign
|
||||
| LLiteral of Literal.lit
|
||||
| Ident of string
|
||||
| PatOut of string
|
||||
| DotIdent of string
|
||||
| Symbol
|
||||
|
||||
type token_ann = token * loc_span
|
||||
|
||||
type lexer =
|
||||
{ buf : Sedlexing.lexbuf;
|
||||
file : string
|
||||
}
|
||||
|
||||
let l_digit = [%sedlex.regexp? '0' .. '9']
|
||||
let l_number = [%sedlex.regexp? Plus l_digit]
|
||||
let l_ident = [%sedlex.regexp? xid_start, Star xid_continue]
|
||||
|
||||
let l_encap lx res = (res,
|
||||
{ file = lx.file
|
||||
; start = Sedlexing.lexeme_start lx.buf
|
||||
; length = Unsigned.UInt32.of_int (Sedlexing.lexeme_length lx.buf)
|
||||
})
|
||||
|
||||
let l_ow_ident lx wrap =
|
||||
let buf = lx.buf in
|
||||
match%sedlex buf with
|
||||
| ("" | l_ident | eof) -> Sedlexing.Utf8.lexeme buf |> wrap |> l_encap lx
|
||||
| _ -> failwith "Unexpected character (internal error)"
|
||||
|
||||
let rec tokens lx =
|
||||
let buf = lx.buf in
|
||||
match%sedlex buf with
|
||||
| l_number ->
|
||||
let res = LLiteral (Sedlexing.Utf8.lexeme buf |> fun x -> Literal.LNatural (int_of_string x)) in
|
||||
let res = l_encap lx res in
|
||||
res :: (tokens lx)
|
||||
| l_ident ->
|
||||
let cur = Sedlexing.Utf8.lexeme buf in
|
||||
let res = match Literal.of_str cur with
|
||||
| None -> Ident cur
|
||||
| Some x -> LLiteral x
|
||||
in
|
||||
let res = l_encap lx res in
|
||||
res :: (tokens lx)
|
||||
| '$' ->
|
||||
let res = l_ow_ident lx (fun x -> PatOut x) in
|
||||
res :: (tokens lx)
|
||||
| '.' ->
|
||||
let res = l_ow_ident lx (fun x -> DotIdent x) in
|
||||
res :: (tokens lx)
|
||||
| white_space -> tokens lx
|
||||
| eof -> []
|
||||
| _ -> failwith "Unexpected character"
|
||||
|
||||
let run file dat =
|
||||
let lx =
|
||||
{ buf = Sedlexing.Utf8.from_gen dat
|
||||
; file = file
|
||||
} in
|
||||
Sedlexing.set_filename lx.buf file;
|
||||
tokens lx
|
|
@ -1,5 +1,13 @@
|
|||
(* A lexer for yanais *)
|
||||
|
||||
type loc_span = {
|
||||
file : string;
|
||||
start : int;
|
||||
length : Unsigned.UInt32.t;
|
||||
}
|
||||
|
||||
type tagged_ident = TaggedIdent of loc_span * string
|
||||
|
||||
type grp_state = Open | Close
|
||||
|
||||
type token =
|
||||
|
@ -14,7 +22,12 @@ type token =
|
|||
| DubColon
|
||||
| SemiColon
|
||||
| Assign
|
||||
| LLiteral of Literal.lit
|
||||
| Ident of string
|
||||
| PatOut of string
|
||||
| DotIdent of string
|
||||
| Symbol
|
||||
|
||||
type token_ann = token * loc_span
|
||||
|
||||
val run : string -> char Gen.t -> token_ann list
|
Loading…
Reference in a new issue