ocaml: a lexer

This commit is contained in:
Alain Zscheile 2024-02-10 03:39:07 +01:00
parent d7354de720
commit ef3977ddca
8 changed files with 144 additions and 2 deletions

View file

@ -10,9 +10,10 @@
(description "The compiler for Yanais")
(depends
(cmdliner (>= 1.1))
(gen (>= 0.5))
(integers (>= 0.2))
(menhir (>= 20220210))
(uucp (>= 15.0))
(sedlex (>= 3.0))
)
)

View file

@ -2,6 +2,6 @@
(name yanais_core)
(public_name yanais.yanais_core)
(synopsis "The compiler for Yanais")
(libraries integers uucp))
(libraries integers yanais_parse))
(documentation)

33
ocaml/lib/syntax.ml Normal file
View file

@ -0,0 +1,33 @@
(* unevaluated expressions with no runtime variables *)
(* de-Bruijn index onto the stack *)
type stack_ref = int
open Yanais_parse
type pattern =
| PatIgnore of Lex.loc_span
| PatName of Lex.tagged_ident
| PatRecord of pattern record
and pattern_mt = pattern * ((Lex.loc_span * expr) option)
and 'a record = (Lex.loc_span * (string option) * 'a) list
and lambda =
{ pat : pattern_mt
; body_span : Lex.loc_span
; body : expr
}
and expr =
| Infer
| ELiteral of Literal.lit
| Use of stack_ref
| Lambda of lambda
| TyLambda of expr * lambda
| Apply of expr * Lex.loc_span * expr
| RefOf of expr
| RefTy of stack_ref * expr
| Record of expr record
| TyRecord of expr record

9
ocaml/libparse/dune Normal file
View file

@ -0,0 +1,9 @@
(library
(name yanais_parse)
(public_name yanais.yanais_parse)
(synopsis "Parser for Yanais")
(libraries integers sedlex)
(preprocess
(pps sedlex.ppx)))
(documentation)

86
ocaml/libparse/lex.ml Normal file
View file

@ -0,0 +1,86 @@
(* A lexer for yanais *)
(* it is not necessary to be able to deal with source files >= 4GiB *)
type loc_span =
{ file : string;
start : int;
length : Unsigned.UInt32.t;
}
type tagged_ident = TaggedIdent of loc_span * string
type grp_state = Open | Close
type token =
| Paren of grp_state
| Brace of grp_state
| LArr
| RArr
| LDubArr
| RDubArr
| Caret
| Dot
| DubColon
| SemiColon
| Assign
| LLiteral of Literal.lit
| Ident of string
| PatOut of string
| DotIdent of string
| Symbol
type token_ann = token * loc_span
type lexer =
{ buf : Sedlexing.lexbuf;
file : string
}
let l_digit = [%sedlex.regexp? '0' .. '9']
let l_number = [%sedlex.regexp? Plus l_digit]
let l_ident = [%sedlex.regexp? xid_start, Star xid_continue]
let l_encap lx res = (res,
{ file = lx.file
; start = Sedlexing.lexeme_start lx.buf
; length = Unsigned.UInt32.of_int (Sedlexing.lexeme_length lx.buf)
})
let l_ow_ident lx wrap =
let buf = lx.buf in
match%sedlex buf with
| ("" | l_ident | eof) -> Sedlexing.Utf8.lexeme buf |> wrap |> l_encap lx
| _ -> failwith "Unexpected character (internal error)"
let rec tokens lx =
let buf = lx.buf in
match%sedlex buf with
| l_number ->
let res = LLiteral (Sedlexing.Utf8.lexeme buf |> fun x -> Literal.LNatural (int_of_string x)) in
let res = l_encap lx res in
res :: (tokens lx)
| l_ident ->
let cur = Sedlexing.Utf8.lexeme buf in
let res = match Literal.of_str cur with
| None -> Ident cur
| Some x -> LLiteral x
in
let res = l_encap lx res in
res :: (tokens lx)
| '$' ->
let res = l_ow_ident lx (fun x -> PatOut x) in
res :: (tokens lx)
| '.' ->
let res = l_ow_ident lx (fun x -> DotIdent x) in
res :: (tokens lx)
| white_space -> tokens lx
| eof -> []
| _ -> failwith "Unexpected character"
let run file dat =
let lx =
{ buf = Sedlexing.Utf8.from_gen dat
; file = file
} in
Sedlexing.set_filename lx.buf file;
tokens lx

View file

@ -1,5 +1,13 @@
(* A lexer for yanais *)
type loc_span = {
file : string;
start : int;
length : Unsigned.UInt32.t;
}
type tagged_ident = TaggedIdent of loc_span * string
type grp_state = Open | Close
type token =
@ -14,7 +22,12 @@ type token =
| DubColon
| SemiColon
| Assign
| LLiteral of Literal.lit
| Ident of string
| PatOut of string
| DotIdent of string
| Symbol
type token_ann = token * loc_span
val run : string -> char Gen.t -> token_ann list