ocaml: basic locale stuff
This commit is contained in:
parent
6d0f18ee67
commit
97b4f61688
132
ocaml/lib/locale.ml
Normal file
132
ocaml/lib/locale.ml
Normal file
|
@ -0,0 +1,132 @@
|
|||
(* Localization support *)
|
||||
|
||||
open Format
|
||||
open Yanais_syntax.Tys
|
||||
|
||||
module type LocaleT = sig
|
||||
val fmt_loc_span : loc_span -> string
|
||||
val fmt_error_ctx : error_ctx -> string
|
||||
val pp_error : formatter -> error -> unit
|
||||
end
|
||||
|
||||
(* TODO:
|
||||
* print locations as lines and columns
|
||||
*)
|
||||
|
||||
module Bare : LocaleT = struct
|
||||
let fmt_loc_span (s : loc_span) =
|
||||
let b = Buffer.create 64 in
|
||||
Buffer.add_string b "file=";
|
||||
Buffer.add_string b s.file;
|
||||
Buffer.add_string b "; start=";
|
||||
Buffer.add_string b (string_of_int s.start);
|
||||
Buffer.add_string b "; len=";
|
||||
Buffer.add_string b (Unsigned.UInt32.to_int s.length |> string_of_int);
|
||||
Buffer.add_string b "; ";
|
||||
Buffer.contents b
|
||||
|
||||
let fmt_error_ctx = function
|
||||
| XComment -> "comment"
|
||||
| XExpression -> "expression"
|
||||
| XLambda -> "[ty]lambda"
|
||||
| XLet -> "let-expresssion"
|
||||
| XLiteral -> "literal"
|
||||
| XParens -> "parentheses"
|
||||
| XPattern -> "pattern"
|
||||
| XRecord -> "record"
|
||||
| XSelect -> "selection"
|
||||
| XString -> "string"
|
||||
| XIdent -> "identifier"
|
||||
|
||||
let pp_error fmtp (k,s) =
|
||||
pp_open_box fmtp 0;
|
||||
pp_print_string fmtp "error; ";
|
||||
pp_print_string fmtp (fmt_loc_span s);
|
||||
(match k with
|
||||
| EExpected x ->
|
||||
pp_print_string fmtp "#expected; ctx=";
|
||||
pp_print_string fmtp (fmt_error_ctx x)
|
||||
| EUnexpectedEof x ->
|
||||
pp_print_string fmtp "#unexpected_eof; ctx=";
|
||||
pp_print_string fmtp (fmt_error_ctx x)
|
||||
| EUnexpectedToken (x, t) ->
|
||||
pp_print_string fmtp "#unexpected_eof; ctx=";
|
||||
pp_print_string fmtp (fmt_error_ctx x);
|
||||
pp_print_string fmtp "; token=";
|
||||
pp_print_string fmtp (token_to_string t)
|
||||
| EUnhandled x ->
|
||||
pp_print_string fmtp "#unhandled; ";
|
||||
pp_print_string fmtp (String.escaped x)
|
||||
| ERecordDupIdent x ->
|
||||
pp_print_string fmtp "#record_dup_ident; ident=";
|
||||
pp_print_string fmtp x
|
||||
| EPatternDupIdent x ->
|
||||
pp_print_string fmtp "#pattern_dup_ident; ident=";
|
||||
pp_print_string fmtp x
|
||||
| EUnknownIdent x ->
|
||||
pp_print_string fmtp "#unknown_ident; ident=";
|
||||
pp_print_string fmtp x
|
||||
);
|
||||
pp_close_box fmtp ()
|
||||
end
|
||||
|
||||
module En : LocaleT = struct
|
||||
let fmt_loc_span (s : loc_span) =
|
||||
let b = Buffer.create 64 in
|
||||
Buffer.add_string b "File ";
|
||||
Buffer.add_string b s.file;
|
||||
Buffer.add_string b ": Span ";
|
||||
Buffer.add_string b (string_of_int s.start);
|
||||
Buffer.add_string b "..+";
|
||||
Buffer.add_string b (Unsigned.UInt32.to_int s.length |> string_of_int);
|
||||
Buffer.add_string b ": ";
|
||||
Buffer.contents b
|
||||
|
||||
let fmt_error_ctx = function
|
||||
| XComment -> "comment"
|
||||
| XExpression -> "expression"
|
||||
| XLambda -> "(ty)lambda"
|
||||
| XLet -> "let expresssion"
|
||||
| XLiteral -> "literal"
|
||||
| XParens -> "parentheses"
|
||||
| XPattern -> "pattern"
|
||||
| XRecord -> "record"
|
||||
| XSelect -> "selection"
|
||||
| XString -> "string"
|
||||
| XIdent -> "identifier"
|
||||
|
||||
let pp_error fmtp (k,s) =
|
||||
pp_open_box fmtp 0;
|
||||
pp_print_string fmtp "Error: ";
|
||||
pp_print_string fmtp (fmt_loc_span s);
|
||||
(match k with
|
||||
| EExpected x ->
|
||||
pp_print_string fmtp "#expected; ctx=";
|
||||
pp_print_string fmtp (fmt_error_ctx x)
|
||||
| EUnexpectedEof x ->
|
||||
pp_print_string fmtp "#unexpected_eof; ctx=";
|
||||
pp_print_string fmtp (fmt_error_ctx x)
|
||||
| EUnexpectedToken (x, t) ->
|
||||
pp_print_string fmtp "#unexpected_eof; ctx=";
|
||||
pp_print_string fmtp (fmt_error_ctx x);
|
||||
pp_print_string fmtp "; token=";
|
||||
pp_print_string fmtp (token_to_string t)
|
||||
| EUnhandled x ->
|
||||
pp_print_string fmtp "#unhandled; ";
|
||||
pp_print_string fmtp (String.escaped x)
|
||||
| ERecordDupIdent x ->
|
||||
pp_print_string fmtp "#record_dup_ident; ident=";
|
||||
pp_print_string fmtp x
|
||||
| EPatternDupIdent x ->
|
||||
pp_print_string fmtp "#pattern_dup_ident; ident=";
|
||||
pp_print_string fmtp x
|
||||
| EUnknownIdent x ->
|
||||
pp_print_string fmtp "#unknown_ident; ident=";
|
||||
pp_print_string fmtp x
|
||||
);
|
||||
pp_close_box fmtp ()
|
||||
end
|
||||
|
||||
let get = function
|
||||
| "en" -> (module En: LocaleT)
|
||||
| _ -> (module Bare: LocaleT)
|
|
@ -1,52 +1,15 @@
|
|||
(* A lexer for yanais *)
|
||||
|
||||
(* it is not necessary to be able to deal with source files >= 4GiB *)
|
||||
type loc_span =
|
||||
{ file : string;
|
||||
start : int;
|
||||
length : Unsigned.UInt32.t;
|
||||
}
|
||||
|
||||
type tagged_ident = TaggedIdent of loc_span * string
|
||||
|
||||
type grp_state = Open | Close
|
||||
|
||||
type token =
|
||||
| Paren of grp_state
|
||||
| Brace of grp_state
|
||||
| LArr
|
||||
| RArr
|
||||
| LDubArr
|
||||
| RDubArr
|
||||
| Caret
|
||||
| DubColon
|
||||
| SemiColon
|
||||
| Assign
|
||||
| LLiteral of Literal.lit
|
||||
| Ident of string
|
||||
(* note: PatOut and DotIdent might carry an empty identifier *)
|
||||
| PatOut of string
|
||||
| DotIdent of string
|
||||
| TString of string
|
||||
(* keywords *)
|
||||
| Lambda
|
||||
| TyLambda
|
||||
| Mu
|
||||
| Data
|
||||
| Layout
|
||||
| Let
|
||||
| Match
|
||||
|
||||
type token_ann = token * loc_span
|
||||
open Tys
|
||||
|
||||
type lexer =
|
||||
{ buf : Sedlexing.lexbuf;
|
||||
file : string
|
||||
}
|
||||
|
||||
let l_digit = [%sedlex.regexp? '0' .. '9']
|
||||
let l_digit = [%sedlex.regexp? '0' .. '9']
|
||||
let l_number = [%sedlex.regexp? Plus l_digit]
|
||||
let l_ident = [%sedlex.regexp? xid_start, Star xid_continue]
|
||||
let l_ident = [%sedlex.regexp? xid_start, Star xid_continue]
|
||||
|
||||
let l_encap lx res offs = (res,
|
||||
{ file = lx.file
|
||||
|
@ -54,6 +17,25 @@ let l_encap lx res offs = (res,
|
|||
; length = Unsigned.UInt32.of_int (Sedlexing.lexeme_length lx.buf)
|
||||
})
|
||||
|
||||
let l_raise lx xc = Yanais_syntax_err (l_encap lx xc 0) |> raise
|
||||
let l_unhandled lx = l_raise lx (EUnhandled (Sedlexing.Utf8.lexeme lx.buf))
|
||||
|
||||
(* this should be equal to Uunf_string.normalize_utf_8, but that function is unavailable with some builds *)
|
||||
let normalize_utf_8 nf s =
|
||||
let rec add buf normalizer v = match Uunf.add normalizer v with
|
||||
| `Uchar u -> Buffer.add_utf_8_uchar buf u; add buf normalizer `Await
|
||||
| `Await | `End -> ()
|
||||
in
|
||||
let rec loop buf s i max normalizer =
|
||||
if i > max then (add buf normalizer `End; Buffer.contents buf) else
|
||||
let dec = String.get_utf_8_uchar s i in
|
||||
add buf normalizer (`Uchar (Uchar.utf_decode_uchar dec));
|
||||
loop buf s (i + Uchar.utf_decode_length dec) max normalizer
|
||||
in
|
||||
let buf = Buffer.create (String.length s * 3) in
|
||||
let normalizer = Uunf.create nf in
|
||||
loop buf s 0 (String.length s - 1) normalizer
|
||||
|
||||
let l_ow_ident lx =
|
||||
let buf = lx.buf in
|
||||
match%sedlex buf with
|
||||
|
@ -67,6 +49,7 @@ let handle_comments lx () =
|
|||
let nlvl = match%sedlex buf with
|
||||
| "(*" -> lvl + 1
|
||||
| "*)" -> lvl - 1
|
||||
| eof -> l_raise lx (EUnexpectedEof XComment)
|
||||
| _ -> lvl
|
||||
in inner nlvl
|
||||
else
|
||||
|
@ -93,22 +76,6 @@ let rec handle_string lx s =
|
|||
| Some x -> handle_string lx (x::s)
|
||||
| None -> List.rev s |> String.concat "" (* note: no normalization here *)
|
||||
|
||||
(* this should be equal to Uunf_string.normalize_utf_8, but that function is unavailable with some builds *)
|
||||
let normalize_utf_8 nf s =
|
||||
let rec add buf normalizer v = match Uunf.add normalizer v with
|
||||
| `Uchar u -> Buffer.add_utf_8_uchar buf u; add buf normalizer `Await
|
||||
| `Await | `End -> ()
|
||||
in
|
||||
let rec loop buf s i max normalizer =
|
||||
if i > max then (add buf normalizer `End; Buffer.contents buf) else
|
||||
let dec = String.get_utf_8_uchar s i in
|
||||
add buf normalizer (`Uchar (Uchar.utf_decode_uchar dec));
|
||||
loop buf s (i + Uchar.utf_decode_length dec) max normalizer
|
||||
in
|
||||
let buf = Buffer.create (String.length s * 3) in
|
||||
let normalizer = Uunf.create nf in
|
||||
loop buf s 0 (String.length s - 1) normalizer
|
||||
|
||||
let tokens lx () =
|
||||
handle_comments lx ();
|
||||
let buf = lx.buf in
|
||||
|
@ -118,7 +85,7 @@ let tokens lx () =
|
|||
Some (l_encap lx res 0)
|
||||
| l_ident ->
|
||||
let cur = Sedlexing.Utf8.lexeme buf |> normalize_utf_8 `NFC in
|
||||
let res = match Literal.of_str cur with
|
||||
let res = match Literal.of_string cur with
|
||||
| Some x -> LLiteral x
|
||||
| None -> match cur with
|
||||
| "data" -> Data
|
||||
|
@ -150,6 +117,7 @@ let tokens lx () =
|
|||
| "\u{2192}" -> Some (l_encap lx RArr 0)
|
||||
| "\u{21D0}" -> Some (l_encap lx LDubArr 0)
|
||||
| "\u{21D2}" -> Some (l_encap lx RDubArr 0)
|
||||
|
||||
| "\"" ->
|
||||
let start = Sedlexing.lexeme_start lx.buf in
|
||||
let s = handle_string lx [] in
|
||||
|
@ -160,6 +128,7 @@ let tokens lx () =
|
|||
})
|
||||
| eof -> None
|
||||
(* note: white space is already handled by handle_comments *)
|
||||
| any -> l_unhandled lx
|
||||
| _ -> failwith "Unexpected character"
|
||||
|
||||
let run file dat =
|
||||
|
|
|
@ -1,41 +1,3 @@
|
|||
(* A lexer for yanais *)
|
||||
|
||||
type loc_span = {
|
||||
file : string;
|
||||
start : int;
|
||||
length : Unsigned.UInt32.t;
|
||||
}
|
||||
|
||||
type tagged_ident = TaggedIdent of loc_span * string
|
||||
|
||||
type grp_state = Open | Close
|
||||
|
||||
type token =
|
||||
| Paren of grp_state
|
||||
| Brace of grp_state
|
||||
| LArr
|
||||
| RArr
|
||||
| LDubArr
|
||||
| RDubArr
|
||||
| Caret
|
||||
| DubColon
|
||||
| SemiColon
|
||||
| Assign
|
||||
| LLiteral of Literal.lit
|
||||
| Ident of string
|
||||
(* note: PatOut and DotIdent might carry an empty identifier *)
|
||||
| PatOut of string
|
||||
| DotIdent of string
|
||||
| TString of string
|
||||
(* keywords *)
|
||||
| Lambda
|
||||
| TyLambda
|
||||
| Mu
|
||||
| Data
|
||||
| Layout
|
||||
| Let
|
||||
| Match
|
||||
|
||||
type token_ann = token * loc_span
|
||||
|
||||
val run : string -> char Gen.t -> token_ann Gen.t
|
||||
val run : string -> char Gen.t -> Tys.token_ann Gen.t
|
||||
|
|
|
@ -35,7 +35,7 @@ let isz_to_str = function
|
|||
|
||||
let str_post loc s = String.sub s loc (String.length s - loc)
|
||||
|
||||
let of_str = function
|
||||
let of_string = function
|
||||
| "Type" -> LTy Type |> Option.some
|
||||
| "Bool" -> LTy Bool |> Option.some
|
||||
| "Char" -> LTy Char |> Option.some
|
||||
|
@ -53,7 +53,7 @@ let of_str = function
|
|||
| Some x -> Some (LNatural x)
|
||||
| None -> None)
|
||||
|
||||
let to_str = function
|
||||
let to_string = function
|
||||
| LTy Type -> "Type"
|
||||
| LTy Bool -> "Bool"
|
||||
| LTy Char -> "Char"
|
||||
|
|
|
@ -14,5 +14,5 @@ type lit =
|
|||
| LIntSize of int_size
|
||||
| LNatural of int
|
||||
|
||||
val of_str : string -> lit Option.t
|
||||
val to_str : lit -> string
|
||||
val of_string : string -> lit Option.t
|
||||
val to_string : lit -> string
|
||||
|
|
|
@ -3,17 +3,17 @@
|
|||
type stack_ref = int
|
||||
|
||||
type pattern =
|
||||
| PatIgnore of Lex.loc_span
|
||||
| PatName of Lex.tagged_ident
|
||||
| PatIgnore of Tys.loc_span
|
||||
| PatName of Tys.tagged_ident
|
||||
| PatRecord of pattern record
|
||||
|
||||
and pattern_mt = pattern * ((Lex.loc_span * expr) option)
|
||||
and pattern_mt = pattern * ((Tys.loc_span * expr) option)
|
||||
|
||||
and 'a record = (Lex.loc_span * (string option) * 'a) list
|
||||
and 'a record = (Tys.loc_span * (string option) * 'a) list
|
||||
|
||||
and lambda =
|
||||
{ pat : pattern_mt
|
||||
; body_span : Lex.loc_span
|
||||
; body_span : Tys.loc_span
|
||||
; body : expr
|
||||
}
|
||||
|
||||
|
@ -23,7 +23,7 @@ and expr =
|
|||
| Use of stack_ref
|
||||
| Lambda of lambda
|
||||
| TyLambda of expr * lambda
|
||||
| Apply of expr * Lex.loc_span * expr
|
||||
| Apply of expr * Tys.loc_span * expr
|
||||
| RefOf of expr
|
||||
| RefTy of stack_ref * expr
|
||||
| Record of expr record
|
||||
|
@ -32,6 +32,20 @@ and expr =
|
|||
(* parser *)
|
||||
|
||||
type parser_env =
|
||||
{ lg : Lex.token_ann Gen.t
|
||||
; stack : string list
|
||||
{ lg : Tys.token_ann Gen.t
|
||||
; names : string list
|
||||
}
|
||||
|
||||
type parser_err = unit
|
||||
|
||||
type 'suc parser_result =
|
||||
| POk of 'suc
|
||||
| PNone
|
||||
| PErr of parser_err
|
||||
|
||||
let parser_init lg = { lg; names = [] }
|
||||
|
||||
let parse_expr ps =
|
||||
match Gen.next ps.lg with
|
||||
| None -> None
|
||||
| Some _ -> Some ()
|
||||
|
|
105
ocaml/libsyntax/tys.ml
Normal file
105
ocaml/libsyntax/tys.ml
Normal file
|
@ -0,0 +1,105 @@
|
|||
(* this exists to prevent cyclic dependencies between files *)
|
||||
|
||||
(* it is not necessary to be able to deal with source files >= 4GiB *)
|
||||
type loc_span =
|
||||
{ file : string;
|
||||
start : int;
|
||||
length : Unsigned.UInt32.t;
|
||||
}
|
||||
|
||||
type tagged_ident = TaggedIdent of loc_span * string
|
||||
|
||||
type grp_state = Open | Close
|
||||
|
||||
type token =
|
||||
| Paren of grp_state
|
||||
| Brace of grp_state
|
||||
| LArr
|
||||
| RArr
|
||||
| LDubArr
|
||||
| RDubArr
|
||||
| Caret
|
||||
| DubColon
|
||||
| SemiColon
|
||||
| Assign
|
||||
| LLiteral of Literal.lit
|
||||
| Ident of string
|
||||
(* note: PatOut and DotIdent might carry an empty identifier *)
|
||||
| PatOut of string
|
||||
| DotIdent of string
|
||||
| TString of string
|
||||
(* keywords *)
|
||||
| Lambda
|
||||
| TyLambda
|
||||
| Mu
|
||||
| Data
|
||||
| Layout
|
||||
| Let
|
||||
| Match
|
||||
|
||||
type token_ann = token * loc_span
|
||||
|
||||
let token_to_string = function
|
||||
| Paren Open -> "("
|
||||
| Paren Close -> ")"
|
||||
| Brace Open -> "{"
|
||||
| Brace Close -> "}"
|
||||
| LArr -> "\u{2190}"
|
||||
| RArr -> "\u{2192}"
|
||||
| LDubArr -> "\u{21D0}"
|
||||
| RDubArr -> "\u{21D2}"
|
||||
| Caret -> "^"
|
||||
| DubColon -> ":"
|
||||
| SemiColon -> ";"
|
||||
| Assign -> "="
|
||||
| LLiteral l -> Literal.to_string l
|
||||
| Ident x -> x
|
||||
| PatOut x -> "$" ^ x
|
||||
| DotIdent x -> "." ^ x
|
||||
| TString x -> String.escaped x
|
||||
| Lambda -> "\u{03BB}"
|
||||
| TyLambda -> "\u{039B}"
|
||||
| Mu -> "\u{03BC}"
|
||||
| Data -> "data"
|
||||
| Layout -> "layout"
|
||||
| Let -> "let"
|
||||
| Match -> "match"
|
||||
|
||||
type error_ctx =
|
||||
| XComment
|
||||
| XExpression
|
||||
| XLambda
|
||||
| XLet
|
||||
| XLiteral
|
||||
| XParens
|
||||
| XPattern
|
||||
| XRecord
|
||||
| XSelect
|
||||
| XString
|
||||
| XIdent
|
||||
|
||||
let str_of_ectx = function
|
||||
| XComment -> "comment"
|
||||
| XExpression -> "expression"
|
||||
| XLambda -> "(ty)lambda"
|
||||
| XLet -> "let expresssion"
|
||||
| XLiteral -> "literal"
|
||||
| XParens -> "parentheses"
|
||||
| XPattern -> "pattern"
|
||||
| XRecord -> "record"
|
||||
| XSelect -> "selection"
|
||||
| XString -> "string"
|
||||
| XIdent -> "identifier"
|
||||
|
||||
type error_kind =
|
||||
| EExpected of error_ctx
|
||||
| EUnexpectedEof of error_ctx
|
||||
| EUnexpectedToken of error_ctx * token
|
||||
| EUnhandled of string
|
||||
| ERecordDupIdent of string
|
||||
| EPatternDupIdent of string
|
||||
| EUnknownIdent of string
|
||||
|
||||
type error = error_kind * loc_span
|
||||
|
||||
exception Yanais_syntax_err of error
|
Loading…
Reference in a new issue