ocaml: basic locale stuff

This commit is contained in:
Alain Zscheile 2024-02-11 02:13:54 +01:00
parent 6d0f18ee67
commit 97b4f61688
7 changed files with 290 additions and 108 deletions

132
ocaml/lib/locale.ml Normal file
View file

@ -0,0 +1,132 @@
(* Localization support *)
open Format
open Yanais_syntax.Tys
module type LocaleT = sig
val fmt_loc_span : loc_span -> string
val fmt_error_ctx : error_ctx -> string
val pp_error : formatter -> error -> unit
end
(* TODO:
* print locations as lines and columns
*)
module Bare : LocaleT = struct
let fmt_loc_span (s : loc_span) =
let b = Buffer.create 64 in
Buffer.add_string b "file=";
Buffer.add_string b s.file;
Buffer.add_string b "; start=";
Buffer.add_string b (string_of_int s.start);
Buffer.add_string b "; len=";
Buffer.add_string b (Unsigned.UInt32.to_int s.length |> string_of_int);
Buffer.add_string b "; ";
Buffer.contents b
let fmt_error_ctx = function
| XComment -> "comment"
| XExpression -> "expression"
| XLambda -> "[ty]lambda"
| XLet -> "let-expresssion"
| XLiteral -> "literal"
| XParens -> "parentheses"
| XPattern -> "pattern"
| XRecord -> "record"
| XSelect -> "selection"
| XString -> "string"
| XIdent -> "identifier"
let pp_error fmtp (k,s) =
pp_open_box fmtp 0;
pp_print_string fmtp "error; ";
pp_print_string fmtp (fmt_loc_span s);
(match k with
| EExpected x ->
pp_print_string fmtp "#expected; ctx=";
pp_print_string fmtp (fmt_error_ctx x)
| EUnexpectedEof x ->
pp_print_string fmtp "#unexpected_eof; ctx=";
pp_print_string fmtp (fmt_error_ctx x)
| EUnexpectedToken (x, t) ->
pp_print_string fmtp "#unexpected_eof; ctx=";
pp_print_string fmtp (fmt_error_ctx x);
pp_print_string fmtp "; token=";
pp_print_string fmtp (token_to_string t)
| EUnhandled x ->
pp_print_string fmtp "#unhandled; ";
pp_print_string fmtp (String.escaped x)
| ERecordDupIdent x ->
pp_print_string fmtp "#record_dup_ident; ident=";
pp_print_string fmtp x
| EPatternDupIdent x ->
pp_print_string fmtp "#pattern_dup_ident; ident=";
pp_print_string fmtp x
| EUnknownIdent x ->
pp_print_string fmtp "#unknown_ident; ident=";
pp_print_string fmtp x
);
pp_close_box fmtp ()
end
module En : LocaleT = struct
let fmt_loc_span (s : loc_span) =
let b = Buffer.create 64 in
Buffer.add_string b "File ";
Buffer.add_string b s.file;
Buffer.add_string b ": Span ";
Buffer.add_string b (string_of_int s.start);
Buffer.add_string b "..+";
Buffer.add_string b (Unsigned.UInt32.to_int s.length |> string_of_int);
Buffer.add_string b ": ";
Buffer.contents b
let fmt_error_ctx = function
| XComment -> "comment"
| XExpression -> "expression"
| XLambda -> "(ty)lambda"
| XLet -> "let expresssion"
| XLiteral -> "literal"
| XParens -> "parentheses"
| XPattern -> "pattern"
| XRecord -> "record"
| XSelect -> "selection"
| XString -> "string"
| XIdent -> "identifier"
let pp_error fmtp (k,s) =
pp_open_box fmtp 0;
pp_print_string fmtp "Error: ";
pp_print_string fmtp (fmt_loc_span s);
(match k with
| EExpected x ->
pp_print_string fmtp "#expected; ctx=";
pp_print_string fmtp (fmt_error_ctx x)
| EUnexpectedEof x ->
pp_print_string fmtp "#unexpected_eof; ctx=";
pp_print_string fmtp (fmt_error_ctx x)
| EUnexpectedToken (x, t) ->
pp_print_string fmtp "#unexpected_eof; ctx=";
pp_print_string fmtp (fmt_error_ctx x);
pp_print_string fmtp "; token=";
pp_print_string fmtp (token_to_string t)
| EUnhandled x ->
pp_print_string fmtp "#unhandled; ";
pp_print_string fmtp (String.escaped x)
| ERecordDupIdent x ->
pp_print_string fmtp "#record_dup_ident; ident=";
pp_print_string fmtp x
| EPatternDupIdent x ->
pp_print_string fmtp "#pattern_dup_ident; ident=";
pp_print_string fmtp x
| EUnknownIdent x ->
pp_print_string fmtp "#unknown_ident; ident=";
pp_print_string fmtp x
);
pp_close_box fmtp ()
end
let get = function
| "en" -> (module En: LocaleT)
| _ -> (module Bare: LocaleT)

View file

@ -1,52 +1,15 @@
(* A lexer for yanais *)
(* it is not necessary to be able to deal with source files >= 4GiB *)
type loc_span =
{ file : string;
start : int;
length : Unsigned.UInt32.t;
}
type tagged_ident = TaggedIdent of loc_span * string
type grp_state = Open | Close
type token =
| Paren of grp_state
| Brace of grp_state
| LArr
| RArr
| LDubArr
| RDubArr
| Caret
| DubColon
| SemiColon
| Assign
| LLiteral of Literal.lit
| Ident of string
(* note: PatOut and DotIdent might carry an empty identifier *)
| PatOut of string
| DotIdent of string
| TString of string
(* keywords *)
| Lambda
| TyLambda
| Mu
| Data
| Layout
| Let
| Match
type token_ann = token * loc_span
open Tys
type lexer =
{ buf : Sedlexing.lexbuf;
file : string
}
let l_digit = [%sedlex.regexp? '0' .. '9']
let l_digit = [%sedlex.regexp? '0' .. '9']
let l_number = [%sedlex.regexp? Plus l_digit]
let l_ident = [%sedlex.regexp? xid_start, Star xid_continue]
let l_ident = [%sedlex.regexp? xid_start, Star xid_continue]
let l_encap lx res offs = (res,
{ file = lx.file
@ -54,6 +17,25 @@ let l_encap lx res offs = (res,
; length = Unsigned.UInt32.of_int (Sedlexing.lexeme_length lx.buf)
})
let l_raise lx xc = Yanais_syntax_err (l_encap lx xc 0) |> raise
let l_unhandled lx = l_raise lx (EUnhandled (Sedlexing.Utf8.lexeme lx.buf))
(* this should be equal to Uunf_string.normalize_utf_8, but that function is unavailable with some builds *)
let normalize_utf_8 nf s =
let rec add buf normalizer v = match Uunf.add normalizer v with
| `Uchar u -> Buffer.add_utf_8_uchar buf u; add buf normalizer `Await
| `Await | `End -> ()
in
let rec loop buf s i max normalizer =
if i > max then (add buf normalizer `End; Buffer.contents buf) else
let dec = String.get_utf_8_uchar s i in
add buf normalizer (`Uchar (Uchar.utf_decode_uchar dec));
loop buf s (i + Uchar.utf_decode_length dec) max normalizer
in
let buf = Buffer.create (String.length s * 3) in
let normalizer = Uunf.create nf in
loop buf s 0 (String.length s - 1) normalizer
let l_ow_ident lx =
let buf = lx.buf in
match%sedlex buf with
@ -67,6 +49,7 @@ let handle_comments lx () =
let nlvl = match%sedlex buf with
| "(*" -> lvl + 1
| "*)" -> lvl - 1
| eof -> l_raise lx (EUnexpectedEof XComment)
| _ -> lvl
in inner nlvl
else
@ -93,22 +76,6 @@ let rec handle_string lx s =
| Some x -> handle_string lx (x::s)
| None -> List.rev s |> String.concat "" (* note: no normalization here *)
(* this should be equal to Uunf_string.normalize_utf_8, but that function is unavailable with some builds *)
let normalize_utf_8 nf s =
let rec add buf normalizer v = match Uunf.add normalizer v with
| `Uchar u -> Buffer.add_utf_8_uchar buf u; add buf normalizer `Await
| `Await | `End -> ()
in
let rec loop buf s i max normalizer =
if i > max then (add buf normalizer `End; Buffer.contents buf) else
let dec = String.get_utf_8_uchar s i in
add buf normalizer (`Uchar (Uchar.utf_decode_uchar dec));
loop buf s (i + Uchar.utf_decode_length dec) max normalizer
in
let buf = Buffer.create (String.length s * 3) in
let normalizer = Uunf.create nf in
loop buf s 0 (String.length s - 1) normalizer
let tokens lx () =
handle_comments lx ();
let buf = lx.buf in
@ -118,7 +85,7 @@ let tokens lx () =
Some (l_encap lx res 0)
| l_ident ->
let cur = Sedlexing.Utf8.lexeme buf |> normalize_utf_8 `NFC in
let res = match Literal.of_str cur with
let res = match Literal.of_string cur with
| Some x -> LLiteral x
| None -> match cur with
| "data" -> Data
@ -150,6 +117,7 @@ let tokens lx () =
| "\u{2192}" -> Some (l_encap lx RArr 0)
| "\u{21D0}" -> Some (l_encap lx LDubArr 0)
| "\u{21D2}" -> Some (l_encap lx RDubArr 0)
| "\"" ->
let start = Sedlexing.lexeme_start lx.buf in
let s = handle_string lx [] in
@ -160,6 +128,7 @@ let tokens lx () =
})
| eof -> None
(* note: white space is already handled by handle_comments *)
| any -> l_unhandled lx
| _ -> failwith "Unexpected character"
let run file dat =

View file

@ -1,41 +1,3 @@
(* A lexer for yanais *)
type loc_span = {
file : string;
start : int;
length : Unsigned.UInt32.t;
}
type tagged_ident = TaggedIdent of loc_span * string
type grp_state = Open | Close
type token =
| Paren of grp_state
| Brace of grp_state
| LArr
| RArr
| LDubArr
| RDubArr
| Caret
| DubColon
| SemiColon
| Assign
| LLiteral of Literal.lit
| Ident of string
(* note: PatOut and DotIdent might carry an empty identifier *)
| PatOut of string
| DotIdent of string
| TString of string
(* keywords *)
| Lambda
| TyLambda
| Mu
| Data
| Layout
| Let
| Match
type token_ann = token * loc_span
val run : string -> char Gen.t -> token_ann Gen.t
val run : string -> char Gen.t -> Tys.token_ann Gen.t

View file

@ -35,7 +35,7 @@ let isz_to_str = function
let str_post loc s = String.sub s loc (String.length s - loc)
let of_str = function
let of_string = function
| "Type" -> LTy Type |> Option.some
| "Bool" -> LTy Bool |> Option.some
| "Char" -> LTy Char |> Option.some
@ -53,7 +53,7 @@ let of_str = function
| Some x -> Some (LNatural x)
| None -> None)
let to_str = function
let to_string = function
| LTy Type -> "Type"
| LTy Bool -> "Bool"
| LTy Char -> "Char"

View file

@ -14,5 +14,5 @@ type lit =
| LIntSize of int_size
| LNatural of int
val of_str : string -> lit Option.t
val to_str : lit -> string
val of_string : string -> lit Option.t
val to_string : lit -> string

View file

@ -3,17 +3,17 @@
type stack_ref = int
type pattern =
| PatIgnore of Lex.loc_span
| PatName of Lex.tagged_ident
| PatIgnore of Tys.loc_span
| PatName of Tys.tagged_ident
| PatRecord of pattern record
and pattern_mt = pattern * ((Lex.loc_span * expr) option)
and pattern_mt = pattern * ((Tys.loc_span * expr) option)
and 'a record = (Lex.loc_span * (string option) * 'a) list
and 'a record = (Tys.loc_span * (string option) * 'a) list
and lambda =
{ pat : pattern_mt
; body_span : Lex.loc_span
; body_span : Tys.loc_span
; body : expr
}
@ -23,7 +23,7 @@ and expr =
| Use of stack_ref
| Lambda of lambda
| TyLambda of expr * lambda
| Apply of expr * Lex.loc_span * expr
| Apply of expr * Tys.loc_span * expr
| RefOf of expr
| RefTy of stack_ref * expr
| Record of expr record
@ -32,6 +32,20 @@ and expr =
(* parser *)
type parser_env =
{ lg : Lex.token_ann Gen.t
; stack : string list
{ lg : Tys.token_ann Gen.t
; names : string list
}
type parser_err = unit
type 'suc parser_result =
| POk of 'suc
| PNone
| PErr of parser_err
let parser_init lg = { lg; names = [] }
let parse_expr ps =
match Gen.next ps.lg with
| None -> None
| Some _ -> Some ()

105
ocaml/libsyntax/tys.ml Normal file
View file

@ -0,0 +1,105 @@
(* this exists to prevent cyclic dependencies between files *)
(* it is not necessary to be able to deal with source files >= 4GiB *)
type loc_span =
{ file : string;
start : int;
length : Unsigned.UInt32.t;
}
type tagged_ident = TaggedIdent of loc_span * string
type grp_state = Open | Close
type token =
| Paren of grp_state
| Brace of grp_state
| LArr
| RArr
| LDubArr
| RDubArr
| Caret
| DubColon
| SemiColon
| Assign
| LLiteral of Literal.lit
| Ident of string
(* note: PatOut and DotIdent might carry an empty identifier *)
| PatOut of string
| DotIdent of string
| TString of string
(* keywords *)
| Lambda
| TyLambda
| Mu
| Data
| Layout
| Let
| Match
type token_ann = token * loc_span
let token_to_string = function
| Paren Open -> "("
| Paren Close -> ")"
| Brace Open -> "{"
| Brace Close -> "}"
| LArr -> "\u{2190}"
| RArr -> "\u{2192}"
| LDubArr -> "\u{21D0}"
| RDubArr -> "\u{21D2}"
| Caret -> "^"
| DubColon -> ":"
| SemiColon -> ";"
| Assign -> "="
| LLiteral l -> Literal.to_string l
| Ident x -> x
| PatOut x -> "$" ^ x
| DotIdent x -> "." ^ x
| TString x -> String.escaped x
| Lambda -> "\u{03BB}"
| TyLambda -> "\u{039B}"
| Mu -> "\u{03BC}"
| Data -> "data"
| Layout -> "layout"
| Let -> "let"
| Match -> "match"
type error_ctx =
| XComment
| XExpression
| XLambda
| XLet
| XLiteral
| XParens
| XPattern
| XRecord
| XSelect
| XString
| XIdent
let str_of_ectx = function
| XComment -> "comment"
| XExpression -> "expression"
| XLambda -> "(ty)lambda"
| XLet -> "let expresssion"
| XLiteral -> "literal"
| XParens -> "parentheses"
| XPattern -> "pattern"
| XRecord -> "record"
| XSelect -> "selection"
| XString -> "string"
| XIdent -> "identifier"
type error_kind =
| EExpected of error_ctx
| EUnexpectedEof of error_ctx
| EUnexpectedToken of error_ctx * token
| EUnhandled of string
| ERecordDupIdent of string
| EPatternDupIdent of string
| EUnknownIdent of string
type error = error_kind * loc_span
exception Yanais_syntax_err of error