Applicative text parser
TODO: markdown & gemini coming git-svn-id: https://svn.yakumo.dev/yakumo.izuru/text_parse/trunk@1 cb476dc4-a1c2-9446-a177-162899b6b847
This commit is contained in:
commit
77bc45eb1b
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
_build
|
||||
*.txt
|
||||
*.merlin
|
23
bin/cli.ml
Normal file
23
bin/cli.ml
Normal file
@ -0,0 +1,23 @@
|
||||
module Test = struct
|
||||
type t = unit
|
||||
let blank_line () = print_string "{bl}"
|
||||
let angled_uri s () = print_string ("{>}" ^ s ^ "{<}")
|
||||
let plain_text s () = print_string s
|
||||
let heading_hashbang i s () = print_string (string_of_int i ^ s)
|
||||
let paragraph_s () = print_string "{p>}"
|
||||
let paragraph_e () = print_string "{<p}"
|
||||
let key_value a b () = print_endline (a ^"~"^ String.trim b)
|
||||
end
|
||||
|
||||
let () =
|
||||
let string_of_file filename =
|
||||
let ch = open_in filename in
|
||||
let s = really_input_string ch (in_channel_length ch) in
|
||||
close_in ch;
|
||||
s in
|
||||
let filename = Sys.argv.(1) in
|
||||
(* let module Parse = Text.MakeSimple (Html) in *)
|
||||
let module Parse = Parsers.Plain_text.Make (Test) in
|
||||
(*let subsyntaxes = [| (module Parser.Key_value.Make (Test) : Text.Parser with type t = Test.t) |] in*)
|
||||
(*let of_string text acc = Text.parse subsyntaxes { text; pos = 0; right_boundary = String.length text - 1 } acc in*)
|
||||
Parse.of_string (string_of_file filename) ()
|
4
bin/dune
Normal file
4
bin/dune
Normal file
@ -0,0 +1,4 @@
|
||||
(executable
|
||||
(name cli)
|
||||
(modules cli)
|
||||
(libraries text parsers))
|
3
converters/dune
Normal file
3
converters/dune
Normal file
@ -0,0 +1,3 @@
|
||||
(library
|
||||
(name converter)
|
||||
(public_name text_parse.converter))
|
28
converters/gemini.ml
Normal file
28
converters/gemini.ml
Normal file
@ -0,0 +1,28 @@
|
||||
type t = string
|
||||
let blank_line a = a ^ "\n"
|
||||
let plain_text s a = a ^ s
|
||||
let sentence_s a = a ^ ""
|
||||
let sentence_e a = a ^ " "
|
||||
let sentence_segment s a = a ^ " " ^ s
|
||||
let reference_name n a = a ^ "[" ^ n ^ "]"
|
||||
let bracketed_referent_s n a = a ^ "[" ^ n ^ "]: "
|
||||
let bracketed_referent_e a = a ^ "\n"
|
||||
let angled_uri u a = a ^ "\n=> " ^ u
|
||||
let bold t a = a ^ "*" ^ t ^ "*"
|
||||
let italic t a = a ^ "/" ^ t ^ "/"
|
||||
let underline t a = a ^ "_" ^ t ^ "_"
|
||||
let inline_monospace t a = a ^ "`" ^ t ^ "`"
|
||||
let heading_hashbang lvl h a = a ^ String.make lvl '#' ^ h ^ "\n"
|
||||
let paragraph_s a = a
|
||||
let paragraph_e a = a
|
||||
let preformatted s a = a ^ "<pre>" ^ s ^ "</pre>"
|
||||
let bullet_list_s a = a
|
||||
let bullet_list_e a = a
|
||||
let bullet_item_s ch a = a ^ Char.escaped ch
|
||||
let bullet_item_e a = a ^ "\n"
|
||||
let ordered_list_s a = a
|
||||
let ordered_list_e a = a
|
||||
let ordered_item_s = bullet_item_s
|
||||
let ordered_item_e = bullet_item_e
|
||||
let key_value_pair k v a = prerr_endline @@ k ^ "~" ^ v; a
|
||||
|
39
converters/html.ml
Normal file
39
converters/html.ml
Normal file
@ -0,0 +1,39 @@
|
||||
let esc x =
|
||||
let fn a c = match c with
|
||||
| '&' -> a ^ "&"
|
||||
| '<' -> a ^ "<"
|
||||
| '"' -> a ^ """
|
||||
| '\''-> a ^ "'"
|
||||
| x -> a ^ String.make 1 x
|
||||
in
|
||||
Seq.fold_left fn "" (String.to_seq x)
|
||||
|
||||
type t = string
|
||||
let blank_line a = a ^ ""
|
||||
let plain_text s a = a ^ esc s
|
||||
let sentence_s a = a ^ ""
|
||||
let sentence_e a = a ^ " "
|
||||
let sentence_segment s a = a ^ esc s ^ " "
|
||||
let reference_name n a = a ^ {|<a href="#|} ^ n ^ {|">|} ^ esc n ^ "</a>"
|
||||
let bracketed_referent_s n a = a ^ {|<a id="|} ^ n ^ {|">|} ^ esc n ^ "</a>: "
|
||||
let bracketed_referent_e a = a ^ "<br/>"
|
||||
let angled_uri u a = a ^ {|<<a href="|} ^ u ^ {|">|} ^ esc u ^ {|</a>>|}
|
||||
let bold t a = a ^ "<b>" ^ esc t ^ "</b>"
|
||||
let italic t a = a ^ "<i>" ^ esc t ^ "</i>"
|
||||
let underline t a = a ^ "<u>" ^ esc t ^ "</u>"
|
||||
let inline_monospace t a = a ^ "<code>" ^ esc t ^ "</code>"
|
||||
let heading_hashbang lvl h a =
|
||||
let lvl = string_of_int lvl in
|
||||
a ^ "<h" ^ lvl ^ " id=\"" ^ esc (String.lowercase_ascii h) ^"\">" ^ esc h ^ "</h" ^ lvl ^ ">"
|
||||
let paragraph_s a = a ^ "<p>"
|
||||
let paragraph_e a = a ^ "</p>"
|
||||
let preformatted s a = a ^ "<pre>" ^ esc s ^ "</pre>"
|
||||
let bullet_list_s a = a ^ "<ul>"
|
||||
let bullet_list_e a = a ^ "</ul>"
|
||||
let bullet_item_s _ch a = a ^ "<li>"
|
||||
let bullet_item_e a = a ^ "</li>"
|
||||
let ordered_list_s a = a ^ "<ol>"
|
||||
let ordered_list_e a = a ^ "</ol>"
|
||||
let ordered_item_s = bullet_item_s
|
||||
let ordered_item_e = bullet_item_e
|
||||
let key_value k v a = prerr_endline @@ k ^ "~" ^ v; a
|
22
cursor.ml
Normal file
22
cursor.ml
Normal file
@ -0,0 +1,22 @@
|
||||
type t = { text : string; pos : int; right_boundary : int }
|
||||
|
||||
let overran cursor = cursor.pos >= cursor.right_boundary
|
||||
let next_char cursor = { cursor with pos = cursor.pos + 1 }
|
||||
let char_at cur offset = String.get cur.text (cur.pos + offset)
|
||||
let char cur = String.get cur.text cur.pos
|
||||
let distance a b = b.pos - a.pos
|
||||
|
||||
let sub ?left ?right cur = { cur with
|
||||
pos = Option.value left ~default:cur.pos;
|
||||
right_boundary = Option.value right ~default:cur.right_boundary }
|
||||
|
||||
let unwrap num cur = sub ~left:(cur.pos+num) ~right:(cur.right_boundary-num) cur
|
||||
|
||||
let segment_string cur = String.sub cur.text cur.pos (cur.right_boundary - cur.pos)
|
||||
|
||||
(*todo: reconsider +1 result and type cursor*)
|
||||
let rec find_end e = function
|
||||
| cur when cur.pos + 1 = String.length cur.text -> Some cur.pos
|
||||
| cur when overran cur -> None
|
||||
| cur when e cur (char cur) -> Some (cur.pos + 1)
|
||||
| cur -> find_end e (next_char cur)
|
4
dune
Normal file
4
dune
Normal file
@ -0,0 +1,4 @@
|
||||
(library
|
||||
(name text_parse)
|
||||
(public_name text_parse)
|
||||
(modules parser syntax cursor))
|
16
dune-project
Normal file
16
dune-project
Normal file
@ -0,0 +1,16 @@
|
||||
(lang dune 2.4)
|
||||
(name text_parse)
|
||||
(version 1.02)
|
||||
(homepage "https://cgit.orbitalfox.eu/text-parse-ml")
|
||||
|
||||
(source (uri git://orbitalfox.eu/text-parse-ml))
|
||||
(license EUPL)
|
||||
(authors "orbifx")
|
||||
(maintainers "fox@orbitalfox.eu")
|
||||
(bug_reports "mailto:fox@orbitalfox.eu?subject=Text parse ML Issue:")
|
||||
|
||||
(generate_opam_files true)
|
||||
|
||||
(package
|
||||
(name text_parse)
|
||||
(synopsis "Applicative text parsing"))
|
37
parser.ml
Normal file
37
parser.ml
Normal file
@ -0,0 +1,37 @@
|
||||
module type S = sig
|
||||
include Syntax.S
|
||||
type t
|
||||
val parse: Cursor.t -> t -> t
|
||||
end
|
||||
|
||||
module type Sub_parsers = sig
|
||||
type t
|
||||
val subparsers: (module S with type t = t) array
|
||||
end
|
||||
|
||||
let at s e cur ch = if s cur ch then Cursor.find_end e cur else None
|
||||
|
||||
let apply_default (type a) (module P: S with type t = a) (acc: a) cursor_default cursor =
|
||||
if cursor_default = cursor then acc
|
||||
else P.parse (Cursor.sub ~right:(cursor.Cursor.pos) cursor_default) acc
|
||||
|
||||
let rec branch: type a. ?idx:int -> a -> Cursor.t -> Cursor.t -> (module S with type t = a) array -> (a * Cursor.t) =
|
||||
fun ?idx:(i=1) acc cursor_default cursor syntaxes ->
|
||||
if Cursor.overran cursor then (apply_default syntaxes.(0) acc cursor_default cursor), cursor
|
||||
else
|
||||
try let (module P: S with type t = a) = syntaxes.(i) in
|
||||
(match at P.s P.e cursor (Cursor.char cursor) with
|
||||
| Some right ->
|
||||
let acc = apply_default syntaxes.(0) acc cursor_default cursor in
|
||||
let acc = P.parse (Cursor.sub ~right cursor) acc in
|
||||
let cursor = Cursor.sub ~left:right cursor in
|
||||
branch acc cursor cursor syntaxes
|
||||
| None | exception Invalid_argument _ -> branch ~idx:(i+1) acc cursor_default cursor syntaxes)
|
||||
with Invalid_argument _ ->
|
||||
branch acc cursor_default (Cursor.next_char cursor) syntaxes
|
||||
|
||||
let rec parse subsyntaxes cursor acc =
|
||||
if Cursor.overran cursor then acc
|
||||
else
|
||||
let acc, cursor = branch acc cursor cursor subsyntaxes in
|
||||
parse subsyntaxes cursor acc
|
11
parsers/blank_line.ml
Normal file
11
parsers/blank_line.ml
Normal file
@ -0,0 +1,11 @@
|
||||
module type Fn = sig
|
||||
type t
|
||||
val blank_line: t -> t
|
||||
end
|
||||
|
||||
module Make (F : Fn) = struct
|
||||
type t = F.t
|
||||
let s _cur = function '\n' -> true | _ -> false
|
||||
let e _cur = function '\n' -> true | _ -> false
|
||||
let parse _cursor acc = F.blank_line acc
|
||||
end
|
31
parsers/bullet.ml
Normal file
31
parsers/bullet.ml
Normal file
@ -0,0 +1,31 @@
|
||||
module type Fn = sig
|
||||
type t
|
||||
val bullet_item_s: char -> 'a -> 'a
|
||||
val bullet_item_e: 'a -> 'a
|
||||
val bullet_list_s: 'a -> 'a
|
||||
val bullet_list_e: 'a -> 'a
|
||||
end
|
||||
|
||||
open Text_parse.Parser
|
||||
open Text_parse.Syntax
|
||||
open Text_parse.Cursor
|
||||
|
||||
module Item (F : Fn) = struct
|
||||
type t = F.t
|
||||
let s _cursor = function '-' | '+' | '*' -> true | _ -> false
|
||||
let e cursor _ch = newline (char_at cursor 1)
|
||||
let subsyntaxes = [||]
|
||||
let parse cur acc =
|
||||
let bullet_char = char cur in
|
||||
let left = match find_end (fun _cur c -> c <> ' ') { cur with pos = cur.pos + 1 }
|
||||
with Some x -> x-1 | None -> 0 in
|
||||
F.bullet_item_s bullet_char acc |> parse subsyntaxes (sub ~left cur) |> F.bullet_item_e
|
||||
end
|
||||
|
||||
module List (F : Fn) = struct
|
||||
type t = F.t
|
||||
let s _cursor = function '-' | '+' | '*' -> true | _ -> false
|
||||
let e cursor _ch = newline (char_at cursor 1) && not (s cursor (char_at cursor 2))
|
||||
let subsyntaxes = [| (module Item (F) : Text_parse.Parser.S with type t = F.t) |]
|
||||
let parse cur acc = F.bullet_list_s acc |> parse subsyntaxes cur |> F.bullet_list_e
|
||||
end
|
4
parsers/dune
Normal file
4
parsers/dune
Normal file
@ -0,0 +1,4 @@
|
||||
(library
|
||||
(name parsers)
|
||||
(public_name text_parse.parsers)
|
||||
(libraries text_parse))
|
32
parsers/emphasis.ml
Normal file
32
parsers/emphasis.ml
Normal file
@ -0,0 +1,32 @@
|
||||
module type Fn = sig
|
||||
val bold: string -> 'a -> 'a
|
||||
val italic: string -> 'a -> 'a
|
||||
val underline: string -> 'a -> 'a
|
||||
val inline_monospace: string -> 'a -> 'a
|
||||
end
|
||||
|
||||
open Text_parse.Cursor
|
||||
|
||||
module Bold (F : Fn) = struct
|
||||
let s _cursor = function '*' -> true | _ -> false
|
||||
let e = s
|
||||
let parse cur acc = F.bold (segment_string (unwrap 1 cur)) acc
|
||||
end
|
||||
|
||||
module Italic (F : Fn) = struct
|
||||
let s _cursor = function '/' -> true | _ -> false
|
||||
let e = s
|
||||
let parse cur acc = F.italic (segment_string (unwrap 1 cur)) acc
|
||||
end
|
||||
|
||||
module Underline (F : Fn) = struct
|
||||
let s _cursor = function '_' -> true | _ -> false
|
||||
let e = s
|
||||
let parse cur acc = F.underline (segment_string (unwrap 1 cur)) acc
|
||||
end
|
||||
|
||||
module Inline_monospace (F : Fn) = struct
|
||||
let s _cursor = function '`' -> true | _ -> false
|
||||
let e = s
|
||||
let parse cur acc = F.inline_monospace (segment_string (unwrap 1 cur)) acc
|
||||
end
|
17
parsers/heading.ml
Normal file
17
parsers/heading.ml
Normal file
@ -0,0 +1,17 @@
|
||||
module type Fn = sig
|
||||
type t
|
||||
val heading_hashbang: int -> string -> t -> t
|
||||
end
|
||||
|
||||
open Text_parse.Syntax
|
||||
open Text_parse.Cursor
|
||||
|
||||
module Hashbang (F : Fn) = struct
|
||||
type t = F.t
|
||||
let s _cur = function '#' -> true | _ -> false
|
||||
let e _cur = newline
|
||||
let parse cursor acc =
|
||||
let level = match find_end (fun _cur c -> c <> '#') cursor with
|
||||
Some x -> x - cursor.pos - 1 | None -> 0 in
|
||||
F.heading_hashbang level (segment_string { cursor with pos = cursor.pos + level + 1; right_boundary = cursor.right_boundary-1 }) acc
|
||||
end
|
19
parsers/key_value.ml
Normal file
19
parsers/key_value.ml
Normal file
@ -0,0 +1,19 @@
|
||||
module type Fn = sig
|
||||
type t
|
||||
val key_value: string -> string -> t -> t
|
||||
end
|
||||
|
||||
open Text_parse.Syntax
|
||||
open Text_parse.Cursor
|
||||
|
||||
module Make (F : Fn) = struct
|
||||
type t = F.t
|
||||
let s _cur c = letter c
|
||||
let e _cur c = newline c
|
||||
let parse cursor acc =
|
||||
let colon_pos = match find_end (fun _cur c -> c = ':') cursor with
|
||||
Some x -> x - cursor.pos - 1 | None -> 0 in (*todo:None shouldn't be allowed by scope*)
|
||||
let key = segment_string { cursor with right_boundary = cursor.pos+colon_pos } in
|
||||
let value = segment_string { cursor with pos = cursor.pos+colon_pos+1; right_boundary = cursor.right_boundary } in
|
||||
F.key_value key value acc
|
||||
end
|
27
parsers/markdown.ml
Normal file
27
parsers/markdown.ml
Normal file
@ -0,0 +1,27 @@
|
||||
module type Markdown_t = sig
|
||||
include Blank_line.Fn
|
||||
include Reference.Fn with type t := t
|
||||
include Bullet.Fn with type t := t
|
||||
include Ordered.Fn with type t := t
|
||||
include Heading.Fn with type t := t
|
||||
include Preformatted.Fn with type t := t
|
||||
include Paragraph.Fn with type t := t
|
||||
end
|
||||
|
||||
open Text_parse.Parser
|
||||
open Text_parse.Cursor
|
||||
|
||||
module Make (F : Markdown_t) = struct
|
||||
let subsyntaxes = [|
|
||||
(module Blank_line.Make (F) : Text_parse.Parser.S with type t = F.t);
|
||||
(module Heading.Hashbang (F));
|
||||
(module Reference.Referent (F));
|
||||
(module Bullet.List (F));
|
||||
(module Ordered.List (F));
|
||||
(module Preformatted.Tabbed (F));
|
||||
(*(module Paragraph.Make (F));*)
|
||||
|]
|
||||
|
||||
let of_string text acc =
|
||||
parse subsyntaxes { text; pos = 0; right_boundary = String.length text - 1 } acc
|
||||
end
|
37
parsers/ordered.ml
Normal file
37
parsers/ordered.ml
Normal file
@ -0,0 +1,37 @@
|
||||
module type Fn = sig
|
||||
type t
|
||||
val ordered_item_s: char -> 'a -> 'a
|
||||
val ordered_item_e: 'a -> 'a
|
||||
val ordered_list_s: 'a -> 'a
|
||||
val ordered_list_e: 'a -> 'a
|
||||
end
|
||||
|
||||
open Text_parse.Parser
|
||||
open Text_parse.Syntax
|
||||
open Text_parse.Cursor
|
||||
|
||||
module Item (F : Fn) = struct
|
||||
type t = F.t
|
||||
let s cur ch =
|
||||
let is_enum c = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in
|
||||
let is_delim c = c = '.' || c = ')' in
|
||||
is_enum ch && is_delim (char_at cur 1)
|
||||
let e cursor _ch = newline (char_at cursor 1)
|
||||
let subsyntaxes = [||]
|
||||
let parse cur acc =
|
||||
let bullet_char = char cur in
|
||||
let left = match find_end (fun _cur c -> c <> ' ') { cur with pos = cur.pos + 1 }
|
||||
with Some x -> x | None -> 0 in
|
||||
F.ordered_item_s bullet_char acc |> parse subsyntaxes (sub ~left cur) |> F.ordered_item_e
|
||||
end
|
||||
|
||||
module List (F: Fn) = struct
|
||||
type t = F.t
|
||||
let s cur ch =
|
||||
let is_enum c = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in
|
||||
let is_delim c = c = '.' || c = ')' in (*todo: can't have `.` if sentence ends with it and lists are in sense*)
|
||||
is_enum ch && is_delim (char_at cur 1)
|
||||
let e cursor _ch = newline (char_at cursor 1) && newline (char_at cursor 2)(* not (s {cursor with pos = cursor.pos+2} (char_at cursor 2)) *)
|
||||
let subsyntaxes = [| (module Item (F) : Text_parse.Parser.S with type t = F.t) |]
|
||||
let parse cur acc = F.ordered_list_s acc |> parse subsyntaxes cur |> F.ordered_list_e
|
||||
end
|
19
parsers/paragraph.ml
Normal file
19
parsers/paragraph.ml
Normal file
@ -0,0 +1,19 @@
|
||||
module type Fn = sig
|
||||
type t
|
||||
val paragraph_s: t -> t
|
||||
val paragraph_e: t -> t
|
||||
end
|
||||
|
||||
open Text_parse.Parser
|
||||
open Text_parse.Syntax
|
||||
open Text_parse.Cursor
|
||||
|
||||
module Make (F : Fn)(S : Text_parse.Parser.Sub_parsers with type t = F.t) = struct
|
||||
type t = F.t
|
||||
let s _cur ch = printable ch
|
||||
let e cur = function
|
||||
| '\n' -> char_at cur (-1) = '\n'
|
||||
| _ when cur.pos + 1 = cur.right_boundary -> true
|
||||
| _ -> false
|
||||
let parse cur acc = F.paragraph_s acc |> parse S.subparsers cur |> F.paragraph_e
|
||||
end
|
36
parsers/plain_text.ml
Normal file
36
parsers/plain_text.ml
Normal file
@ -0,0 +1,36 @@
|
||||
module type Fn = sig
|
||||
type t
|
||||
val plain_text: string -> t -> t
|
||||
end
|
||||
|
||||
open Text_parse.Parser
|
||||
open Text_parse.Cursor
|
||||
|
||||
module Plain_text (F : Fn) = struct
|
||||
type t = F.t
|
||||
let s _cursor _ch = true
|
||||
let e cursor = function
|
||||
| '\n' -> char_at cursor (-1) = '\n'
|
||||
| _ when cursor.pos + 1 = cursor.right_boundary -> true
|
||||
| _ -> false
|
||||
let parse cur acc = F.plain_text (segment_string cur) acc
|
||||
end
|
||||
|
||||
module type Plain_text_t = sig
|
||||
include Blank_line.Fn
|
||||
include Heading.Fn with type t := t
|
||||
include Uri.Fn with type t := t
|
||||
include Paragraph.Fn with type t := t
|
||||
include Fn with type t := t
|
||||
end
|
||||
|
||||
|
||||
module Make (F : Plain_text_t) = struct
|
||||
module P = struct
|
||||
type t = F.t
|
||||
let subparsers = [| (module Plain_text (F) : Text_parse.Parser.S with type t = F.t); (module Uri.Angled (F)) |]
|
||||
end
|
||||
|
||||
let subparsers = [| (module Paragraph.Make (F)(P) : Text_parse.Parser.S with type t = F.t); (module Blank_line.Make (F)); (module Heading.Hashbang (F)); (module Paragraph.Make (F)(P)); |]
|
||||
let of_string text acc = parse subparsers { text; pos = 0; right_boundary = String.length text - 1 } acc
|
||||
end
|
13
parsers/preformatted.ml
Normal file
13
parsers/preformatted.ml
Normal file
@ -0,0 +1,13 @@
|
||||
module type Fn = sig
|
||||
type t
|
||||
val tab_preformatted: string -> t -> t
|
||||
end
|
||||
|
||||
open Text_parse.Cursor
|
||||
|
||||
module Tabbed (F : Fn) = struct
|
||||
type t = F.t
|
||||
let s _cur ch = '\t' = ch
|
||||
let e cur = function '\n' -> not (char_at cur 1 = '\t') | _ -> false
|
||||
let parse cur acc = F.tab_preformatted (segment_string cur) acc
|
||||
end
|
31
parsers/reference.ml
Normal file
31
parsers/reference.ml
Normal file
@ -0,0 +1,31 @@
|
||||
module type Fn = sig
|
||||
type t
|
||||
val reference_name: string -> string -> t -> t
|
||||
val referent_s: string -> t -> t
|
||||
val referent_e: t -> t
|
||||
end
|
||||
|
||||
open Text_parse.Parser
|
||||
open Text_parse.Cursor
|
||||
open Text_parse.Syntax
|
||||
|
||||
module Name (F : Fn) = struct
|
||||
type t = F.t
|
||||
let s _cursor = function '[' -> true | _ -> false
|
||||
let e _cursor = function ']' -> true | _ -> false
|
||||
let parse cur acc = F.reference_name (segment_string (unwrap 1 cur)) acc
|
||||
end
|
||||
|
||||
module Referent (F : Fn) = struct
|
||||
type t = F.t
|
||||
let find_name_end = find_end (fun cur c -> c = ']' && (char_at cur 1) = ':')
|
||||
let s cur = function '[' -> Option.is_some (find_name_end cur) | _ -> false
|
||||
let e _cur = newline
|
||||
let subsyntaxes = [| |]
|
||||
let parse cur acc =
|
||||
let name_boundary = match find_name_end cur with Some x -> x | None -> 0 in
|
||||
let name = segment_string { cur with pos = cur.pos+1; right_boundary = name_boundary-1 } in
|
||||
let text_cur = { cur with pos = name_boundary+2 } in
|
||||
F.referent_s name acc |> parse subsyntaxes text_cur |> F.referent_e
|
||||
end
|
||||
|
37
parsers/sentence.ml
Normal file
37
parsers/sentence.ml
Normal file
@ -0,0 +1,37 @@
|
||||
module type Fn = sig
|
||||
val sentence_segment: string -> 'a -> 'a
|
||||
val sentence_s: 'a -> 'a
|
||||
val sentence_e: 'a -> 'a
|
||||
end
|
||||
|
||||
open Text_parse.Parser
|
||||
open Text_parse.Syntax
|
||||
open Text_parse.Cursor
|
||||
|
||||
module Sentence_segment (F : Fn) = struct
|
||||
let s _cursor = printable
|
||||
let e cursor = function
|
||||
| '.' -> char_at cursor 1 = ' ' || newline (char_at cursor 1) (* todo punctuations *)
|
||||
| '\n' -> char_at cursor 1 = '\n'
|
||||
| _ when cursor.pos + 1 = cursor.right_boundary -> true
|
||||
| _ when char_at cursor 1 = '[' -> true
|
||||
| _ when char_at cursor 1 = '*' -> true
|
||||
| _ when char_at cursor 1 = '_' -> true
|
||||
| _ when char_at cursor 1 = '/' -> true
|
||||
| _ when char_at cursor 1 = '`' -> true
|
||||
| _ when char_at cursor 1 = '<' -> true
|
||||
| _ -> false
|
||||
let at = at s e
|
||||
let parse cur acc = F.sentence_segment (segment_string cur) acc
|
||||
end
|
||||
|
||||
module Sentence (F : Fn) = struct
|
||||
let s _cursor = printable
|
||||
let e cursor = function
|
||||
| '.' -> char_at cursor 1 = ' ' (* todo punctuations *)
|
||||
| '\n' -> char_at cursor 1 = '\n'
|
||||
| _ -> false
|
||||
let at = at s e
|
||||
let subsyntaxes = [| |]
|
||||
let parse cur acc = F.sentence_s acc |> parse subsyntaxes cur |> F.sentence_e
|
||||
end
|
27
parsers/uri.ml
Normal file
27
parsers/uri.ml
Normal file
@ -0,0 +1,27 @@
|
||||
module type Fn = sig
|
||||
type t
|
||||
val angled_uri: string -> t -> t
|
||||
end
|
||||
|
||||
open Text_parse.Syntax
|
||||
open Text_parse.Cursor
|
||||
|
||||
module Angled (F : Fn) = struct
|
||||
type t = F.t
|
||||
let s cur = function '<' -> letter (char_at cur 1) | _ -> false
|
||||
let e _cur = function '>' -> true | _ -> false
|
||||
let parse cur acc = F.angled_uri (segment_string (unwrap 1 cur)) acc
|
||||
end
|
||||
|
||||
(* module Uri (F : TextFn) = struct
|
||||
* type t = F.t
|
||||
* let rec is_scheme cur = function
|
||||
* | ':' -> true
|
||||
* | ch when letter ch -> is_scheme (next_char cur) (char_at cur 1)
|
||||
* | _ -> false
|
||||
* let s cur ch = letter ch && is_scheme (next_char cur) (char_at cur 1)
|
||||
* let e cur _ch = match char_at cur 1 with '\n' | ' ' -> true | _ -> false
|
||||
* let at = at s e
|
||||
* let parse cur acc = F.angled_uri (segment_string cur) acc
|
||||
* end *)
|
||||
|
10
syntax.ml
Normal file
10
syntax.ml
Normal file
@ -0,0 +1,10 @@
|
||||
module type S = sig
|
||||
val s: Cursor.t -> char -> bool
|
||||
val e: Cursor.t -> char -> bool
|
||||
end
|
||||
|
||||
(*let str c = String.make 1 c*)
|
||||
|
||||
let newline = function '\n' -> true | _ -> false
|
||||
let printable ch = ch >= ' ' && ch <= '~'
|
||||
let letter ch = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')
|
27
text_parse.opam
Normal file
27
text_parse.opam
Normal file
@ -0,0 +1,27 @@
|
||||
# This file is generated by dune, edit dune-project instead
|
||||
opam-version: "2.0"
|
||||
version: "1.02"
|
||||
synopsis: "Applicative text parsing"
|
||||
maintainer: ["fox@orbitalfox.eu"]
|
||||
authors: ["orbifx"]
|
||||
license: "EUPL"
|
||||
homepage: "https://cgit.orbitalfox.eu/text-parse-ml"
|
||||
bug-reports: "mailto:fox@orbitalfox.eu?subject=Text parse ML Issue:"
|
||||
depends: [
|
||||
"dune" {>= "2.4"}
|
||||
]
|
||||
build: [
|
||||
["dune" "subst"] {pinned}
|
||||
[
|
||||
"dune"
|
||||
"build"
|
||||
"-p"
|
||||
name
|
||||
"-j"
|
||||
jobs
|
||||
"@install"
|
||||
"@runtest" {with-test}
|
||||
"@doc" {with-doc}
|
||||
]
|
||||
]
|
||||
dev-repo: "git://orbitalfox.eu/text-parse-ml"
|
Loading…
x
Reference in New Issue
Block a user