
(* TODO: markdown & gemini coming
   git-svn-id: https://svn.yakumo.dev/yakumo.izuru/text_parse/trunk@1 cb476dc4-a1c2-9446-a177-162899b6b847
   38 lines, 1.2 KiB, OCaml *)
(** Callbacks invoked by the sentence parsers in this file.

    Every callback takes the accumulator built so far and returns the
    updated accumulator; the accumulator type ['a] is left fully
    polymorphic by this signature. *)
module type Fn = sig
  (** [sentence_segment text acc] is called by [Sentence_segment.parse]
      with the string extracted for the current segment. *)
  val sentence_segment : string -> 'a -> 'a

  (** [sentence_s acc] is called by [Sentence.parse] before the sentence's
      sub-syntaxes are parsed (presumably "sentence start"). *)
  val sentence_s : 'a -> 'a

  (** [sentence_e acc] is called by [Sentence.parse] on the accumulator
      returned by the sub-syntax parse (presumably "sentence end"). *)
  val sentence_e : 'a -> 'a
end

open Text_parse.Parser
open Text_parse.Syntax
open Text_parse.Cursor

(** Syntax rule for one segment of a sentence. Each segment's text is
    reported to [F.sentence_segment]. *)
module Sentence_segment (F : Fn) = struct
  (** Start predicate: ignores the cursor and hands back [printable]. *)
  let s _cursor = printable

  (** [e cursor c] is the end predicate for the current character [c].
      It is true when:
      - [c] is ['.'] and the next character is a space or satisfies
        [newline];
      - [c] is ['\n'] and the next character is also ['\n'];
      - for any other [c], the cursor sits on the last position before
        [cursor.right_boundary], or the next character is one of
        ['['], ['*'], ['_'], ['/'], ['`'], ['<'].

      NOTE(review): the right-boundary test is only reached for
      characters other than ['.'] and ['\n']; for those two, [char_at] is
      asked for the next character regardless of the boundary. Confirm
      [char_at] is safe at the boundary. *)
  let e cursor = function
    | '.' ->
      (* todo punctuations *)
      let next = char_at cursor 1 in
      next = ' ' || newline next
    | '\n' -> char_at cursor 1 = '\n'
    | _ ->
      (* The boundary is checked first so that the look-ahead is skipped
         on the last position, as in the original guard order. *)
      cursor.pos + 1 = cursor.right_boundary
      || (match char_at cursor 1 with
          | '[' | '*' | '_' | '/' | '`' | '<' -> true
          | _ -> false)

  (** The enclosing scope's [at], specialised with the start/end
      predicates above. *)
  let at = at s e

  (** [parse cur acc] extracts the segment text with [segment_string cur]
      and folds it into [acc] through [F.sentence_segment]. *)
  let parse cur acc = F.sentence_segment (segment_string cur) acc
end

(** Syntax rule for a whole sentence. Parsing is bracketed by
    [F.sentence_s] and [F.sentence_e]. *)
module Sentence (F : Fn) = struct
  (** Start predicate: ignores the cursor and hands back [printable]. *)
  let s _cursor = printable

  (** [e cursor c] is the end predicate for the current character [c]:
      true when [c] is ['.'] followed by a space, or ['\n'] followed by
      another ['\n']; false for every other character.

      NOTE(review): unlike [Sentence_segment.e], a ['.'] followed by a
      newline is not treated as an end here — confirm this is intended. *)
  let e cursor = function
    | '.' -> char_at cursor 1 = ' ' (* todo punctuations *)
    | '\n' -> char_at cursor 1 = '\n'
    | _ -> false

  (** The enclosing scope's [at], specialised with the start/end
      predicates above. *)
  let at = at s e

  (** Sub-syntaxes tried inside a sentence; currently none. *)
  let subsyntaxes = [||]

  (** [parse cur acc] applies [F.sentence_s] to [acc], runs the enclosing
      scope's [parse] over [subsyntaxes] at [cur], then applies
      [F.sentence_e] to the result. *)
  let parse cur acc =
    F.sentence_s acc
    |> parse subsyntaxes cur
    |> F.sentence_e
end