108 lines
4.9 KiB
OCaml
108 lines
4.9 KiB
OCaml
let version = "%%VERSION%%"
|
|
|
|
let http_body fn uri =
|
|
let open Lwt in
|
|
let open Cohttp_lwt_unix in
|
|
Client.get uri >>= fun (headers, body) ->
|
|
body |> Cohttp_lwt.Body.to_string >|= fun body -> fn (headers, body)
|
|
|
|
let response (headers, body) =
|
|
let open Cohttp in
|
|
match Header.get (headers |> Response.headers) "content-type" with
|
|
| Some "application/msgpack" | Some "text/plain" | Some "application/octet-stream" -> Ok body
|
|
| Some x -> Error ("Invalid content-type: " ^ x)
|
|
| None -> Error ("No content-type")
|
|
|
|
let http_apply fn uri = Lwt_main.run (http_body fn uri)
|
|
|
|
module S = Set.Make(String)
|
|
|
|
(*let is_selected sl =*)
|
|
(* let check str a b c = Option.(fold ~none:(is_none b && is_none c) ~some:(fun x -> x = str) a) in*)
|
|
(* function*)
|
|
(* | `Author s -> check s sl.authors sl.topics sl.keywords*)
|
|
(* | `Topic s -> check s sl.topics sl.authors sl.keywords*)
|
|
(* | `Keyword s -> check s sl.keywords sl.topics sl.authors*)
|
|
|
|
(* TODO: parse using Header_pack *)
|
|
|
|
let sub_id text = Logarion.(String.sub (text.Text.uuid |> Id.to_string) 0 8)
|
|
let fname dir text = dir ^ sub_id text ^ ".txt"
|
|
|
|
let newer time id dir =
|
|
match Logarion.File_store.to_text dir (String.sub (Logarion.Id.to_string id) 0 8 ^ ".txt") with
|
|
| Error x -> prerr_endline x; true
|
|
| Ok txt -> time > (Logarion.(Header_pack.date (Date.listing txt.date)))
|
|
| exception (Sys_error _) -> true
|
|
|
|
let pull_text url dir id =
|
|
let path = Uri.path url in
|
|
let u = Uri.with_path url (path ^ "/" ^ String.sub (Logarion.Id.to_string id) 0 8 ^ ".txt") in
|
|
match http_apply response u with
|
|
| Error msg -> prerr_endline @@ " Failed " ^ Uri.to_string u ^ " " ^ msg
|
|
| Ok txt ->
|
|
match Logarion.Text.of_string txt with
|
|
| Error s -> prerr_endline s
|
|
| Ok text ->
|
|
let file = open_out_gen [Open_creat; Open_trunc; Open_wronly] 0o640 (fname dir text) in
|
|
output_string file txt; close_out file
|
|
|
|
(*TODO: adapt Archive predication function to work with free sets*)
|
|
let parse_index _is_selected fn url = function
|
|
| Msgpck.List (info :: _fields :: [texts]) ->
|
|
let version, archive_name, _archivists =
|
|
match info with Msgpck.List (v::n::a) -> Msgpck.(to_int v, to_string n, a) | _ -> invalid_arg "Pack header"
|
|
in
|
|
print_endline @@ archive_name ^ " (pack format version " ^ string_of_int version ^ ")";
|
|
(match Msgpck.to_list texts with
|
|
| [] -> prerr_endline "Empty index"
|
|
| texts ->
|
|
(try Unix.mkdir "peers" 0o740 with Unix.Unix_error (EEXIST, _, _) -> () | _ -> prerr_endline "Error making peers");
|
|
let dir = "peers/" ^ match Uri.host url with None -> "local/" | Some s -> s ^ "/" in
|
|
(try Unix.mkdir dir 0o740 with Unix.Unix_error (EEXIST, _, _) -> () | _ -> prerr_endline "Error making domain dir");
|
|
print_endline @@ "Copying into: " ^ dir;
|
|
let text_num = List.length texts in
|
|
let of_pck i x =
|
|
print_string @@ "\rDownloading " ^ string_of_int (i+1) ^ "/" ^ string_of_int text_num; flush stdout;
|
|
match x with
|
|
| Msgpck.List (id::time::title::_authors::_topics::_keywords) ->
|
|
(match Logarion.Id.of_bytes Msgpck.(to_bytes id) with
|
|
| None -> prerr_endline ("Invalid id for " ^ Msgpck.to_string title)
|
|
| Some id -> if newer Msgpck.(to_uint32 time) id dir then fn url dir id)
|
|
| _ -> prerr_endline ("Invalid record structure")
|
|
in List.iteri of_pck texts)
|
|
| _ -> prerr_endline "malformed feed"
|
|
|
|
let pull_index url _authors _keywords _topics =
|
|
let index_url = Uri.of_string (url ^ "/index.pck") in
|
|
match http_apply response index_url with
|
|
| Error msg -> prerr_endline @@ "Failed index request for " ^ url ^ " " ^ msg
|
|
| Ok body ->
|
|
let _i, pack = Msgpck.StringBuf.read body in
|
|
(* let predicates =*)
|
|
(* A.predicate A.authored authors_opt*)
|
|
(* @ A.predicate A.keyworded keywords_opt*)
|
|
(* @ A.predicate A.topics topics_opt*)
|
|
(* in*)
|
|
let is_selected text = List.fold_left (fun a e -> a && e text) true [](*predicates*) in
|
|
parse_index is_selected pull_text (Uri.of_string url) pack
|
|
|
|
let pull_list_indices auths words topics =
|
|
match open_in "peers.txt" with
|
|
| exception (Sys_error msg) -> prerr_endline msg
|
|
| file ->
|
|
let rec read () = try pull_index (input_line file) auths words topics; read () with End_of_file -> () in
|
|
read ()
|
|
|
|
let pull = function | "" -> pull_list_indices | x -> pull_index x
|
|
|
|
open Cmdliner
|
|
|
|
let pull_term =
|
|
let authors = Arg.(value & opt (some string) None & info ["a"; "authors"] ~docv:"AUTHORS" ~doc:"select authors") in
|
|
let keywords = Arg.(value & opt (some string) None & info ["k"; "keywords"] ~docv:"KEYWORDS" ~doc:"select keywords") in
|
|
let topics = Arg.(value & opt (some string) None & info ["t"; "topics"] ~docv:"TOPICS" ~doc:"select topics") in
|
|
let url = Arg.(value & pos 0 string "" & info [] ~docv:"URL" ~doc:"HTTP URL of Logarion") in
|
|
Term.(const pull $ url $ authors $ keywords $ topics),
|
|
Term.info "pull" ~doc:"pull listed texts" ~man:[ `S "DESCRIPTION"; `P "Pull texts from archive at address"]
|