kosuzu/cmd/txt/pull.ml
yakumo.izuru 758d1d6d47 Because sweet girls are the best, officially rebranding Logarion to Kosuzu
Signed-off-by: Izuru Yakumo <yakumo.izuru@chaotic.ninja>

git-svn-id: file:///srv/svn/repo/kosuzu/trunk@73 eb64cd80-c68d-6f47-b6a3-0ada418499da
2024-08-22 16:32:00 +00:00

138 lines
5.3 KiB
OCaml

let writer accum data =
Buffer.add_string accum data;
String.length data
let getContent connection url =
Curl.set_url connection url;
Curl.perform connection
let curl_pull url =
let result = Buffer.create 4069
and errorBuffer = ref "" in
let connection = Curl.init () in
try
Curl.set_errorbuffer connection errorBuffer;
Curl.set_writefunction connection (writer result);
Curl.set_followlocation connection true;
Curl.set_url connection url;
Curl.perform connection;
Curl.cleanup connection;
Ok result
with
| Curl.CurlException (_reason, _code, _str) ->
Curl.cleanup connection;
Error (Printf.sprintf "Error: %s %s" url !errorBuffer)
| Failure s ->
Curl.cleanup connection;
Error (Printf.sprintf "Caught exception: %s" s)
let newer time id dir =
match Kosuzu.File_store.to_text @@ Filename.(concat dir (Kosuzu.Id.short id) ^ ".txt") with
| Error x -> prerr_endline x; true
| Ok txt -> time > (Kosuzu.(Header_pack.date (Date.listing txt.date)))
| exception (Sys_error _) -> true
let print_peers p =
let open Kosuzu.Header_pack in
match Msgpck.to_list p.peers with [] -> ()
| ps -> print_endline @@
List.fold_left (fun a x -> Printf.sprintf "%s %s" a (Msgpck.to_string x)) "peers: " ps
type filter_t = { authors: Kosuzu.Person.Set.t; topics: Kosuzu.String_set.t }
let print_pull_start width total title dir =
Printf.printf "%*d/%s %s => %s %!" width 0 total title dir
let print_pull width total i =
Printf.printf "\r%*d/%s %!" width (i+1) total
let printers total title dir =
let width = String.length total in
print_pull_start width total title dir;
print_pull width total
let fname dir text = Filename.concat dir (Kosuzu.Text.short_id text ^ ".txt")
let pull_text url dir id =
let u = Filename.concat url ((Kosuzu.Id.short id) ^ ".txt") in
match curl_pull u with
| Error msg -> Printf.eprintf "Failed getting %s: %s" u msg
| Ok txt -> let txt = Buffer.contents txt in
match Kosuzu.Text.of_string txt with
| Error s -> prerr_endline s
| Ok text ->
let file = open_out_gen [Open_creat; Open_trunc; Open_wronly] 0o640 (fname dir text) in
output_string file txt; close_out file
let per_text url dir filter print i id time title authors topics _refs _reps = match id with
| "" -> Printf.eprintf "\nInvalid id for %s\n" title
| id -> let open Kosuzu in
print i;
if newer time id dir
&& (String_set.empty = filter.topics
|| String_set.exists (fun t -> List.mem t topics) filter.topics)
&& (Person.Set.empty = filter.authors
|| Person.Set.exists (fun t -> List.mem (Person.to_string t) authors) filter.authors)
then pull_text url dir id
let pull_index url authors_opt topics_opt =
let index_url = Filename.concat url "index.pck" in
match curl_pull index_url with
| Error s -> prerr_endline s; false
| Ok body ->
match Kosuzu.Header_pack.of_string (Buffer.contents body) with
| Error s -> Printf.printf "Error with %s: %s\n" url s; false
| Ok pk when pk.info.id = "" ->
Printf.printf "Empty ID index.pck, skipping %s\n" url; false
| Ok pk when not (Kosuzu.Validate.validate_id_length pk.info.id) ->
Printf.printf "Index pack ID longer than 32 characters, skipping %s\n" url; false
| Ok pk when not (Kosuzu.Validate.validate_id_chars pk.info.id) ->
Printf.printf "Index pack contains invalid ID characters, skipping %s\n" url; false
| Ok pk ->
let dir = Filename.concat Kosuzu.Peers.text_dir pk.info.id in
Kosuzu.File_store.with_dir dir;
let file = open_out_gen [Open_creat; Open_trunc; Open_wronly] 0o640
(Filename.concat dir "index.pck") in
output_string file ( Kosuzu.Header_pack.string {
pk with info = { pk.info with locations = url::pk.info.locations }});
close_out file;
let filter = let open Kosuzu in {
authors = (match authors_opt with Some s -> Person.Set.of_string s | None -> Person.Set.empty);
topics =( match topics_opt with Some s -> String_set.of_string s | None -> String_set.empty);
} in
let name = match pk.info.title with "" -> url | title -> title in
let print = printers (string_of_int @@ Kosuzu.Header_pack.numof_texts pk) name dir in
try Kosuzu.Header_pack.iteri (per_text url dir filter print) pk; print_newline (); true
with Invalid_argument msg -> Printf.printf "\nFailed to parse %s: %s\n%!" url msg; false
let pull_list auths topics =
Curl.global_init Curl.CURLINIT_GLOBALALL;
let pull got_one peer_url = if got_one then got_one else
(pull_index peer_url auths topics) in
let open Kosuzu in
let fold_locations init peer =
ignore @@ List.fold_left pull init peer.Peers.pack.Header_pack.info.locations;
false
in
ignore @@ Peers.fold fold_locations false;
Curl.global_cleanup ()
let pull url auths topics = match url with
| "" -> pull_list auths topics | x -> ignore (pull_index x auths topics)
open Cmdliner
let authors = Arg.(value & opt (some string) None & info ["a"; "authors"] ~docv:"Comma-separated names" ~doc:"Filter by authors")
let topics = Arg.(value & opt (some string) None & info ["t"; "topics"] ~docv:"Comma-separated topics" ~doc:"Filter by topics")
let url = Arg.(value & pos 0 string "" & info [] ~docv:"URL" ~doc:"Repository location")
let pull_t = Term.(const pull $ url $ authors $ topics)
let cmd =
let doc = "Pull listed texts" in
let man = [
`S Manpage.s_description;
`P "Pull texts from known repositories." ]
in
let info = Cmd.info "pull" ~doc ~man in
Cmd.v info pull_t