Skip to content

Commit

Permalink
Foundations of the CLI (#24)
Browse files Browse the repository at this point in the history
* Foundations of the CLI

* Fixing Finch Behaviour for Escripts

* Wayback Fallback

* File Storage and Chalisa.json done

* Support fork for escripts vs application runtime

---------

Co-authored-by: ks0m1c_dharma <sakiyamuni@sams.ara>
Co-authored-by: Ritesh Kumar <ritesh@emerald.pink>
  • Loading branch information
3 people authored Feb 13, 2024
1 parent 34b43c1 commit df3ea6c
Show file tree
Hide file tree
Showing 9 changed files with 3,756 additions and 2 deletions.
8 changes: 8 additions & 0 deletions README.org
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ Wherever there be anything you dost not comprehend, cease to continue writing
-- Vyasa, Adi Parva - Mahabharatam
#+END_QUOTE

* CLI Helpsheet

#+BEGIN_SRC sh
# build script
mix escript.build
# fetch from domain/path --storage :mode
./vyasa fetch shlokam.org/hanumanchalisa --storage file
#+END_SRC
* Forms of Prior Art
** [[http://worrydream.com/refs/Nelson-ComputerLibDreamMachines1975.pdf#page=57][Xanadu Pattern]]

Expand Down
2 changes: 2 additions & 0 deletions config/dev.exs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ config :vyasa, VyasaWeb.Endpoint,
# If desired, both `http:` and `https:` keys can be
# configured to run both http and https servers on
# different ports.
#
#config :ssl, cacertfile: 'priv/cacerts.pem'

# Watch static and templates for browser reloading.
config :vyasa, VyasaWeb.Endpoint,
Expand Down
46 changes: 46 additions & 0 deletions lib/vyasa/corpus/engine/fallback.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
defmodule Vyasa.Corpus.Engine.Fallback do
  @moduledoc """
  Wayback Machine fallback for pages the primary site cannot serve.

  Asks the Internet Archive availability API for the closest archived snapshot
  of a page and downloads that snapshot.

  Failures are reported as `{:err, reason}` tuples. Note the tag is `:err`, not
  the conventional `:error`; it is kept as-is so existing callers keep matching.
  """

  @url "https://archive.org/wayback/available?url="

  @doc """
  Looks up `path` (the address of the page to recover) and downloads its closest snapshot.

  Returns the body of the snapshot response on success, `{:err, :not_found}` when
  the archive has no snapshot, or `{:err, :fallback_failed}` when a request fails.
  """
  def run(path) do
    # The page address travels inside a query parameter, so it has to be escaped;
    # otherwise any `&`, `#` or `?` it contains would be read as part of the API query.
    @url
    |> fetch_url(URI.encode_www_form(path))
    |> fetch_tree()
  end

  @doc """
  Requests `url <> path` and extracts the closest snapshot address from the JSON answer.

  Returns `{:ok, snapshot_url}`, `{:err, :not_found}` when the answer lists no
  closest snapshot, or `{:err, :fallback_failed}` for transport errors and for
  answers that do not have the expected shape.
  """
  def fetch_url(url, path \\ "") do
    case Req.get(url <> path, conn_opts()) do
      {:ok, %{body: %{"archived_snapshots" => %{"closest" => %{"url" => snapshot_url}}}}} ->
        {:ok, snapshot_url}

      {:ok, %{body: %{"archived_snapshots" => %{}}}} ->
        IO.inspect("Not Found", label: :fallback_err)
        {:err, :not_found}

      {:ok, response} ->
        # e.g. an HTML error page: there is no snapshot information to read
        IO.inspect(Map.get(response, :status), label: :fallback_err)
        {:err, :fallback_failed}

      {:error, reason} ->
        IO.inspect(reason, label: :fallback_err)
        {:err, :fallback_failed}
    end
  end

  @doc """
  Downloads the snapshot found by `fetch_url/2`.

  Given `{:ok, url}` it requests the snapshot over HTTPS and returns the response
  body, or `{:err, :fallback_failed}` if the request fails. Any other value is
  treated as an earlier failure and returned unchanged.
  """
  def fetch_tree({:ok, url}) do
    case Req.get(to_https(url), conn_opts()) do
      {:ok, %{body: body}} ->
        body

      {:error, reason} ->
        IO.inspect(reason, label: :fallback_err)
        {:err, :fallback_failed}
    end
  end

  def fetch_tree(err), do: err

  # Rewrites the snapshot address to HTTPS, dropping any explicit port.
  defp to_https(url) do
    uri = URI.parse(url)
    URI.to_string(%{uri | port: nil, scheme: "https"})
  end

  # Explicit CA certificates taken from the OS trust store.
  defp conn_opts(), do: [connect_options: [transport_opts: [cacerts: :public_key.cacerts_get()]]]
end
115 changes: 115 additions & 0 deletions lib/vyasa/corpus/engine/shlokam.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
defmodule Vyasa.Corpus.Engine.Shlokam do
  @moduledoc """
  Corpus engine for shlokam.org.

  Downloads a text's page, scrapes its title, description and verses, and writes
  the result out as JSON. When the site cannot serve the page, the Wayback
  Machine fallback is tried instead.
  """

  @url "https://shlokam.org/"
  alias Vyasa.Corpus.Engine.Fallback

  @doc """
  Fetches, scrapes and stores the text found at `path` on shlokam.org.

  ## Options

    * `:o` - output directory (or `"db"`, which is not implemented yet). When
      absent, the file is written under the application's `priv` directory.

  Returns the path of the written JSON file, or `{:err, reason}` when neither
  the site nor the fallback could provide the page.
  """
  def run(path, opts \\ []) do
    @url
    |> fetch_tree(path)
    |> scrape()
    |> store(path, Keyword.get(opts, :o, nil))
  end

  @doc """
  Downloads `url <> path` and returns `{:ok, nodes}` with the page's
  `.uncode_text_column` elements.

  A non-2xx status, a non-HTML body or a transport error sends the request to
  `Vyasa.Corpus.Engine.Fallback`; if that fails too, its `{:err, reason}` is returned.
  """
  def fetch_tree(url, path \\ "") do
    target = url <> path

    # Redirects need no clause here: Req follows them by default.
    case Req.get(target, connect_options: [transport_opts: [cacerts: :public_key.cacerts_get()]]) do
      {:ok, %{status: status, body: body}} when status in 200..299 and is_binary(body) ->
        {:ok, content_columns(body)}

      {:ok, response} ->
        IO.inspect(Map.get(response, :status), label: :primary_site_err)
        fetch_from_archive(target)

      {:error, reason} ->
        IO.inspect(reason, label: :primary_site_err)
        fetch_from_archive(target)
    end
  end

  # The fallback hands back raw HTML, so it is parsed the same way as a live page.
  defp fetch_from_archive(target) do
    case Fallback.run(target) do
      html when is_binary(html) -> {:ok, content_columns(html)}
      failure -> failure
    end
  end

  # Parses an HTML document and keeps only the content columns the scraper reads.
  defp content_columns(html) do
    html
    |> Floki.parse_document!()
    |> Floki.find(".uncode_text_column")
  end

  # Folds the content columns into %{title:, description:, verses:}.
  # Anything that is not {:ok, tree} is an upstream failure and passes through.
  defp scrape({:ok, tree}) do
    Enum.reduce(tree, %{title: nil, description: nil, verses: []}, fn
      {"div", _, [{"h3", [], ["Description"]} | para]}, acc ->
        %{acc | description: Floki.text(para)}

      {"div", _, [{"h3", _, _} = h3_tree]}, acc ->
        %{acc | title: Floki.text(h3_tree)}

      {"div", _, [{"div", [{"class", "verse_sanskrit"}], _verse} | _] = verse_tree}, acc ->
        %{acc | verses: collect_verses(verse_tree)}

      _, acc ->
        acc
    end)
  end

  defp scrape(err), do: err

  # Turns the children of a verse container into an ordered list of verse maps.
  # Each map is keyed by the CSS class of its parts plus a 1-based "count".
  # The accumulator is built newest-first: its head is the verse being filled in,
  # and a separator closes that verse by stamping it with its number.
  defp collect_verses(verse_tree) do
    verse_tree
    |> Enum.reduce([], fn
      # n case verse break
      {"hr", [{"class", "verse_separator"}], []}, [curr | [%{"count" => c} | _] = done] ->
        [Map.put(curr, "count", c + 1) | done]

      # init verse break
      {"hr", [{"class", "verse_separator"}], []}, [curr | done] ->
        [Map.put(curr, "count", 1) | done]

      # n case after verse break: the head is closed, so start a new verse
      {"div", [{"class", class}], _} = c_tree, [%{"count" => _} | _] = verses ->
        [%{class => Floki.text(c_tree)} | verses]

      # n case before verse break: keep filling the open verse
      {"div", [{"class", class}], _} = c_tree, [curr | done] when is_map(curr) ->
        [Map.put(curr, class, Floki.text(c_tree)) | done]

      # init
      {"div", [{"class", class}], _} = c_tree, [] ->
        [%{class => Floki.text(c_tree)}]

      others, verses ->
        IO.inspect(others)
        verses
    end)
    |> number_last_verse()
    |> Enum.reverse()
  end

  # The last verse has no trailing separator, so it is numbered here.
  # A page with a single verse (no separator at all) gets count 1 instead of
  # failing the match.
  defp number_last_verse([last | [%{"count" => count} | _] = numbered]),
    do: [Map.put(last, "count", count + 1) | numbered]

  defp number_last_verse([only]), do: [Map.put(only, "count", 1)]
  defp number_last_verse([]), do: []

  @doc """
  Persists the scraped `tree` for the text named `text`.

  The third argument selects the destination:

    * `"db"` - not implemented yet; returns `nil`
    * `nil` - `priv/static/corpus/shlokam.org/<text>.json` of the `:vyasa` app
    * any other value - `<dir>/shlokam.org/<text>.json`

  Missing directories are created. Returns the path written. A `{:err, reason}`
  tuple passed as `tree` is returned unchanged and nothing is written.
  """
  def store(_tree, _text, "db") do
    # TODO parsing logic into text indexer structs and db insert ops
    nil
  end

  # An upstream failure is not encodable data; hand it back instead of raising in Jason.
  def store({:err, _reason} = failure, _text, _destination), do: failure

  def store(tree, text, nil) do
    # TODO parsing logic into text indexer structs and db insert ops
    :vyasa
    |> Application.app_dir("priv")
    |> Path.join("/static/corpus/shlokam.org/#{text}.json")
    |> write_json(tree)
  end

  def store(tree, text, file_path) do
    # TODO parsing logic into text indexer structs and db insert ops
    file_path
    |> Path.join("/shlokam.org")
    |> Path.join("/#{text}.json")
    |> write_json(tree)
  end

  # Encodes first so a bad tree never leaves an empty file behind, then creates
  # every missing parent directory before writing.
  defp write_json(path, tree) do
    json = Jason.encode!(tree)

    path
    |> Path.dirname()
    |> File.mkdir_p!()

    File.write!(path, json)
    path
  end
end
40 changes: 40 additions & 0 deletions lib/vyasa_cli.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
defmodule VyasaCLI do
  @moduledoc """
  Entry point of the `vyasa` escript.

  Usage: `vyasa <command> <target> [switches]`. The only command handled is
  `fetch`, with a target of the form `shlokam.org/<path>`.
  """

  @switches [o: :string, path: :string, storage: :string]

  @doc """
  Parses `args`, runs the requested command and prints its result.

  Unknown or incomplete invocations print a hint instead of crashing.
  """
  def main(args \\ []) do
    args
    |> parse_args()
    |> response()
    |> IO.puts()
  end

  # Splits argv into {command, target, opts}, or {command, opts} when no target
  # was given. Switches may appear before or after the target.
  defp parse_args([command | rest]) do
    {opts, positional, _invalid} = OptionParser.parse(rest, switches: @switches)

    case positional do
      [target | _] -> {command, target, opts}
      [] -> {command, opts}
    end
  end

  # No command at all (empty argv).
  defp parse_args(_no_command), do: :no_command

  defp response({"fetch", "shlokam.org/" <> path, opts}) do
    case Vyasa.Corpus.Engine.Shlokam.run(path, opts) do
      # IO.puts/1 cannot print a tuple, so a reported failure is rendered as text
      {:err, reason} -> "Fetch failed: " <> inspect(reason)
      result -> result
    end
  end

  defp response({"fetch", _, _}) do
    "Unsupported domain\nTry one of the following:\nshlokam.org/\n"
  end

  defp response(_) do
    "Command doesnt belong to us "
  end
end
9 changes: 8 additions & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ defmodule Vyasa.MixProject do
elixirc_options: [
warnings_as_errors: true
],
escript: escript(),
start_permanent: Mix.env() == :prod,
aliases: aliases(),
deps: deps()
Expand All @@ -26,6 +27,11 @@ defmodule Vyasa.MixProject do
]
end

# Escript build settings: names VyasaCLI as the main module of the generated script.
defp escript, do: [main_module: VyasaCLI]

# Source directories to compile for the given Mix environment;
# only :test additionally compiles "test/support".
defp elixirc_paths(env) do
  case env do
    :test -> ["lib", "test/support"]
    _other -> ["lib"]
  end
end
Expand All @@ -42,7 +48,7 @@ defmodule Vyasa.MixProject do
{:phoenix_html, "~> 3.3"},
{:phoenix_live_reload, "~> 1.2", only: :dev},
{:phoenix_live_view, "~> 0.20.1"},
{:floki, ">= 0.30.0", only: :test},
{:floki, ">= 0.30.0"},
{:phoenix_live_dashboard, "~> 0.8.2"},
{:esbuild, "~> 0.8", runtime: Mix.env() == :dev},
{:tailwind, "~> 0.2.0", runtime: Mix.env() == :dev},
Expand All @@ -58,6 +64,7 @@ defmodule Vyasa.MixProject do
{:vix, "~> 0.5"},
{:kino, "~> 0.12.0"},
{:cors_plug, "~> 3.0"},
{:req, "~> 0.4.8"}
]
end

Expand Down
3 changes: 2 additions & 1 deletion mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
"hpax": {:hex, :hpax, "0.1.2", "09a75600d9d8bbd064cdd741f21fc06fc1f4cf3d0fcc335e5aa19be1a7235c84", [:mix], [], "hexpm", "2c87843d5a23f5f16748ebe77969880e29809580efdaccd615cd3bed628a8c13"},
"httpoison": {:hex, :httpoison, "2.2.1", "87b7ed6d95db0389f7df02779644171d7319d319178f6680438167d7b69b1f3d", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "51364e6d2f429d80e14fe4b5f8e39719cacd03eb3f9a9286e61e216feac2d2df"},
"idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"},
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"},
"image": {:hex, :image, "0.39.0", "c003ce095ee32c40f194ca1a48ffa6d3e67192e567183e70dfd8b77bb7144119", [:mix], [{:bumblebee, "~> 0.3", [hex: :bumblebee, repo: "hexpm", optional: true]}, {:evision, "~> 0.1.33", [hex: :evision, repo: "hexpm", optional: true]}, {:exla, "~> 0.5", [hex: :exla, repo: "hexpm", optional: true]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: true]}, {:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}, {:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: true]}, {:phoenix_html, "~> 2.14 or ~> 3.2", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:plug, "~> 1.13", [hex: :plug, repo: "hexpm", optional: true]}, {:rustler, "> 0.0.0", [hex: :rustler, repo: "hexpm", optional: true]}, {:sweet_xml, "~> 0.7", [hex: :sweet_xml, repo: "hexpm", optional: false]}, {:vix, "~> 0.23", [hex: :vix, repo: "hexpm", optional: false]}], "hexpm", "d83de26e009a59cb8c53d2efecaacd7c9b990897dc3c7095a5b8847e73c8f968"},
"jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"},
"kino": {:hex, :kino, "0.12.0", "dfebe415f31cf2f54d98600178c08fd017931d9dada6fb59b7716b5431bee4a1", [:mix], [{:fss, "~> 0.1.0", [hex: :fss, repo: "hexpm", optional: false]}, {:nx, "~> 0.1", [hex: :nx, repo: "hexpm", optional: true]}, {:table, "~> 0.1.2", [hex: :table, repo: "hexpm", optional: false]}], "hexpm", "6796a64ae978fc2e50e52cae022ae6a3d28b8e50589af158b8f3904e7f748377"},
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"},
"mime": {:hex, :mime, "2.0.5", "dc34c8efd439abe6ae0343edbb8556f4d63f178594894720607772a041b04b02", [:mix], [], "hexpm", "da0d64a365c45bc9935cc5c8a7fc5e49a0e0f9932a761c55d6c52b142780a05c"},
"mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm", "f278585650aa581986264638ebf698f8bb19df297f66ad91b18910dfc6e19323"},
"mint": {:hex, :mint, "1.5.1", "8db5239e56738552d85af398798c80648db0e90f343c8469f6c6d8898944fb6f", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "4a63e1e76a7c3956abd2c72f370a0d0aecddc3976dea5c27eccbecfa5e7d5b1e"},
Expand All @@ -46,6 +46,7 @@
"plug_crypto": {:hex, :plug_crypto, "2.0.0", "77515cc10af06645abbfb5e6ad7a3e9714f805ae118fa1a70205f80d2d70fe73", [:mix], [], "hexpm", "53695bae57cc4e54566d993eb01074e4d894b65a3766f1c43e2c61a1b0f45ea9"},
"postgrex": {:hex, :postgrex, "0.17.3", "c92cda8de2033a7585dae8c61b1d420a1a1322421df84da9a82a6764580c503d", [:mix], [{:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "946cf46935a4fdca7a81448be76ba3503cff082df42c6ec1ff16a4bdfbfb098d"},
"ranch": {:hex, :ranch, "1.8.0", "8c7a100a139fd57f17327b6413e4167ac559fbc04ca7448e9be9057311597a1d", [:make, :rebar3], [], "hexpm", "49fbcfd3682fab1f5d109351b61257676da1a2fdbe295904176d5e521a2ddfe5"},
"req": {:hex, :req, "0.4.8", "2b754a3925ddbf4ad78c56f30208ced6aefe111a7ea07fb56c23dccc13eb87ae", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.9", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "7146e51d52593bb7f20d00b5308a5d7d17d663d6e85cd071452b613a8277100c"},
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.7", "354c321cf377240c7b8716899e182ce4890c5938111a1296add3ec74cf1715df", [:make, :mix, :rebar3], [], "hexpm", "fe4c190e8f37401d30167c8c405eda19469f34577987c76dde613e838bbc67f8"},
"sweet_xml": {:hex, :sweet_xml, "0.7.4", "a8b7e1ce7ecd775c7e8a65d501bc2cd933bff3a9c41ab763f5105688ef485d08", [:mix], [], "hexpm", "e7c4b0bdbf460c928234951def54fe87edf1a170f6896675443279e2dbeba167"},
"swoosh": {:hex, :swoosh, "1.14.0", "710e363e114dedb4080b737e0307f5410887ffc9a239f818231e5618b6b84e1b", [:mix], [{:cowboy, "~> 1.1 or ~> 2.4", [hex: :cowboy, repo: "hexpm", optional: true]}, {:ex_aws, "~> 2.1", [hex: :ex_aws, repo: "hexpm", optional: true]}, {:finch, "~> 0.6", [hex: :finch, repo: "hexpm", optional: true]}, {:gen_smtp, "~> 0.13 or ~> 1.0", [hex: :gen_smtp, repo: "hexpm", optional: true]}, {:hackney, "~> 1.9", [hex: :hackney, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mail, "~> 0.2", [hex: :mail, repo: "hexpm", optional: true]}, {:mime, "~> 1.1 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug, "~> 1.9", [hex: :plug, repo: "hexpm", optional: true]}, {:plug_cowboy, ">= 1.0.0", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.2 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "dccfc986ac99c18345ab3e1a8b934b2d817fd6d59a2494f0af78502184c71025"},
Expand Down
Loading

0 comments on commit df3ea6c

Please sign in to comment.