From fa6d7e3b5a427c4ea64783fcaed87726a606671b Mon Sep 17 00:00:00 2001
From: Marco Matthies <71844+marcom@users.noreply.github.com>
Date: Thu, 18 Jan 2024 13:50:36 +0100
Subject: [PATCH] Improve overriding energy parameter dirs via env vars (#5)

Improve overriding energy parameter dirs via env vars

Always ensure the DATAPATH, CYCLEFOLD_DATAPATH env vars are pointing
to the correct path from the `RNAstructure_jll` binary package.

These paths can be overridden with the RNASTRUCTURE_JL_DATAPATH,
RNASTRUCTURE_JL_CYCLEFOLD_DATAPATH env vars if necessary.

Don't warn if the DATAPATH, CYCLEFOLD_DATAPATH env vars are already
set to the correct directories.  This avoids unnecessary warnings when
running the tests.

Fixes #4.
---
 README.md             | 17 ++++++++++++
 src/RNAstructure.jl   | 27 ++++++++++++------
 test/init-env-vars.jl | 64 +++++++++++++++++++++++++++++++++++++++++++
 test/runtests.jl      |  1 +
 4 files changed, 100 insertions(+), 9 deletions(-)
 create mode 100644 test/init-env-vars.jl

diff --git a/README.md b/README.md
index d13ce2a..a986fa6 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,23 @@ for more details.
 Some programs make exceptions to these rules, check the manual pages
 of the RNAstructure programs for details on any differences.
 
+### Note: Overriding energy parameter directories
+
+The environment variable `RNASTRUCTURE_JL_DATAPATH` can be set to
+override the directory where energy parameters are read from. For the
+`cyclefold_*` functions the environment variable is called
+`RNASTRUCTURE_JL_CYCLEFOLD_DATAPATH`.
+
+In the original RNAstructure program these environment variables are
+called `DATAPATH` and `CYCLEFOLD_DATAPATH`. `RNAstructure.jl` (this
+package) sets these environment variables automatically to the
+corresponding installation directory of the `RNAstructure_jll` binary
+package.  The names of the env vars were changed to avoid clashes with
+possible settings you might already have in your shell startup files
+from a pre-existing manual RNAstructure installation, which could be a
+different version and have different parameters. In this way, you can
+be sure that this package uses the correct parameters, while still
+allowing you to override them if necessary.
 
 ### Minimum free energy (MFE) and structure
 
diff --git a/src/RNAstructure.jl b/src/RNAstructure.jl
index e1e6c37..da6361a 100644
--- a/src/RNAstructure.jl
+++ b/src/RNAstructure.jl
@@ -18,16 +18,25 @@ include("pairtable-to-dbn.jl")
 include("plot.jl")
 
 function __init__()
-    if !haskey(ENV, "DATAPATH")
-        ENV["DATAPATH"] = joinpath(RNAstructure_jll.artifact_dir, "data_tables")
-    else
-        @info "RNAstructure: energy params already set, DATAPATH=$(ENV["DATAPATH"])"
-    end
-    if !haskey(ENV, "CYCLEFOLD_DATAPATH")
-        ENV["CYCLEFOLD_DATAPATH"] = joinpath(RNAstructure_jll.artifact_dir, "CycleFold", "datafiles")
-    else
-        @info "RNAstructure: CycleFold energy params already set, CYCLEFOLD_DATAPATH=$(ENV["CYCLEFOLD_DATAPATH"])"
+    artifact = RNAstructure_jll.artifact_dir
+    defaults = ("DATAPATH" => joinpath(artifact, "data_tables"),
+                "CYCLEFOLD_DATAPATH" => joinpath(artifact, "CycleFold", "datafiles"))
+    # Point each upstream env var at its RNAstructure_jll artifact directory, unless
+    # the matching RNASTRUCTURE_JL_<name> override env var is set.
+    for (env_varname, default_path) in defaults
+        override = get(ENV, "RNASTRUCTURE_JL_$(env_varname)", nothing)
+        if override !== nothing
+            @info "Setting ENV[\"$(env_varname)\"] = ENV[\"RNASTRUCTURE_JL_$(env_varname)\"]"
+            ENV[env_varname] = override
+            continue
+        end
+        if get(ENV, env_varname, default_path) != default_path  # silent if unset or default
+            @warn ("RNAstructure: $(env_varname) env var set, replacing with $default_path\n"
+                   * "To override $(env_varname) used by RNAstructure, set the RNASTRUCTURE_JL_$(env_varname) env var")
+        end
+        ENV[env_varname] = default_path
     end
+
     # TODO: set OMP_NUM_THREADS env var for smp programs (number of threads to use)
     return nothing
 end
diff --git a/test/init-env-vars.jl b/test/init-env-vars.jl
new file mode 100644
index 0000000..c2f3ee7
--- /dev/null
+++ b/test/init-env-vars.jl
@@ -0,0 +1,64 @@
+@testset "__init__ env vars" begin
+    showtestset()
+
+    upstream_env_vars = ["DATAPATH", "CYCLEFOLD_DATAPATH"]
+    our_env_vars = ["RNASTRUCTURE_JL_" * e for e in upstream_env_vars]
+    env_var_mapping = Dict(e => "RNASTRUCTURE_JL_" * e for e in upstream_env_vars)
+    all_env_vars = [upstream_env_vars..., our_env_vars...]
+
+    function delete_all_env_vars()
+        for e in all_env_vars
+            delete!(ENV, e)
+        end
+    end
+
+    # save relevant ENV vars so that we can change them here for
+    # testing and then restore them later
+    saved_env_vars = Dict{String,String}()
+    for e in all_env_vars
+        if haskey(ENV, e)
+            saved_env_vars[e] = ENV[e]
+            delete!(ENV, e)
+        end
+    end
+
+    # with a clean environment __init__ installs the package default paths; record
+    # them here rather than trusting whatever the env vars held before this test
+    @test RNAstructure.__init__() === nothing
+    default_paths = Dict(e => ENV[e] for e in upstream_env_vars)
+    delete_all_env_vars()
+
+    # a pre-set upstream var (DATAPATH, etc) is replaced by the default, with a warning
+    for e in upstream_env_vars
+        ENV[e] = "non-existent-dir-path"
+        warn_msg = ("RNAstructure: $e env var set, replacing with $(default_paths[e])\n"
+                    * "To override $e used by RNAstructure, set the RNASTRUCTURE_JL_$e env var")
+        @test (@test_logs (:warn, (warn_msg)) RNAstructure.__init__()) === nothing
+        delete_all_env_vars()
+    end
+
+    # setting RNASTRUCTURE_JL_* overrides the default and logs an info message
+    for (upstream_env, our_env) in env_var_mapping
+        ENV[our_env] = "non-existent-dir-path"
+        info_msg = ("Setting ENV[\"$upstream_env\"] = ENV[\"$our_env\"]")
+        @test (@test_logs (:info, (info_msg)) RNAstructure.__init__()) === nothing
+        delete_all_env_vars()
+    end
+
+    # setting both upstream_env_vars and our_env_vars, our_env_vars
+    # should have precedence
+    for (upstream_env, our_env) in env_var_mapping
+        ENV[upstream_env] = "non-existent-dir-path"
+        ENV[our_env] = "non-existent-dir-path"
+        info_msg = ("Setting ENV[\"$upstream_env\"] = ENV[\"$our_env\"]")
+        @test (@test_logs (:info, (info_msg)) RNAstructure.__init__()) === nothing
+        delete_all_env_vars()
+    end
+
+    # restore env vars that we previously unset
+    for e in all_env_vars
+        if haskey(saved_env_vars, e)
+            ENV[e] = saved_env_vars[e]
+        end
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index a3be9cb..91b17f5 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -14,6 +14,7 @@ showtestset() = println(" "^(2 * Test.get_testset_depth()), "testing ",
 @testset verbose=true "RNAstructure" begin
     showtestset()
     include("aqua.jl")
+    include("init-env-vars.jl")
     include("ct-format.jl")
     include("plot.jl")
     include("RNAstructure.jl")