Skip to content

Commit

Permalink
Corrected uniform for VJ sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas authored and Thomas committed Feb 27, 2025
1 parent 3a50552 commit 1ea4eee
Show file tree
Hide file tree
Showing 8 changed files with 346 additions and 22 deletions.
173 changes: 170 additions & 3 deletions Examples.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"os.environ[\"RUST_BACKTRACE\"] = \"1\"\n",
"\n",
"# load the model\n",
"igor_model = righor.load_model(\"human\", \"trb\")\n",
"igor_model = righor.load_model(\"human\", \"igk\")\n",
"# alternatively, you can load a model from igor files \n",
"# igor_model = righor.load_model_from_files(params.txt, marginals.txt, anchor_v.csv, anchor_j.csv)\n",
"\n",
Expand All @@ -40,14 +40,181 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "7477e1e0-bc4e-4ca2-be07-1424288ae3de",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"thread '<unnamed>' panicked at /home/thomas/.cargo/registry/src/index.crates.io-6f17d22bba15001f/ndarray-0.16.1/src/arraytraits.rs:36:5:\n",
"ndarray: index out of bounds\n",
"stack backtrace:\n",
" 0: std::panicking::begin_panic\n",
" at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/std/src/panicking.rs:734:5\n",
" 1: ndarray::arraytraits::array_out_of_bounds\n",
" at /home/thomas/.cargo/registry/src/index.crates.io-6f17d22bba15001f/ndarray-0.16.1/src/arraytraits.rs:36:5\n",
" 2: ndarray::arraytraits::<impl core::ops::index::Index<I> for ndarray::ArrayBase<S,D>>::index::{{closure}}\n",
" at /home/thomas/.cargo/registry/src/index.crates.io-6f17d22bba15001f/ndarray-0.16.1/src/arraytraits.rs:67:40\n",
" 3: core::option::Option<T>::unwrap_or_else\n",
" at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/core/src/option.rs:1010:21\n",
" 4: ndarray::arraytraits::<impl core::ops::index::Index<I> for ndarray::ArrayBase<S,D>>::index\n",
" at /home/thomas/.cargo/registry/src/index.crates.io-6f17d22bba15001f/ndarray-0.16.1/src/arraytraits.rs:67:22\n",
" 5: righor::shared::markov_chain::DNAMarkovChain::precompute_degenerate\n",
" at ./src/shared/markov_chain.rs:228:70\n",
" 6: righor::shared::markov_chain::DNAMarkovChain::precompute\n",
" at ./src/shared/markov_chain.rs:107:9\n",
" 7: righor::shared::markov_chain::DNAMarkovChain::new\n",
" at ./src/shared/markov_chain.rs:78:9\n",
" 8: <righor::vdj::model::Model as righor::shared::model::Modelable>::uniform\n",
" at ./src/vdj/model.rs:391:39\n",
" 9: <righor::vj::model::Model as righor::shared::model::Modelable>::uniform\n",
" at ./src/vj/model.rs:291:20\n",
" 10: righor::shared::model::Model::uniform\n",
" at ./src/shared/model.rs:268:39\n",
" 11: righor::PyModel::uniform\n",
" at ./src/lib.rs:220:20\n",
" 12: righor::PyModel::__pymethod_uniform__\n",
" at ./src/lib.rs:77:1\n",
" 13: pyo3::impl_::trampoline::noargs::{{closure}}\n",
" at /home/thomas/.cargo/registry/src/index.crates.io-6f17d22bba15001f/pyo3-0.22.6/src/impl_/trampoline.rs:34:21\n",
" 14: pyo3::impl_::trampoline::trampoline::{{closure}}\n",
" at /home/thomas/.cargo/registry/src/index.crates.io-6f17d22bba15001f/pyo3-0.22.6/src/impl_/trampoline.rs:188:54\n",
" 15: std::panicking::try::do_call\n",
" at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/std/src/panicking.rs:554:40\n",
" 16: std::panicking::try\n",
" at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/std/src/panicking.rs:518:19\n",
" 17: std::panic::catch_unwind\n",
" at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/std/src/panic.rs:345:14\n",
" 18: pyo3::impl_::trampoline::trampoline\n",
" at /home/thomas/.cargo/registry/src/index.crates.io-6f17d22bba15001f/pyo3-0.22.6/src/impl_/trampoline.rs:188:9\n",
" 19: pyo3::impl_::trampoline::noargs\n",
" at /home/thomas/.cargo/registry/src/index.crates.io-6f17d22bba15001f/pyo3-0.22.6/src/impl_/trampoline.rs:34:5\n",
" 20: righor::_::__INVENTORY::trampoline\n",
" at ./src/lib.rs:77:1\n",
" 21: method_vectorcall_NOARGS\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Objects/descrobject.c:454:24\n",
" 22: _PyObject_VectorcallTstate\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/./Include/internal/pycore_call.h:92:11\n",
" 23: PyObject_Vectorcall\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Objects/call.c:325:12\n",
" 24: _PyEval_EvalFrameDefault\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/bytecodes.c:2711:19\n",
" 25: _PyEval_EvalFrame\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/./Include/internal/pycore_ceval.h:88:16\n",
" 26: _PyEval_Vector\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/ceval.c:1675:12\n",
" 27: PyEval_EvalCode\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/ceval.c:570:21\n",
" 28: builtin_exec_impl\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/bltinmodule.c:1096:17\n",
" 29: builtin_exec\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/clinic/bltinmodule.c.h:586:20\n",
" 30: _PyEval_EvalFrameDefault\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/bytecodes.c:2971:19\n",
" 31: _PyEval_EvalFrame\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/./Include/internal/pycore_ceval.h:88:16\n",
" 32: gen_send_ex2\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Objects/genobject.c:230:14\n",
" 33: gen_send_ex\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Objects/genobject.c:274:9\n",
" 34: gen_send\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Objects/genobject.c:297:12\n",
" 35: _PyEval_EvalFrameDefault\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/bytecodes.c:3090:19\n",
" 36: _PyObject_VectorcallTstate\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/./Include/internal/pycore_call.h:92:11\n",
" 37: method_vectorcall\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Objects/classobject.c:61:18\n",
" 38: _PyVectorcall_Call\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Objects/call.c:283:24\n",
" 39: _PyObject_Call\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Objects/call.c:354:16\n",
" 40: _PyEval_EvalFrameDefault\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/bytecodes.c:3259:26\n",
" 41: _PyEval_EvalFrame\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/./Include/internal/pycore_ceval.h:88:16\n",
" 42: gen_send_ex2\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Objects/genobject.c:230:14\n",
" 43: PyGen_am_send\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Objects/genobject.c:267:12\n",
" 44: task_step_impl\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/./Modules/_asynciomodule.c:2826:22\n",
" 45: task_step\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/./Modules/_asynciomodule.c:3127:11\n",
" 46: cfunction_vectorcall_O\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Objects/methodobject.c:509:24\n",
" 47: _PyObject_VectorcallTstate\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/./Include/internal/pycore_call.h:92:11\n",
" 48: context_run\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/context.c:668:29\n",
" 49: cfunction_vectorcall_FASTCALL_KEYWORDS\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Objects/methodobject.c:438:24\n",
" 50: _PyEval_EvalFrameDefault\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/bytecodes.c:3259:26\n",
" 51: _PyEval_EvalFrame\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/./Include/internal/pycore_ceval.h:88:16\n",
" 52: _PyEval_Vector\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/ceval.c:1675:12\n",
" 53: PyEval_EvalCode\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/ceval.c:570:21\n",
" 54: builtin_exec_impl\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/bltinmodule.c:1096:17\n",
" 55: builtin_exec\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/clinic/bltinmodule.c.h:586:20\n",
" 56: cfunction_vectorcall_FASTCALL_KEYWORDS\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Objects/methodobject.c:438:24\n",
" 57: _PyObject_VectorcallTstate\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/./Include/internal/pycore_call.h:92:11\n",
" 58: PyObject_Vectorcall\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Objects/call.c:325:12\n",
" 59: _PyEval_EvalFrameDefault\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Python/bytecodes.c:2711:19\n",
" 60: pymain_run_module\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Modules/main.c:300:14\n",
" 61: pymain_run_python\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Modules/main.c:604:21\n",
" 62: Py_RunMain\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Modules/main.c:689:5\n",
" 63: pymain_main\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Modules/main.c:719:12\n",
" 64: Py_BytesMain\n",
" at /tmp/python-build.20241021214719.82636/Python-3.12.0/Modules/main.c:743:12\n",
" 65: __libc_start_call_main\n",
" at ./csu/../sysdeps/nptl/libc_start_call_main.h:58:16\n",
" 66: __libc_start_main_impl\n",
" at ./csu/../csu/libc-start.c:360:3\n",
" 67: _start\n",
"note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.\n"
]
},
{
"ename": "PanicException",
"evalue": "ndarray: index out of bounds",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mPanicException\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43migor_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muniform\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[0;31mPanicException\u001b[0m: ndarray: index out of bounds"
]
}
],
"source": [
"igor_model.uniform()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "81e25bb0-fbcc-4202-b66f-515dcf3a3da0",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "15e6ec6f322e4a659e3889f9da56b728",
"model_id": "7d27d97233674a97a0902d23672e86d6",
"version_major": 2,
"version_minor": 0
},
Expand Down
Binary file modified python/righor/_righor.cpython-312-x86_64-linux-gnu.so
Binary file not shown.
30 changes: 30 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,36 @@ impl PyModel {
Generator::new(&self.inner.clone(), seed, available_v, available_j)
}

/// Compute the proportion of productive sequence (number between 0 and 1)
/// Can be restricted to a subset of Vs/Js
/// Kind of slow (rely on Monte-Carlo)
#[pyo3(signature = (seed=None, available_v=None, available_j=None, nb_seq_generated=1000000))]
pub fn proportion_productive(
&self,
seed: Option<u64>,
available_v: Option<Vec<Gene>>,
available_j: Option<Vec<Gene>>,
nb_seq_generated: usize,
) -> Result<f64> {
let mut gen = self.generator(seed, available_v, available_j)?;

Ok(gen
.parallel_generate(nb_seq_generated, false)
.into_iter()
.filter(|seq| {
seq.junction_aa
.as_ref()
.and_then(|s| AminoAcid::from_string(s).ok())
.map_or(false, |aa| self.inner.is_productive(&Some(aa)))
})
.count() as f64
/ (nb_seq_generated as f64))
}

pub fn is_productive(&self, seq: &AminoAcid) -> bool {
self.inner.is_productive(&Some(seq.clone()))
}

pub fn filter_vs(&self, vs: Vec<Gene>) -> Result<PyModel> {
Ok(PyModel {
inner: self.inner.filter_vs(vs)?,
Expand Down
68 changes: 56 additions & 12 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ fn run_infer() -> Result<()> {
let mut als = Vec::new();
let ifp = shared::InferenceParameters::default();
let alp = shared::AlignmentParameters::default();
for _ in tqdm!(0..1000) {
for _ in tqdm!(0..10000) {
let generated = generator.generate(false)?;
let es = shared::EntrySequence::NucleotideCDR3((
shared::Dna::from_string(&generated.junction_nt)?.into(),
Expand All @@ -45,6 +45,9 @@ fn run_infer() -> Result<()> {
}
model = model.uniform()?;
model.infer(&als, None, &alp, &ifp)?;
model.infer(&als, None, &alp, &ifp)?;
model.infer(&als, None, &alp, &ifp)?;

Ok(())
}

Expand Down Expand Up @@ -73,16 +76,16 @@ fn run_evaluate() -> Result<()> {
let generated = generator.generate(false)?;
let es = shared::EntrySequence::NucleotideCDR3((
shared::Dna::from_string(&generated.junction_nt)?.into(),
model
.get_v_segments()
.into_iter()
.filter(|x| x.name == generated.v_gene)
.collect(),
model
.get_j_segments()
.into_iter()
.filter(|x| x.name == generated.j_gene)
.collect(),
model.genes_matching(&generated.v_gene, false)?,
// .get_v_segments()
// .into_iter()
// .filter(|x| x.name == generated.v_gene)
// .collect(),
model.genes_matching(&generated.j_gene, false)?, // model
// .get_j_segments()
// .into_iter()
// .filter(|x| x.name == generated.j_gene)
// .collect(),
));
model.evaluate(es, &alp, &ifp);
}
Expand Down Expand Up @@ -133,7 +136,48 @@ fn run_evaluate_aa() -> Result<()> {
Ok(())
}

// generate 1'000 nucleotide sequences and infer them
fn run_infer_vj() -> Result<()> {
let mut model = shared::Model::load_from_name(
"human",
"igk",
None,
&Path::new(concat!(
env!("CARGO_MANIFEST_DIR"),
"/righor.data/data/righor_models/"
)),
)?;
let mut generator = shared::Generator::new(&model.clone(), Some(48), None, None)?;
//let mut als = Vec::new();
let mut als = Vec::new();
let ifp = shared::InferenceParameters::default();
let alp = shared::AlignmentParameters::default();
for _ in tqdm!(0..10000) {
let generated = generator.generate(false)?;
let es = shared::EntrySequence::NucleotideCDR3((
shared::Dna::from_string(&generated.junction_nt)?.into(),
model
.get_v_segments()
.into_iter()
.filter(|x| x.name == generated.v_gene)
.collect(),
model
.get_j_segments()
.into_iter()
.filter(|x| x.name == generated.j_gene)
.collect(),
));
als.push(es)
}
model = model.uniform()?;
model.infer(&als, None, &alp, &ifp)?;
model.infer(&als, None, &alp, &ifp)?;
model.infer(&als, None, &alp, &ifp)?;

Ok(())
}

fn main() -> Result<()> {
let _ = run_evaluate()?;
let _ = run_infer()?;
Ok(())
}
21 changes: 20 additions & 1 deletion src/shared/markov_chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use foldhash::{HashMap, HashMapExt};
use ndarray::Array2;
use serde::{de, ser::SerializeStruct, Deserialize, Deserializer, Serialize, Serializer};

#[derive(Clone, PartialEq, Debug, Default)]
#[derive(Clone, PartialEq, Debug)]
pub struct DNAMarkovChain {
pub transition_matrix: Array2<f64>,
// pre-computed data for dealing with the degenerate nucleotides
Expand Down Expand Up @@ -65,6 +65,25 @@ impl<'de> Deserialize<'de> for DNAMarkovChain {
}
}

impl Default for DNAMarkovChain {
fn default() -> Self {
Self {
transition_matrix: Array2::eye(4), // Assuming 4x4 identity matrix for DNA bases
degenerate_matrix: Vec::new(),
aa_lone_rev: HashMap::new(),
aa_lone: HashMap::new(),
aa_start_rev: HashMap::new(),
aa_start: HashMap::new(),
aa_middle_rev: HashMap::new(),
aa_middle: HashMap::new(),
aa_end_rev: HashMap::new(),
aa_end: HashMap::new(),
end_degenerate_vector: Vec::new(),
reverse: false,
}
}
}

impl DNAMarkovChain {
pub fn reinitialize(&mut self) -> Result<Self> {
Self::new(&self.transition_matrix, self.reverse)
Expand Down
Loading

0 comments on commit 1ea4eee

Please sign in to comment.