Skip to content

Commit 594f46b

Browse files
committed
bugfix for cache, more information in nodes
1 parent 6c472d4 commit 594f46b

File tree

12 files changed

+278
-228
lines changed

12 files changed

+278
-228
lines changed

Makefile

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,31 @@
55
#
66

77
# Default OBO file.
8-
URL = http://current.geneontology.org/ontology/go-basic.obo
9-
OBO = tmp/go-basic.obo
8+
OBO_URL = http://current.geneontology.org/ontology/go-basic.obo
9+
OBO_FILE = tmp/go-basic.obo
10+
11+
# The gene association file.
12+
GAF_URL = https://current.geneontology.org/annotations/goa_human.gaf.gz
13+
GAF_FILE = tmp/goa_human.gaf.gz
1014

1115
# Usage information.
1216
usage:
1317
@echo "#"
1418
@echo "# Use the source, Luke!"
1519
@echo "#"
1620

21+
# Download the gene ontology file
22+
${OBO_FILE}:
23+
mkdir -p $(dir ${OBO_FILE})
24+
curl -L ${OBO_URL} > ${OBO_FILE}
25+
26+
${GAF_FILE}:
27+
mkdir -p $(dir ${GAF_FILE})
28+
curl -L ${GAF_URL} > ${GAF_FILE}
29+
30+
index: ${OBO_FILE} ${GAF_FILE}
31+
genescape build --obo ${OBO_FILE} --gaf ${GAF_FILE} --index ${GAF_FILE}.index.gz
32+
1733
# Runs the web server.
1834
web:
1935
python src/genescape/server.py
@@ -26,10 +42,6 @@ test:
2642
testall: test
2743
(cd test && make test)
2844

29-
# A full test with file generation.
30-
testall: test
31-
(cd test && make test)
32-
3345
# Runs a linter.
3446
lint:
3547
hatch run lint:style

src/genescape/__about__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.8.0"
1+
__version__ = "0.8.1"

src/genescape/annot.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ def build():
3333
return idx
3434

3535
# Cache the index
36-
idx = resources.cache("index", func=build)
36+
key = f"file_{index}"
37+
idx = resources.cache(key, func=build)
3738

3839
# Extract the gene names from the data
3940
names = map(lambda x: x[utils.GID], data[utils.DATA_FIELD])
@@ -109,7 +110,9 @@ def go2ns(goid):
109110
res = []
110111
data_fields = [utils.GID, "root", "count", "function", utils.SOURCE, "count", "size", utils.LABEL]
111112

113+
112114
for goid, cnt, func in counts:
115+
113116
label = f"({cnt}/{n_size})"
114117
funcs = func2name.get(goid, [])
115118
name_space = go2ns(goid)
@@ -152,17 +155,21 @@ def ann2csv(ann):
152155

153156

154157
if __name__ == "__main__":
158+
159+
# Get default config
160+
cnf = resources.get_config()
161+
155162
# Initialize the resources
156-
res = resources.init()
163+
res = resources.init(cnf)
157164

158165
inp = res.TEST_GENES
159-
index = res.INDEX
166+
ind = res.INDEX
160167

161168
# Read the genelist
162169
stream = utils.get_stream(inp=inp)
163170

164171
data = utils.parse_terms(iterable=stream)
165172

166-
out = run(data=data, index=index, csvout=True)
173+
out = run(data=data, index=ind, csvout=True)
167174

168175
print(out)

src/genescape/build.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,15 +89,33 @@ def make_index(obo, gaf, index, with_synonyms=False):
8989
sym2go.setdefault(sym, []).append(go_id)
9090
go2sym.setdefault(go_id, []).append(sym)
9191

92+
# Remove duplicates
93+
for key in sym2go:
94+
sym2go[key] = list(set(sym2go[key]))
95+
96+
for key in go2sym:
97+
go2sym[key] = list(set(go2sym[key]))
98+
9299
utils.info(f"reading: {obo}")
93100
stream = gzip.open(obo, mode="rt", encoding="utf-8") if obo.name.endswith(".gz") else open(obo)
94101
terms = parse_obo(stream)
95102
terms = filter(lambda x: not x.get("is_obsolete"), terms)
96103
terms = list(terms)
97104

105+
# The total number of symbols (falls back to 1 so the percentage below never divides by zero)
106+
sym_count = len(sym2go) or 1
107+
108+
# Number of genes annotated with this GO term.
109+
def gene_count(goid):
110+
return len(go2sym.get(goid, []))
111+
98112
# Create a dictionary of GO terms
99113
obo_dict = {}
100114
for term in terms:
115+
goid = term["id"]
116+
gcnt = gene_count(goid)
117+
term[utils.GO2GENE_COUNT] = gcnt
118+
term[utils.GO2GENE_PERC] = (gcnt / sym_count) * 100
101119
obo_dict[term["id"]] = term
102120

103121
# Database metadata
-258 KB
Binary file not shown.
-226 KB
Binary file not shown.

src/genescape/resources.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
def cache(key, func):
1919
global CACHE
2020

21+
#key = str(key)
22+
23+
utils.info(f"cache key: {key}")
24+
2125
if key not in CACHE:
2226
CACHE[key] = func()
2327

@@ -65,7 +69,7 @@ def get(name):
6569

6670
def get_index(self, code):
6771
for value in self.config.get("index", []):
68-
if value["code"] == code:
72+
if value.get("code", "") == code:
6973
return value["path"]
7074
utils.error("index code not found: {code}")
7175
return self.INDEX

src/genescape/server.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ def draw():
117117
count = safe_int(count, default=1)
118118
db = get_param(request, name='db')
119119

120+
#resources.clear_cache()
121+
120122
# Choose a different index
121123
index = res.get_index(db)
122124

src/genescape/tree.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ def build_onto_graph(index):
3030
name = node["name"]
3131
text = textwrap.fill(name, width=20)
3232
label = f"{oid}\n{text}"
33+
gperc = node.get(utils.GO2GENE_PERC, 0)
34+
if gperc > 0.09:
35+
label += f": {gperc:.1f}%"
3336
namespace = utils.NAMESPACE_MAP.get(node["namespace"], "?")
3437
graph.add_node(oid, id=oid, name=name, namespace=namespace, label=label, **utils.NODE_ATTRS)
3538

@@ -209,7 +212,8 @@ def build():
209212
graph = build_onto_graph(index)
210213
return graph
211214

212-
graph = resources.cache("graph", func=build)
215+
key = f"graph_{index}"
216+
graph = resources.cache(key, func=build)
213217

214218
# Generate the tree from the graph.
215219
tree = make_tree(ann=ann, graph=graph)

src/genescape/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ def stop(msg):
8989
# Namespace categories
9090
NS_BP, NS_MF, NS_CC, NS_ALL = "BP", "MF", "CC", "ALL"
9191

92+
# Additional fields
93+
GO2GENE_COUNT, GO2GENE_PERC = "gene_count", "gene_percent"
94+
9295
# Map the GO categories namespaces.
9396
NAMESPACE_MAP = {
9497
"biological_process": NS_BP,

0 commit comments

Comments
 (0)