diff --git a/bbconf/_version.py b/bbconf/_version.py
index f102a9c..3b93d0b 100644
--- a/bbconf/_version.py
+++ b/bbconf/_version.py
@@ -1 +1 @@
-__version__ = "0.0.1"
+__version__ = "0.0.2"
diff --git a/bbconf/bbconf.py b/bbconf/bbconf.py
index f337895..5ef1db8 100644
--- a/bbconf/bbconf.py
+++ b/bbconf/bbconf.py
@@ -1,4 +1,5 @@
from elasticsearch import Elasticsearch
+from elasticsearch.exceptions import ConflictError
from logging import getLogger
from attmap import PathExAttMap as PXAM
@@ -33,7 +34,10 @@ def _raise_missing_key(key):
if CFG_BEDSTAT_OUTPUT_KEY not in self[CFG_PATH_KEY]:
_raise_missing_key(CFG_BEDSTAT_OUTPUT_KEY)
-
+
+ if CFG_BEDBUNCHER_OUTPUT_KEY not in self[CFG_PATH_KEY]:
+ _raise_missing_key(CFG_BEDBUNCHER_OUTPUT_KEY)
+
for section, mapping in DEFAULT_SECTION_VALUES.items():
if section not in self:
self[section] = PXAM()
@@ -50,12 +54,16 @@ def establish_elasticsearch_connection(self, host=None):
:return elasticsearch.Elasticsearch: connected client
"""
if hasattr(self, ES_CLIENT_KEY):
- raise BedBaseConnectionError("The connection is already established: {}".
- format(str(self[ES_CLIENT_KEY])))
+ raise BedBaseConnectionError(
+ "The connection is already established: {}".
+ format(str(self[ES_CLIENT_KEY]))
+ )
hst = host or self[CFG_DATABASE_KEY][CFG_HOST_KEY]
self[ES_CLIENT_KEY] = Elasticsearch([{"host": hst}])
- _LOGGER.info("Established connection with Elasticsearch: {}".format(hst))
- _LOGGER.debug("Elasticsearch info:\n{}".format(self[ES_CLIENT_KEY].info()))
+ _LOGGER.info("Established connection with Elasticsearch: {}".
+ format(hst))
+ _LOGGER.debug("Elasticsearch info:\n{}".
+ format(self[ES_CLIENT_KEY].info()))
def assert_connection(self):
"""
@@ -64,25 +72,36 @@ def assert_connection(self):
:raise BedBaseConnectionError: if there is no active connection
"""
if not hasattr(self, ES_CLIENT_KEY):
- raise BedBaseConnectionError("No active connection with Elasticsearch")
+ raise BedBaseConnectionError(
+ "No active connection with Elasticsearch"
+ )
- def _search_index(self, index_name, query, just_data=True):
+ def _search_index(self, index_name, query, just_data=True, size=None,
+ **kwargs):
"""
Search selected Elasticsearch index with selected query
:param str index_name: name of the Elasticsearch index to search
:param dict query: query to search the DB against
:param bool just_data: whether just the hits should be returned
- :return dict | Iterable[dict]: search results
+ :param int size: number of hits to return, all are returned by default
+ :return dict | Iterable[dict] | NoneType: search results
+ or None if requested index does not exist
"""
self.assert_connection()
- _LOGGER.debug("Searching index: {}\nQuery: {}".format(index_name, query))
+ if not self[ES_CLIENT_KEY].indices.exists(index_name):
+ _LOGGER.warning("'{}' index does not exist".format(index_name))
+ return
+ _LOGGER.debug("Searching index: {}\nQuery: {}".
+ format(index_name, query))
query = {"query": query} if "query" not in query else query
- search_results = self[ES_CLIENT_KEY].search(index=index_name, body=query)
+ size = size or self._count_docs(index=index_name)
+ search_results = self[ES_CLIENT_KEY].search(
+ index=index_name, body=query, size=size, **kwargs)
return [r["_source"] for r in search_results["hits"]["hits"]] \
if just_data else search_results
- def search_bedfiles(self, query, just_data=True):
+ def search_bedfiles(self, query, just_data=True, **kwargs):
"""
Search selected Elasticsearch bedset index with selected query
@@ -90,9 +109,10 @@ def search_bedfiles(self, query, just_data=True):
:param bool just_data: whether just the hits should be returned
:return dict | Iterable[dict]: search results
"""
- return self._search_index(index_name=BED_INDEX, query=query, just_data=just_data)
+ return self._search_index(index_name=BED_INDEX, query=query,
+ just_data=just_data, **kwargs)
- def search_bedsets(self, query, just_data=True):
+ def search_bedsets(self, query, just_data=True, **kwargs):
"""
Search selected Elasticsearch bedfiles index with selected query
@@ -100,36 +120,77 @@ def search_bedsets(self, query, just_data=True):
:param bool just_data: whether just the hits should be returned
:return dict | Iterable[dict]: search results
"""
- return self._search_index(index_name=BEDSET_INDEX, query=query, just_data=just_data)
+ return self._search_index(index_name=BEDSET_INDEX, query=query,
+ just_data=just_data, **kwargs)
- def _insert_data(self, index, data, **kwargs):
+ def _insert_data(self, index, data, doc_id, force_update=False, **kwargs):
"""
- Insert data to an index in a Elasticsearch DB
- or create it and the insert in case it does not exist
+ Insert document to an index in a Elasticsearch DB
+ or create it and the insert in case it does not exist.
+
+ Document ID argument is optional. If not provided, a random ID
+ will be assigned.
+ If provided the document will be inserted only if no documents with
+ this ID are present in the DB. However, the document overwriting
+ can be forced if needed.
:param str index: name of the index to insert the data into
+ :param str doc_id: unique identifier for the document
+ :param bool force_update: whether the pre-existing document
+ should be overwritten
:param dict data: data to insert
"""
self.assert_connection()
- self[ES_CLIENT_KEY].index(index=index, body=data, **kwargs)
-
- def insert_bedfiles_data(self, data, **kwargs):
+ if doc_id is None:
+ _LOGGER.info("Inserting document to index '{}' with an "
+ "automatically-assigned ID".format(index))
+ self[ES_CLIENT_KEY].index(index=index, body=data, **kwargs)
+ else:
+ try:
+ self[ES_CLIENT_KEY].create(index=index, body=data, id=doc_id,
+ **kwargs)
+ except ConflictError:
+ msg_base = "Document '{}' already exists in index '{}'"\
+ .format(doc_id, index)
+ if force_update:
+ _LOGGER.info(msg_base + ". Forcing update")
+ self[ES_CLIENT_KEY].index(index=index, body=data, id=doc_id,
+ **kwargs)
+ else:
+ _LOGGER.error("Could not insert data. " + msg_base)
+ raise
+
+ def insert_bedfiles_data(self, data, doc_id=None, **kwargs):
"""
Insert data to the bedfile index a Elasticsearch DB
- or create it and the insert in case it does not exist
+ or create it and the insert in case it does not exist.
+
+ Document ID argument is optional. If not provided, a random ID will
+ be assigned. If provided the document will be inserted only if no
+ documents with this ID are present in the DB. However, the document
+ overwriting can be forced if needed.
:param dict data: data to insert
+ :param str doc_id: unique identifier for the document, optional
"""
- self._insert_data(index=BED_INDEX, data=data, **kwargs)
+ self._insert_data(index=BED_INDEX, data=data, doc_id=doc_id, **kwargs)
- def insert_bedsets_data(self, data, **kwargs):
+ def insert_bedsets_data(self, data, doc_id=None, **kwargs):
"""
Insert data to the bedset index in a Elasticsearch DB
- or create it and the insert in case it does not exist
+ or create it and the insert in case it does not exist.
+
+ Document ID argument is optional. If not provided, a random ID will
+ be assigned.
+ If provided the document will be inserted only if no documents with
+ this ID are present in the DB.
+ However, the document overwriting can be forced if needed.
:param dict data: data to insert
+ :param str doc_id: unique identifier for the document, optional
"""
- self._insert_data(index=BEDSET_INDEX, data=data, **kwargs)
+ self._insert_data(index=BEDSET_INDEX, data=data, doc_id=doc_id,
+ **kwargs)
def _get_mapping(self, index, just_data=True, **kwargs):
"""
@@ -140,7 +201,8 @@ def _get_mapping(self, index, just_data=True, **kwargs):
"""
self.assert_connection()
mapping = self[ES_CLIENT_KEY].indices.get_mapping(index, **kwargs)
- return mapping[index]["mappings"]["properties"] if just_data else mapping
+ return mapping[index]["mappings"]["properties"] \
+ if just_data else mapping
def get_bedfiles_mapping(self, just_data=True, **kwargs):
"""
@@ -156,17 +218,50 @@ def get_bedsets_mapping(self, just_data=True, **kwargs):
:return dict: besets mapping definitions
"""
- return self._get_mapping(index=BEDSET_INDEX, just_data=just_data, **kwargs)
+ return self._get_mapping(index=BEDSET_INDEX, just_data=just_data,
+ **kwargs)
+
+ def _get_doc(self, index, doc_id):
+ """
+ Get a document from an index by its ID
+
+ :param str index: name of the index to search
+ :param str doc_id: document ID to return
+ :return Mapping: matched document
+ """
+ return self[ES_CLIENT_KEY].get(index=index, id=doc_id)
+
+ def get_bedfiles_doc(self, doc_id):
+ """
+ Get a document from bedfiles index by its ID
+
+ :param str doc_id: document ID to return
+ :return Mapping: matched document
+ """
+ return self._get_doc(index=BED_INDEX, doc_id=doc_id)
+
+ def get_bedsets_doc(self, doc_id):
+ """
+ Get a document from bedsets index by its ID
+
+ :param str doc_id: document ID to return
+ :return Mapping: matched document
+ """
+ return self._get_doc(index=BEDSET_INDEX, doc_id=doc_id)
def _count_docs(self, index):
"""
Get the total number of the documents in a selected index
:param str index: index to count the documents for
- :return int: number of documents
+ :return int | None: number of documents
"""
self.assert_connection()
- return int(self[ES_CLIENT_KEY].cat.count(index, params={"format": "json"})[0]['count'])
+ if not self[ES_CLIENT_KEY].indices.exists(index=index):
+ _LOGGER.warning("'{}' index does not exist".format(index))
+ return None
+ return int(self[ES_CLIENT_KEY].cat.count(
+ index, params={"format": "json"})[0]['count'])
def count_bedfiles_docs(self):
"""
@@ -184,22 +279,58 @@ def count_bedsets_docs(self):
"""
return self._count_docs(index=BEDSET_INDEX)
+ def _delete_index(self, index):
+ """
+ Delete selected index from Elasticsearch
+
+ :param str index: name of the index to delete
+ """
+ self.assert_connection()
+ self[ES_CLIENT_KEY].indices.delete(index=index)
+
+ def delete_bedfiles_index(self):
+ """
+ Delete bedfiles index from Elasticsearch
+ """
+ self._delete_index(index=BED_INDEX)
+
+ def delete_bedsets_index(self):
+ """
+ Delete bedsets index from Elasticsearch
+ """
+ self._delete_index(index=BEDSET_INDEX)
+
+ def _get_all(self, index_name, just_data=False):
+ """
+ Convenience method for index exploration
+
+ :param str index_name: name of the Elasticsearch index to search
+ :param bool just_data: whether just the hits should be returned
+ :return:
+ """
+ self.assert_connection()
+ return self._search_index(index_name=index_name, query=QUERY_ALL,
+ just_data=just_data)
+
def get_bedbase_cfg(cfg=None):
"""
Determine path to the bedbase configuration file
- The path can be either excplicitly provided
+ The path can be either explicitly provided
or read from a $BEDBASE environment variable
:param str cfg: path to the config file.
Optional, the $BEDBASE config env var will be used if not provided
:return str: configuration file path
"""
- selected_cfg = yacman.select_config(config_filepath=cfg, config_env_vars=CFG_ENV_VARS)
+ selected_cfg = yacman.select_config(config_filepath=cfg,
+ config_env_vars=CFG_ENV_VARS)
if not selected_cfg:
- raise BedBaseConnectionError("You must provide a config file or set the {} "
- "environment variable".format("or ".join(CFG_ENV_VARS)))
+ raise BedBaseConnectionError(
+ "You must provide a config file or set the {} environment variable"
+ .format("or ".join(CFG_ENV_VARS))
+ )
return selected_cfg
diff --git a/bbconf/const.py b/bbconf/const.py
index f218480..d228084 100644
--- a/bbconf/const.py
+++ b/bbconf/const.py
@@ -9,9 +9,6 @@
CFG_ENV_VARS = ["BEDBASE"]
-SEARCH_TERMS = ['cellType', 'cellTypeSubtype', 'antibody', 'mappingGenome',
- 'description', 'tissue', 'species', 'protocol', 'genome']
-
RAW_BEDFILE_KEY = "raw_bedfile"
BEDFILE_PATH_KEY = "bedfile_path"
@@ -22,6 +19,8 @@
ES_CLIENT_KEY = "elasticsearch_client"
+QUERY_ALL = {"match_all": {}}
+
# config file constants
CFG_PATH_KEY = "path"
CFG_SERVER_KEY = "server"
@@ -29,9 +28,14 @@
CFG_HOST_KEY = "host"
CFG_PORT_KEY = "port"
CFG_BEDSTAT_OUTPUT_KEY = "bedstat_output"
+CFG_BEDBUNCHER_OUTPUT_KEY = "bedbuncher_output"
CFG_BED_INDEX_KEY = "bed_index"
CFG_BEDSET_INDEX_KEY = "bedset_index"
+CFG_KEYS = [
+ "CFG_PATH_KEY", "CFG_SERVER_KEY", "CFG_DATABASE_KEY", "CFG_HOST_KEY",
+ "CFG_PORT_KEY", "CFG_BEDSTAT_OUTPUT_KEY", "CFG_BEDBUNCHER_OUTPUT_KEY",
+ "CFG_BED_INDEX_KEY", "CFG_BEDSET_INDEX_KEY"]
DEFAULT_SECTION_VALUES = {
CFG_DATABASE_KEY: {
@@ -47,9 +51,102 @@
IDX_MAP = {CFG_BED_INDEX_KEY: BED_INDEX, CFG_BEDSET_INDEX_KEY: BEDSET_INDEX}
-CFG_KEYS = ["CFG_PATH_KEY", "CFG_SERVER_KEY", "CFG_DATABASE_KEY", "CFG_HOST_KEY",
- "CFG_PORT_KEY", "CFG_BEDSTAT_OUTPUT_KEY", "CFG_BED_INDEX_KEY", "CFG_BEDSET_INDEX_KEY"]
+# JSON bed metadata constants and descriptions
+# (the keys are actually established in bedstat/tools/regionstat.R)
+JSON_GC_CONTENT_KEY = "gc_content"
+JSON_ID_KEY = "id"
+JSON_GENOME_KEY = "genome"
+JSON_PROTOCOL_KEY = "exp_protocol"
+JSON_CELL_TYPE_KEY = "cell_type"
+JSON_TISSUE_KEY = "tissue"
+JSON_ANTIBODY_KEY = "antibody"
+JSON_TREATMENT_KEY = "treatment"
+JSON_DATA_SOURCE_KEY = "data_source"
+JSON_DESCRIPTION_KEY = "description"
+JSON_REGIONS_NO_KEY = "regions_no"
+JSON_MEAN_ABS_TSS_DIST_KEY = "mean_absolute_TSS_dist"
+JSON_MEAN_REGION_WIDTH = "mean_region_width"
+JSON_MD5SUM_KEY = "md5sum"
+JSON_PLOTS_KEY = "plots"
+JSON_EXON_FREQUENCY_KEY = "exon_frequency"
+JSON_INTRON_FREQUENCY_KEY = "intron_frequency"
+JSON_INTERGENIC_FREQUENCY_KEY = "intergenic_frequency"
+JSON_PROMOTERCORE_FREQUENCY_KEY = "promoterCore_frequency"
+JSON_PROMOTERPROX_FREQUENCY_KEY = "promoterProx_frequency"
+JSON_EXON_PERCENTAGE_KEY = "exon_percentage"
+JSON_INTRON_PERCENTAGE_KEY = "intron_percentage"
+JSON_INTERGENIC_PERCENTAGE_KEY = "intergenic_percentage"
+JSON_PROMOTERCORE_PERCENTAGE_KEY = "promoterCore_percentage"
+JSON_PROMOTERPROX_PERCENTAGE_KEY = "promoterProx_percentage"
+JSON_BEDSET_PEP_KEY = "bedset_pep"
+JSON_BEDSET_BED_IDS_KEY = "bedset_bed_ids"
+
+JSON_METADATA_NAMES = ["JSON_GENOME_KEY", "JSON_PROTOCOL_KEY", "JSON_CELL_TYPE_KEY", "JSON_TISSUE_KEY", "JSON_ANTIBODY_KEY",
+ "JSON_TREATMENT_KEY", "JSON_DATA_SOURCE_KEY", "JSON_DESCRIPTION_KEY",
+ "JSON_ID_KEY", "JSON_MD5SUM_KEY", "JSON_PLOTS_KEY", "BEDFILE_PATH_KEY"]
+
+JSON_METADATA_VALUES = [eval(x) for x in JSON_METADATA_NAMES]
+
+JSON_STATS_SECTION_KEY = "statistics"
+JSON_METADATA_SECTION_KEY = "metadata"
+
+JSON_NUMERIC_KEY_NAMES = [
+ "JSON_GC_CONTENT_KEY", "JSON_REGIONS_NO_KEY", "JSON_MEAN_ABS_TSS_DIST_KEY", "JSON_MEAN_REGION_WIDTH",
+ "JSON_EXON_FREQUENCY_KEY", "JSON_INTRON_FREQUENCY_KEY", "JSON_PROMOTERPROX_FREQUENCY_KEY",
+ "JSON_INTERGENIC_FREQUENCY_KEY", "JSON_PROMOTERCORE_FREQUENCY_KEY",
+ "JSON_PROMOTERPROX_PERCENTAGE_KEY", "JSON_EXON_PERCENTAGE_KEY", "JSON_INTRON_PERCENTAGE_KEY",
+ "JSON_INTERGENIC_PERCENTAGE_KEY", "JSON_PROMOTERCORE_PERCENTAGE_KEY"]
+
+JSON_NUMERIC_KEY_VALUES = [eval(x) for x in JSON_NUMERIC_KEY_NAMES]
+
+JSON_BEDSET_MEANS_KEY = "bedset_means"
+JSON_BEDSET_SD_KEY = "bedset_standard_deviation"
+JSON_BEDSET_TAR_PATH_KEY = "bedset_tar_archive_path"
+JSON_BEDSET_BEDFILES_GD_STATS_KEY = "bedset_bedfiles_gd_stats"
+JSON_BEDSET_IGD_DB_KEY = "bedset_igd_database_path"
+JSON_BEDSET_GD_STATS_KEY = "bedset_gd_stats"
+JSON_BEDSET_KEY_VALUES = [
+ JSON_BEDSET_MEANS_KEY, JSON_BEDSET_SD_KEY, JSON_BEDSET_TAR_PATH_KEY,
+ JSON_BEDSET_BEDFILES_GD_STATS_KEY, JSON_BEDSET_IGD_DB_KEY, JSON_BEDSET_GD_STATS_KEY]
+JSON_BEDSET_KEY_NAMES = [
+ "JSON_BEDSET_MEANS_KEY", "JSON_BEDSET_SD_KEY", "JSON_BEDSET_TAR_PATH_KEY",
+ "JSON_BEDSET_BEDFILES_GD_STATS_KEY", "JSON_BEDSET_IGD_DB_KEY",
+ "JSON_BEDSET_GD_STATS_KEY", "JSON_BEDSET_PEP_KEY", "JSON_BEDSET_BED_IDS_KEY"]
+
+JSON_KEYS = ["JSON_GC_CONTENT_KEY", "JSON_ID_KEY", "JSON_PLOTS_KEY",
+ "JSON_MD5SUM_KEY", "JSON_STATS_SECTION_KEY", "JSON_METADATA_VALUES",
+ "JSON_METADATA_SECTION_KEY"] + JSON_NUMERIC_KEY_NAMES + JSON_BEDSET_KEY_NAMES + JSON_METADATA_NAMES
+
+
+_PERC_TXT = "Percentage of regions in "
+_FREQ_TXT = "Frequency of regions in "
+JSON_DICTS_KEY_DESCS = {
+ JSON_GC_CONTENT_KEY: "GC content", JSON_ID_KEY: "BED file ID",
+ JSON_REGIONS_NO_KEY: "Number of regions", JSON_MD5SUM_KEY: "BED file md5 checksum",
+ JSON_MEAN_ABS_TSS_DIST_KEY: "Mean absolute distance from transcription start sites",
+ JSON_MEAN_REGION_WIDTH: "Mean width of the regions in the BED file",
+ JSON_PROMOTERPROX_PERCENTAGE_KEY: _PERC_TXT + "promoter proximity",
+ JSON_PROMOTERCORE_PERCENTAGE_KEY: _PERC_TXT + "promoter core",
+ JSON_EXON_PERCENTAGE_KEY: _PERC_TXT + "exons",
+ JSON_INTRON_PERCENTAGE_KEY: _PERC_TXT + "introns",
+ JSON_INTERGENIC_PERCENTAGE_KEY: _PERC_TXT + "intergenic",
+ JSON_PROMOTERPROX_FREQUENCY_KEY: _FREQ_TXT + "promoter proximity",
+ JSON_PROMOTERCORE_FREQUENCY_KEY: _FREQ_TXT + "promoter core",
+ JSON_EXON_FREQUENCY_KEY: _FREQ_TXT + "exons",
+ JSON_INTRON_FREQUENCY_KEY: _FREQ_TXT + "introns",
+ JSON_INTERGENIC_FREQUENCY_KEY: _FREQ_TXT + "intergenic",
+ JSON_BEDSET_MEANS_KEY: "Average bedset statistics",
+ JSON_BEDSET_SD_KEY: "Standard deviation of bedset statistics",
+ JSON_BEDSET_TAR_PATH_KEY: "TAR archive",
+ JSON_BEDSET_BEDFILES_GD_STATS_KEY: "Individual bedfiles statistics CSV",
+ JSON_BEDSET_IGD_DB_KEY: "Bedset iGD database",
+ JSON_BEDSET_GD_STATS_KEY: "Bedset statistics CSV",
+ JSON_BEDSET_PEP_KEY: "Beset PEP",
+ JSON_BEDSET_BED_IDS_KEY: "BED files in this set"
+}
-__all__ = ["BED_INDEX", "BEDSET_INDEX", "SEARCH_TERMS", "RAW_BEDFILE_KEY", "CFG_ENV_VARS",
+__all__ = ["BED_INDEX", "BEDSET_INDEX", "RAW_BEDFILE_KEY", "CFG_ENV_VARS",
"ES_CLIENT_KEY", "DB_DEFAULT_HOST", "SERVER_DEFAULT_PORT", "SERVER_DEFAULT_HOST",
- "PKG_NAME", "IDX_MAP", "BEDFILE_PATH_KEY", "DEFAULT_SECTION_VALUES"] + CFG_KEYS
+ "PKG_NAME", "IDX_MAP", "BEDFILE_PATH_KEY", "DEFAULT_SECTION_VALUES", "JSON_DICTS_KEY_DESCS",
+ "JSON_KEYS", "JSON_NUMERIC_KEY_VALUES", "JSON_NUMERIC_KEY_NAMES", "JSON_BEDSET_KEY_VALUES",
+ "JSON_BEDSET_KEY_NAMES", "QUERY_ALL", "JSON_METADATA_NAMES", "JSON_METADATA_VALUES"] + CFG_KEYS + JSON_KEYS
diff --git a/config.yaml b/config.yaml
index 0769577..4ff5c49 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,7 +1,8 @@
# full config example. Refer to bbconf/const.py for key names and default values
path:
- bedstat_output: $HOME/results_pipeline
+ bedstat_output: $HOME/results_pipeline
+ bedbuncher_output: $HOME/results_pipeline
database:
host: localhost
diff --git a/config_min.yaml b/config_min.yaml
index aa3ef32..08eb350 100644
--- a/config_min.yaml
+++ b/config_min.yaml
@@ -1,4 +1,5 @@
# min config example. Refer to bbconf/const.py for key names and default values
path:
- bedstat_output: $HOME/results_pipeline
\ No newline at end of file
+ bedstat_output: $LABROOT/resources/regions/bedstat_output
+ bedbuncher_output: $LABROOT/resources/regions/bedbuncher_output
\ No newline at end of file
diff --git a/docs/bbc_api.md b/docs/bbc_api.md
index 62d0d70..8de8432 100644
--- a/docs/bbc_api.md
+++ b/docs/bbc_api.md
@@ -1,3 +1,33 @@
+Final targets: BedBaseConf, get_bedbase_cfg
+
+
+
+
+
# Package `bbconf` Documentation
## Class `BedBaseConf`
@@ -58,6 +88,22 @@ Get the total number of the documents in the bedsets index
+```python
+def delete_bedfiles_index(self)
+```
+
+Delete bedfiles index from Elasticsearch
+
+
+
+```python
+def delete_bedsets_index(self)
+```
+
+Delete bedsets index from Elasticsearch
+
+
+
```python
def establish_elasticsearch_connection(self, host=None)
```
@@ -82,6 +128,23 @@ Return the path to the config file or None if not set
+```python
+def get_bedfiles_doc(self, doc_id)
+```
+
+Get a document from bedfiles index by its ID
+#### Parameters:
+
+- `doc_id` (`str`): document ID to return
+
+
+#### Returns:
+
+- `Mapping`: matched document
+
+
+
+
```python
def get_bedfiles_mapping(self, just_data=True, **kwargs)
```
@@ -94,6 +157,23 @@ Get mapping definitions for the bedfiles index
+```python
+def get_bedsets_doc(self, doc_id)
+```
+
+Get a document from bedsets index by its ID
+#### Parameters:
+
+- `doc_id` (`str`): document ID to return
+
+
+#### Returns:
+
+- `Mapping`: matched document
+
+
+
+
```python
def get_bedsets_mapping(self, just_data=True, **kwargs)
```
@@ -107,31 +187,44 @@ Get mapping definitions for the bedsets index
```python
-def insert_bedfiles_data(self, data, **kwargs)
+def insert_bedfiles_data(self, data, doc_id=None, **kwargs)
```
-Insert data to the bedfile index a Elasticsearch DB or create it and the insert in case it does not exist
+Insert data to the bedfile index a Elasticsearch DB or create it and the insert in case it does not exist.
+
+Document ID argument is optional. If not provided, a random ID will
+be assigned. If provided the document will be inserted only if no
+documents with this ID are present in the DB. However, the document
+overwriting can be forced if needed.
#### Parameters:
- `data` (`dict`): data to insert
+- `doc_id` (`str`): unique identifier for the document, optional
```python
-def insert_bedsets_data(self, data, **kwargs)
+def insert_bedsets_data(self, data, doc_id=None, **kwargs)
```
-Insert data to the bedset index in a Elasticsearch DB or create it and the insert in case it does not exist
+Insert data to the bedset index in a Elasticsearch DB or create it and the insert in case it does not exist.
+
+Document ID argument is optional. If not provided, a random ID will
+be assigned.
+If provided the document will be inserted only if no documents with
+this ID are present in the DB.
+However, the document overwriting can be forced if needed.
#### Parameters:
- `data` (`dict`): data to insert
+- `doc_id` (`str`): unique identifier for the document, optional
```python
-def search_bedfiles(self, query, just_data=True)
+def search_bedfiles(self, query, just_data=True, **kwargs)
```
Search selected Elasticsearch bedset index with selected query
@@ -149,7 +242,7 @@ Search selected Elasticsearch bedset index with selected query
```python
-def search_bedsets(self, query, just_data=True)
+def search_bedsets(self, query, just_data=True, **kwargs)
```
Search selected Elasticsearch bedfiles index with selected query
@@ -182,10 +275,13 @@ Return writability flag or None if not set
def get_bedbase_cfg(cfg=None)
```
-Read and create the bedbase configuration object
+Determine path to the bedbase configuration file
+
+The path can be either explicitly provided
+or read from a $BEDBASE environment variable
#### Parameters:
-- `cfg` (`str`): path to the config file.Optional, the bedbase config env var will be used if not provided
+- `cfg` (`str`): path to the config file. Optional, the $BEDBASE config env var will be used if not provided
#### Returns:
@@ -198,4 +294,4 @@ Read and create the bedbase configuration object
-*Version Information: `bbconf` v0.0.1, generated by `lucidoc` v0.4.2*
+*Version Information: `bbconf` v0.0.2, generated by `lucidoc` v0.4.3*
diff --git a/docs/changelog.md b/docs/changelog.md
index a53f6a2..30bd0fd 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -2,6 +2,19 @@
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.
-## [0.0.1] - unreleased
+## [0.0.2] - 2020-05-28
+### Added
+- index deleting methods:
+ - `delete_bedsets_index`
+ - `delete_bedfiles_index`
+- multiple new keys constants
+
+### Changed
+- make `search_bedfiles` and `search_bedsets` methods return all hits by default instead of just 10. Parametrize it.
+- added more arguments to `insert_bedfiles_data` and `insert_bedsets_data` method interfaces: `doc_id` and `force_update`
+- Elasticsearch documents are inserted into the indices more securely, `insert_*` methods prevent document duplication
+
+
+## [0.0.1] - 2020-02-05
### Added
- initial project release
\ No newline at end of file