diff --git a/bbconf/_version.py b/bbconf/_version.py index f102a9c..3b93d0b 100644 --- a/bbconf/_version.py +++ b/bbconf/_version.py @@ -1 +1 @@ -__version__ = "0.0.1" +__version__ = "0.0.2" diff --git a/bbconf/bbconf.py b/bbconf/bbconf.py index f337895..5ef1db8 100644 --- a/bbconf/bbconf.py +++ b/bbconf/bbconf.py @@ -1,4 +1,5 @@ from elasticsearch import Elasticsearch +from elasticsearch.exceptions import ConflictError from logging import getLogger from attmap import PathExAttMap as PXAM @@ -33,7 +34,10 @@ def _raise_missing_key(key): if CFG_BEDSTAT_OUTPUT_KEY not in self[CFG_PATH_KEY]: _raise_missing_key(CFG_BEDSTAT_OUTPUT_KEY) - + + if CFG_BEDBUNCHER_OUTPUT_KEY not in self[CFG_PATH_KEY]: + _raise_missing_key(CFG_BEDBUNCHER_OUTPUT_KEY) + for section, mapping in DEFAULT_SECTION_VALUES.items(): if section not in self: self[section] = PXAM() @@ -50,12 +54,16 @@ def establish_elasticsearch_connection(self, host=None): :return elasticsearch.Elasticsearch: connected client """ if hasattr(self, ES_CLIENT_KEY): - raise BedBaseConnectionError("The connection is already established: {}". - format(str(self[ES_CLIENT_KEY]))) + raise BedBaseConnectionError( + "The connection is already established: {}". + format(str(self[ES_CLIENT_KEY])) + ) hst = host or self[CFG_DATABASE_KEY][CFG_HOST_KEY] self[ES_CLIENT_KEY] = Elasticsearch([{"host": hst}]) - _LOGGER.info("Established connection with Elasticsearch: {}".format(hst)) - _LOGGER.debug("Elasticsearch info:\n{}".format(self[ES_CLIENT_KEY].info())) + _LOGGER.info("Established connection with Elasticsearch: {}". + format(hst)) + _LOGGER.debug("Elasticsearch info:\n{}". 
+ format(self[ES_CLIENT_KEY].info())) def assert_connection(self): """ @@ -64,25 +72,36 @@ def assert_connection(self): :raise BedBaseConnectionError: if there is no active connection """ if not hasattr(self, ES_CLIENT_KEY): - raise BedBaseConnectionError("No active connection with Elasticsearch") + raise BedBaseConnectionError( + "No active connection with Elasticsearch" + ) - def _search_index(self, index_name, query, just_data=True): + def _search_index(self, index_name, query, just_data=True, size=None, + **kwargs): """ Search selected Elasticsearch index with selected query :param str index_name: name of the Elasticsearch index to search :param dict query: query to search the DB against :param bool just_data: whether just the hits should be returned - :return dict | Iterable[dict]: search results + :param int size: number of hits to return, all are returned by default + :return dict | Iterable[dict] | NoneType: search results + or None if requested index does not exist """ self.assert_connection() - _LOGGER.debug("Searching index: {}\nQuery: {}".format(index_name, query)) + if not self[ES_CLIENT_KEY].indices.exists(index_name): + _LOGGER.warning("'{}' index does not exist".format(index_name)) + return + _LOGGER.debug("Searching index: {}\nQuery: {}". 
+ format(index_name, query)) query = {"query": query} if "query" not in query else query - search_results = self[ES_CLIENT_KEY].search(index=index_name, body=query) + size = size or self._count_docs(index=index_name) + search_results = self[ES_CLIENT_KEY].search( + index=index_name, body=query, size=size, **kwargs) return [r["_source"] for r in search_results["hits"]["hits"]] \ if just_data else search_results - def search_bedfiles(self, query, just_data=True): + def search_bedfiles(self, query, just_data=True, **kwargs): """ Search selected Elasticsearch bedset index with selected query @@ -90,9 +109,10 @@ def search_bedfiles(self, query, just_data=True): :param bool just_data: whether just the hits should be returned :return dict | Iterable[dict]: search results """ - return self._search_index(index_name=BED_INDEX, query=query, just_data=just_data) + return self._search_index(index_name=BED_INDEX, query=query, + just_data=just_data, **kwargs) - def search_bedsets(self, query, just_data=True): + def search_bedsets(self, query, just_data=True, **kwargs): """ Search selected Elasticsearch bedfiles index with selected query @@ -100,36 +120,77 @@ def search_bedsets(self, query, just_data=True): :param bool just_data: whether just the hits should be returned :return dict | Iterable[dict]: search results """ - return self._search_index(index_name=BEDSET_INDEX, query=query, just_data=just_data) + return self._search_index(index_name=BEDSET_INDEX, query=query, + just_data=just_data, **kwargs) - def _insert_data(self, index, data, **kwargs): + def _insert_data(self, index, data, doc_id, force_update=False, **kwargs): """ - Insert data to an index in a Elasticsearch DB - or create it and the insert in case it does not exist + Insert document to an index in a Elasticsearch DB + or create it and the insert in case it does not exist. + + Document ID argument is optional. If not provided, a random ID + will be assigned. 
+ If provided the document will be inserted only if no documents with + this ID are present in the DB. However, the document overwriting + can be forced if needed. :param str index: name of the index to insert the data into + :param str doc_id: unique identifier for the document + :param bool force_update: whether the pre-existing document + should be overwritten :param dict data: data to insert """ self.assert_connection() - self[ES_CLIENT_KEY].index(index=index, body=data, **kwargs) - - def insert_bedfiles_data(self, data, **kwargs): + if doc_id is None: + _LOGGER.info("Inserting document to index '{}' with an " + "automatically-assigned ID".format(index)) + self[ES_CLIENT_KEY].index(index=index, body=data, **kwargs) + else: + try: + self[ES_CLIENT_KEY].create(index=index, body=data, id=doc_id, + **kwargs) + except ConflictError: + msg_base = "Document '{}' already exists in index '{}'"\ + .format(doc_id, index) + if force_update: + _LOGGER.info(msg_base + ". Forcing update") + self[ES_CLIENT_KEY].index(index=index, body=data, id=doc_id, + **kwargs) + else: + _LOGGER.error("Could not insert data. " + msg_base) + raise + + def insert_bedfiles_data(self, data, doc_id=None, **kwargs): """ Insert data to the bedfile index a Elasticsearch DB - or create it and the insert in case it does not exist + or create it and the insert in case it does not exist. + + Document ID argument is optional. If not provided, a random ID will + be assigned. If provided the document will be inserted only if no + documents with this ID are present in the DB. However, the document + overwriting can be forced if needed. 
:param dict data: data to insert + :param str doc_id: unique identifier for the document, optional """ - self._insert_data(index=BED_INDEX, data=data, **kwargs) + self._insert_data(index=BED_INDEX, data=data, doc_id=doc_id, **kwargs) - def insert_bedsets_data(self, data, **kwargs): + def insert_bedsets_data(self, data, doc_id=None, **kwargs): """ Insert data to the bedset index in a Elasticsearch DB - or create it and the insert in case it does not exist + or create it and the insert in case it does not exist. + + Document ID argument is optional. If not provided, a random ID will + be assigned. + If provided the document will be inserted only if no documents with + this ID are present in the DB. + However, the document overwriting can be forced if needed. :param dict data: data to insert + :param str doc_id: unique identifier for the document, optional """ - self._insert_data(index=BEDSET_INDEX, data=data, **kwargs) + self._insert_data(index=BEDSET_INDEX, data=data, doc_id=doc_id, + **kwargs) def _get_mapping(self, index, just_data=True, **kwargs): """ @@ -140,7 +201,8 @@ def _get_mapping(self, index, just_data=True, **kwargs): """ self.assert_connection() mapping = self[ES_CLIENT_KEY].indices.get_mapping(index, **kwargs) - return mapping[index]["mappings"]["properties"] if just_data else mapping + return mapping[index]["mappings"]["properties"] \ + if just_data else mapping def get_bedfiles_mapping(self, just_data=True, **kwargs): """ @@ -156,17 +218,50 @@ def get_bedsets_mapping(self, just_data=True, **kwargs): :return dict: besets mapping definitions """ - return self._get_mapping(index=BEDSET_INDEX, just_data=just_data, **kwargs) + return self._get_mapping(index=BEDSET_INDEX, just_data=just_data, + **kwargs) + + def _get_doc(self, index, doc_id): + """ + Get a document from an index by its ID + + :param str index: name of the index to search + :param str doc_id: document ID to return + :return Mapping: matched document + """ + return 
self[ES_CLIENT_KEY].get(index=index, id=doc_id) + + def get_bedfiles_doc(self, doc_id): + """ + Get a document from bedfiles index by its ID + + :param str doc_id: document ID to return + :return Mapping: matched document + """ + return self._get_doc(index=BED_INDEX, doc_id=doc_id) + + def get_bedsets_doc(self, doc_id): + """ + Get a document from bedsets index by its ID + + :param str doc_id: document ID to return + :return Mapping: matched document + """ + return self._get_doc(index=BEDSET_INDEX, doc_id=doc_id) def _count_docs(self, index): """ Get the total number of the documents in a selected index :param str index: index to count the documents for - :return int: number of documents + :return int | None: number of documents """ self.assert_connection() - return int(self[ES_CLIENT_KEY].cat.count(index, params={"format": "json"})[0]['count']) + if not self[ES_CLIENT_KEY].indices.exists(index=index): + _LOGGER.warning("'{}' index does not exist".format(index)) + return None + return int(self[ES_CLIENT_KEY].cat.count( + index, params={"format": "json"})[0]['count']) def count_bedfiles_docs(self): """ @@ -184,22 +279,58 @@ def count_bedsets_docs(self): """ return self._count_docs(index=BEDSET_INDEX) + def _delete_index(self, index): + """ + Delete selected index from Elasticsearch + + :param str index: name of the index to delete + """ + self.assert_connection() + self[ES_CLIENT_KEY].indices.delete(index=index) + + def delete_bedfiles_index(self): + """ + Delete bedfiles index from Elasticsearch + """ + self._delete_index(index=BED_INDEX) + + def delete_bedsets_index(self): + """ + Delete bedsets index from Elasticsearch + """ + self._delete_index(index=BEDSET_INDEX) + + def _get_all(self, index_name, just_data=False): + """ + Convenience method for index exploration + + :param str index_name: name of the Elasticsearch index to search + :param bool just_data: whether just the hits should be returned + :return: + """ + self.assert_connection() + return 
self._search_index(index_name=index_name, query=QUERY_ALL, + just_data=just_data) + def get_bedbase_cfg(cfg=None): """ Determine path to the bedbase configuration file - The path can be either excplicitly provided + The path can be either explicitly provided or read from a $BEDBASE environment variable :param str cfg: path to the config file. Optional, the $BEDBASE config env var will be used if not provided :return str: configuration file path """ - selected_cfg = yacman.select_config(config_filepath=cfg, config_env_vars=CFG_ENV_VARS) + selected_cfg = yacman.select_config(config_filepath=cfg, + config_env_vars=CFG_ENV_VARS) if not selected_cfg: - raise BedBaseConnectionError("You must provide a config file or set the {} " - "environment variable".format("or ".join(CFG_ENV_VARS))) + raise BedBaseConnectionError( + "You must provide a config file or set the {} environment variable" + .format("or ".join(CFG_ENV_VARS)) + ) return selected_cfg diff --git a/bbconf/const.py b/bbconf/const.py index f218480..d228084 100644 --- a/bbconf/const.py +++ b/bbconf/const.py @@ -9,9 +9,6 @@ CFG_ENV_VARS = ["BEDBASE"] -SEARCH_TERMS = ['cellType', 'cellTypeSubtype', 'antibody', 'mappingGenome', - 'description', 'tissue', 'species', 'protocol', 'genome'] - RAW_BEDFILE_KEY = "raw_bedfile" BEDFILE_PATH_KEY = "bedfile_path" @@ -22,6 +19,8 @@ ES_CLIENT_KEY = "elasticsearch_client" +QUERY_ALL = {"match_all": {}} + # config file constants CFG_PATH_KEY = "path" CFG_SERVER_KEY = "server" @@ -29,9 +28,14 @@ CFG_HOST_KEY = "host" CFG_PORT_KEY = "port" CFG_BEDSTAT_OUTPUT_KEY = "bedstat_output" +CFG_BEDBUNCHER_OUTPUT_KEY = "bedbuncher_output" CFG_BED_INDEX_KEY = "bed_index" CFG_BEDSET_INDEX_KEY = "bedset_index" +CFG_KEYS = [ + "CFG_PATH_KEY", "CFG_SERVER_KEY", "CFG_DATABASE_KEY", "CFG_HOST_KEY", + "CFG_PORT_KEY", "CFG_BEDSTAT_OUTPUT_KEY", "CFG_BEDBUNCHER_OUTPUT_KEY", + "CFG_BED_INDEX_KEY", "CFG_BEDSET_INDEX_KEY"] DEFAULT_SECTION_VALUES = { CFG_DATABASE_KEY: { @@ -47,9 +51,102 @@ IDX_MAP = 
{CFG_BED_INDEX_KEY: BED_INDEX, CFG_BEDSET_INDEX_KEY: BEDSET_INDEX} -CFG_KEYS = ["CFG_PATH_KEY", "CFG_SERVER_KEY", "CFG_DATABASE_KEY", "CFG_HOST_KEY", - "CFG_PORT_KEY", "CFG_BEDSTAT_OUTPUT_KEY", "CFG_BED_INDEX_KEY", "CFG_BEDSET_INDEX_KEY"] +# JSON bed metadata constants and descriptions +# (the keys are actually established in bedstat/tools/regionstat.R) +JSON_GC_CONTENT_KEY = "gc_content" +JSON_ID_KEY = "id" +JSON_GENOME_KEY = "genome" +JSON_PROTOCOL_KEY = "exp_protocol" +JSON_CELL_TYPE_KEY = "cell_type" +JSON_TISSUE_KEY = "tissue" +JSON_ANTIBODY_KEY = "antibody" +JSON_TREATMENT_KEY = "treatment" +JSON_DATA_SOURCE_KEY = "data_source" +JSON_DESCRIPTION_KEY = "description" +JSON_REGIONS_NO_KEY = "regions_no" +JSON_MEAN_ABS_TSS_DIST_KEY = "mean_absolute_TSS_dist" +JSON_MEAN_REGION_WIDTH = "mean_region_width" +JSON_MD5SUM_KEY = "md5sum" +JSON_PLOTS_KEY = "plots" +JSON_EXON_FREQUENCY_KEY = "exon_frequency" +JSON_INTRON_FREQUENCY_KEY = "intron_frequency" +JSON_INTERGENIC_FREQUENCY_KEY = "intergenic_frequency" +JSON_PROMOTERCORE_FREQUENCY_KEY = "promoterCore_frequency" +JSON_PROMOTERPROX_FREQUENCY_KEY = "promoterProx_frequency" +JSON_EXON_PERCENTAGE_KEY = "exon_percentage" +JSON_INTRON_PERCENTAGE_KEY = "intron_percentage" +JSON_INTERGENIC_PERCENTAGE_KEY = "intergenic_percentage" +JSON_PROMOTERCORE_PERCENTAGE_KEY = "promoterCore_percentage" +JSON_PROMOTERPROX_PERCENTAGE_KEY = "promoterProx_percentage" +JSON_BEDSET_PEP_KEY = "bedset_pep" +JSON_BEDSET_BED_IDS_KEY = "bedset_bed_ids" + +JSON_METADATA_NAMES = ["JSON_GENOME_KEY", "JSON_PROTOCOL_KEY", "JSON_CELL_TYPE_KEY", "JSON_TISSUE_KEY", "JSON_ANTIBODY_KEY", + "JSON_TREATMENT_KEY", "JSON_DATA_SOURCE_KEY", "JSON_DESCRIPTION_KEY", + "JSON_ID_KEY", "JSON_MD5SUM_KEY", "JSON_PLOTS_KEY", "BEDFILE_PATH_KEY"] + +JSON_METADATA_VALUES = [eval(x) for x in JSON_METADATA_NAMES] + +JSON_STATS_SECTION_KEY = "statistics" +JSON_METADATA_SECTION_KEY = "metadata" + +JSON_NUMERIC_KEY_NAMES = [ + "JSON_GC_CONTENT_KEY", "JSON_REGIONS_NO_KEY", 
"JSON_MEAN_ABS_TSS_DIST_KEY", "JSON_MEAN_REGION_WIDTH", + "JSON_EXON_FREQUENCY_KEY", "JSON_INTRON_FREQUENCY_KEY", "JSON_PROMOTERPROX_FREQUENCY_KEY", + "JSON_INTERGENIC_FREQUENCY_KEY", "JSON_PROMOTERCORE_FREQUENCY_KEY", + "JSON_PROMOTERPROX_PERCENTAGE_KEY", "JSON_EXON_PERCENTAGE_KEY", "JSON_INTRON_PERCENTAGE_KEY", + "JSON_INTERGENIC_PERCENTAGE_KEY", "JSON_PROMOTERCORE_PERCENTAGE_KEY"] + +JSON_NUMERIC_KEY_VALUES = [eval(x) for x in JSON_NUMERIC_KEY_NAMES] + +JSON_BEDSET_MEANS_KEY = "bedset_means" +JSON_BEDSET_SD_KEY = "bedset_standard_deviation" +JSON_BEDSET_TAR_PATH_KEY = "bedset_tar_archive_path" +JSON_BEDSET_BEDFILES_GD_STATS_KEY = "bedset_bedfiles_gd_stats" +JSON_BEDSET_IGD_DB_KEY = "bedset_igd_database_path" +JSON_BEDSET_GD_STATS_KEY = "bedset_gd_stats" +JSON_BEDSET_KEY_VALUES = [ + JSON_BEDSET_MEANS_KEY, JSON_BEDSET_SD_KEY, JSON_BEDSET_TAR_PATH_KEY, + JSON_BEDSET_BEDFILES_GD_STATS_KEY, JSON_BEDSET_IGD_DB_KEY, JSON_BEDSET_GD_STATS_KEY] +JSON_BEDSET_KEY_NAMES = [ + "JSON_BEDSET_MEANS_KEY", "JSON_BEDSET_SD_KEY", "JSON_BEDSET_TAR_PATH_KEY", + "JSON_BEDSET_BEDFILES_GD_STATS_KEY", "JSON_BEDSET_IGD_DB_KEY", + "JSON_BEDSET_GD_STATS_KEY", "JSON_BEDSET_PEP_KEY", "JSON_BEDSET_BED_IDS_KEY"] + +JSON_KEYS = ["JSON_GC_CONTENT_KEY", "JSON_ID_KEY", "JSON_PLOTS_KEY", + "JSON_MD5SUM_KEY", "JSON_STATS_SECTION_KEY", "JSON_METADATA_VALUES", + "JSON_METADATA_SECTION_KEY"] + JSON_NUMERIC_KEY_NAMES + JSON_BEDSET_KEY_NAMES + JSON_METADATA_NAMES + + +_PERC_TXT = "Percentage of regions in " +_FREQ_TXT = "Frequency of regions in " +JSON_DICTS_KEY_DESCS = { + JSON_GC_CONTENT_KEY: "GC content", JSON_ID_KEY: "BED file ID", + JSON_REGIONS_NO_KEY: "Number of regions", JSON_MD5SUM_KEY: "BED file md5 checksum", + JSON_MEAN_ABS_TSS_DIST_KEY: "Mean absolute distance from transcription start sites", + JSON_MEAN_REGION_WIDTH: "Mean width of the regions in the BED file", + JSON_PROMOTERPROX_PERCENTAGE_KEY: _PERC_TXT + "promoter proximity", + JSON_PROMOTERCORE_PERCENTAGE_KEY: _PERC_TXT + "promoter 
core", + JSON_EXON_PERCENTAGE_KEY: _PERC_TXT + "exons", + JSON_INTRON_PERCENTAGE_KEY: _PERC_TXT + "introns", + JSON_INTERGENIC_PERCENTAGE_KEY: _PERC_TXT + "intergenic", + JSON_PROMOTERPROX_FREQUENCY_KEY: _FREQ_TXT + "promoter proximity", + JSON_PROMOTERCORE_FREQUENCY_KEY: _FREQ_TXT + "promoter core", + JSON_EXON_FREQUENCY_KEY: _FREQ_TXT + "exons", + JSON_INTRON_FREQUENCY_KEY: _FREQ_TXT + "introns", + JSON_INTERGENIC_FREQUENCY_KEY: _FREQ_TXT + "intergenic", + JSON_BEDSET_MEANS_KEY: "Average bedset statistics", + JSON_BEDSET_SD_KEY: "Standard deviation of bedset statistics", + JSON_BEDSET_TAR_PATH_KEY: "TAR archive", + JSON_BEDSET_BEDFILES_GD_STATS_KEY: "Individual bedfiles statistics CSV", + JSON_BEDSET_IGD_DB_KEY: "Bedset iGD database", + JSON_BEDSET_GD_STATS_KEY: "Bedset statistics CSV", + JSON_BEDSET_PEP_KEY: "Beset PEP", + JSON_BEDSET_BED_IDS_KEY: "BED files in this set" +} -__all__ = ["BED_INDEX", "BEDSET_INDEX", "SEARCH_TERMS", "RAW_BEDFILE_KEY", "CFG_ENV_VARS", +__all__ = ["BED_INDEX", "BEDSET_INDEX", "RAW_BEDFILE_KEY", "CFG_ENV_VARS", "ES_CLIENT_KEY", "DB_DEFAULT_HOST", "SERVER_DEFAULT_PORT", "SERVER_DEFAULT_HOST", - "PKG_NAME", "IDX_MAP", "BEDFILE_PATH_KEY", "DEFAULT_SECTION_VALUES"] + CFG_KEYS + "PKG_NAME", "IDX_MAP", "BEDFILE_PATH_KEY", "DEFAULT_SECTION_VALUES", "JSON_DICTS_KEY_DESCS", + "JSON_KEYS", "JSON_NUMERIC_KEY_VALUES", "JSON_NUMERIC_KEY_NAMES", "JSON_BEDSET_KEY_VALUES", + "JSON_BEDSET_KEY_NAMES", "QUERY_ALL", "JSON_METADATA_NAMES", "JSON_METADATA_VALUES"] + CFG_KEYS + JSON_KEYS diff --git a/config.yaml b/config.yaml index 0769577..4ff5c49 100644 --- a/config.yaml +++ b/config.yaml @@ -1,7 +1,7 @@ # full config example. Refer to bbconf/const.py for key names and default values path: - bedstat_output: $HOME/results_pipeline + pipelines_output: $HOME/results_pipeline database: host: localhost diff --git a/config_min.yaml b/config_min.yaml index aa3ef32..08eb350 100644 --- a/config_min.yaml +++ b/config_min.yaml @@ -1,4 +1,5 @@ # min config example. 
Refer to bbconf/const.py for key names and default values path: - bedstat_output: $HOME/results_pipeline \ No newline at end of file + bedstat_output: $LABROOT/resources/regions/bedstat_output + bedbuncher_output: $LABROOT/resources/regions/bedbuncher_output \ No newline at end of file diff --git a/docs/bbc_api.md b/docs/bbc_api.md index 62d0d70..8de8432 100644 --- a/docs/bbc_api.md +++ b/docs/bbc_api.md @@ -1,3 +1,33 @@ +Final targets: BedBaseConf, get_bedbase_cfg + + + + + # Package `bbconf` Documentation ## Class `BedBaseConf` @@ -58,6 +88,22 @@ Get the total number of the documents in the bedsets index +```python +def delete_bedfiles_index(self) +``` + +Delete bedfiles index from Elasticsearch + + + +```python +def delete_bedsets_index(self) +``` + +Delete bedsets index from Elasticsearch + + + ```python def establish_elasticsearch_connection(self, host=None) ``` @@ -82,6 +128,23 @@ Return the path to the config file or None if not set +```python +def get_bedfiles_doc(self, doc_id) +``` + +Get a document from bedfiles index by its ID +#### Parameters: + +- `doc_id` (`str`): document ID to return + + +#### Returns: + +- `Mapping`: matched document + + + + ```python def get_bedfiles_mapping(self, just_data=True, **kwargs) ``` @@ -94,6 +157,23 @@ Get mapping definitions for the bedfiles index +```python +def get_bedsets_doc(self, doc_id) +``` + +Get a document from bedsets index by its ID +#### Parameters: + +- `doc_id` (`str`): document ID to return + + +#### Returns: + +- `Mapping`: matched document + + + + ```python def get_bedsets_mapping(self, just_data=True, **kwargs) ``` @@ -107,31 +187,44 @@ Get mapping definitions for the bedsets index ```python -def insert_bedfiles_data(self, data, **kwargs) +def insert_bedfiles_data(self, data, doc_id=None, **kwargs) ``` -Insert data to the bedfile index a Elasticsearch DB or create it and the insert in case it does not exist +Insert data to the bedfile index a Elasticsearch DB or create it and the insert in case it 
does not exist. + +Document ID argument is optional. If not provided, a random ID will +be assigned. If provided the document will be inserted only if no +documents with this ID are present in the DB. However, the document +overwriting can be forced if needed. #### Parameters: - `data` (`dict`): data to insert +- `doc_id` (`str`): unique identifier for the document, optional ```python -def insert_bedsets_data(self, data, **kwargs) +def insert_bedsets_data(self, data, doc_id=None, **kwargs) ``` -Insert data to the bedset index in a Elasticsearch DB or create it and the insert in case it does not exist +Insert data to the bedset index in a Elasticsearch DB or create it and the insert in case it does not exist. + +Document ID argument is optional. If not provided, a random ID will +be assigned. +If provided the document will be inserted only if no documents with +this ID are present in the DB. +However, the document overwriting can be forced if needed. #### Parameters: - `data` (`dict`): data to insert +- `doc_id` (`str`): unique identifier for the document, optional ```python -def search_bedfiles(self, query, just_data=True) +def search_bedfiles(self, query, just_data=True, **kwargs) ``` Search selected Elasticsearch bedset index with selected query @@ -149,7 +242,7 @@ Search selected Elasticsearch bedset index with selected query ```python -def search_bedsets(self, query, just_data=True) +def search_bedsets(self, query, just_data=True, **kwargs) ``` Search selected Elasticsearch bedfiles index with selected query @@ -182,10 +275,13 @@ Return writability flag or None if not set def get_bedbase_cfg(cfg=None) ``` -Read and create the bedbase configuration object +Determine path to the bedbase configuration file + +The path can be either explicitly provided +or read from a $BEDBASE environment variable #### Parameters: -- `cfg` (`str`): path to the config file.Optional, the bedbase config env var will be used if not provided +- `cfg` (`str`): path to the config 
file.Optional, the $BEDBASE config env var will be used if not provided #### Returns: @@ -198,4 +294,4 @@ Read and create the bedbase configuration object -*Version Information: `bbconf` v0.0.1, generated by `lucidoc` v0.4.2* +*Version Information: `bbconf` v0.0.2, generated by `lucidoc` v0.4.3* diff --git a/docs/changelog.md b/docs/changelog.md index a53f6a2..30bd0fd 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,19 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [0.0.1] - unreleased +## [0.0.2] - 2020-05-28 +### Added +- index deleting methods: + - `delete_bedsets_index` + - `delete_bedfiles_index` +- multiple new key constants + +### Changed +- make `search_bedfiles` and `search_bedsets` methods return all hits by default instead of just 10. Parametrize it. +- added more arguments to `insert_bedfiles_data` and `insert_bedsets_data` method interfaces: `doc_id` and `force_update` +- Elasticsearch documents are inserted into the indices more securely, `insert_*` methods prevent document duplication + + +## [0.0.1] - 2020-02-05 ### Added - initial project release \ No newline at end of file