From 1eede62395b6ed5b5c2976a061ba736519eb1d97 Mon Sep 17 00:00:00 2001 From: kimakan <45099849+kimakan@users.noreply.github.com> Date: Tue, 21 May 2024 15:13:58 +0200 Subject: [PATCH] update docs for 0.7.0 --- CHANGELOG.md | 19 +++++++----- README.md | 58 ++++++++++++++++++----------------- docs/development.md | 74 +++++++++++++++------------------------------ docs/examples.md | 69 ------------------------------------------ pyproject.toml | 2 +- 5 files changed, 69 insertions(+), 153 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5abe6b4..070e384 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,18 @@ -## 0.7.0 (XXXX-XX-XX) +## 0.7.0 (2024-05-21) major overhaul for ADQL 2.1 recommendation 2023-12-15 - - COOSYS is not required for the geometry constructors - - the geometry constructors return the correct datatype (doube precission[]) + - COOSYS is not required for the geometry constructors anymore, since it's deprecated + - the geometry constructors return the correct datatype (double precision[]) and correct units (degrees) - - drop the maintenance/support for the translation from ADQL to MySQL. - - fix `BOX` constructor - - new requirements for the `pg_sphere` and postgreSQL - - ... + - dropped the maintenance/support for the translation from ADQL to MySQL. + - bumped the version of `antlr4-python3-runtime` to 4.13.1 + - fixed `BOX` constructor, although it's deprecated in ADQL 2.1 + - fixed `CONTAINS` for the case `0=CONTAINS()` + - fixed `INTERSECTS` for the case `0=INTERSECTS()` + - new requirements for the `pg_sphere` extension +([link](https://github.com/kimakan/pgsphere/tree/aiprdbms16)) + - removed unsupported optional ADQL functions, such as `CENTROID`, `REGION`, etc. 
+ - replaced `setup.py` by `pyproject.toml` since `python setup.py install` is deprecated ## 0.6.1 (2022-11-17) diff --git a/README.md b/README.md index cd0191a..b9a76c0 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Designed to be used in conjunction with [django-daiquri](https://github.com/djan as a query processing backend but it can be easily used as a stand-alone tool or integrated into another project. -**\*NOTE: Since version 0.7.0, MySQL is not activelly supported/maintained anymore.** +**\*NOTE: Since version 0.7.0 MySQL is not supported (maintained) anymore.** [![pytest Workflow Status](https://github.com/aipescience/queryparser/actions/workflows/pytest.yml/badge.svg)](https://github.com/aipescience/queryparser/actions/workflows/pytest.yml) @@ -24,9 +24,7 @@ Installation The easiest way to install the package is by using the pip tool: ```bash - - pip install queryparser-python3 - +python -m pip install queryparser-python3 ``` Alternatively, you can clone the repository and install it from there. @@ -39,18 +37,24 @@ Generating the parser from the git repository To generate the parsers you need `python3` , `java` above version 7, and `antlr4` (`antlr-4.*-complete.jar` has to be installed inside the -`/usr/local/lib/` or `/usr/local/bin/` directories). +`/usr/local/lib/`, `/usr/local/bin/` or root directory of the project). + +The current version of `antlr-4.*-complete.jar` can be downloaded via + +```bash +wget http://www.antlr.org/download/antlr-4.13.1-complete.jar +``` After cloning the project run ```bash - make +make ``` and a `lib` directory will be created. After that, run ```bash - python setup.py install +python -m pip install . ``` to install the generated parser in your virtual environment. @@ -58,10 +62,10 @@ to install the generated parser in your virtual environment. 
Additional requirements ----------------------- -The queryparser assumes that the PostgreSQL database has the extension -[pg_sphere](https://github.com/kimakan/pgsphere/tree/aiprdbms16) installed. Although the `pg_sphere` is not required for the -python module, the PostgreSQL **queries will not run** without that extension -installed on the database. +The queryparser assumes that the PostgreSQL database has the extension +[pg_sphere](https://github.com/kimakan/pgsphere/tree/aiprdbms16) installed. +Although the `pg_sphere` is not required for the python module, the PostgreSQL +**queries will not run** without this extension installed on the database. Parsing MySQL and PostgreSQL @@ -74,21 +78,21 @@ Parsing and processing of MySQL queries can be done by creating an instance of the `MySQLQueryProcessor` class ```python - from queryparser.mysql import MySQLQueryProcessor - qp = MySQLQueryProcessor() +from queryparser.mysql import MySQLQueryProcessor +qp = MySQLQueryProcessor() ``` feeding it a MySQL query ```python - sql = "SELECT a FROM db.tab;" - qp.set_query(sql) +sql = "SELECT a FROM db.tab;" +qp.set_query(sql) ``` and running it with ```python - qp.process_query() +qp.process_query() ``` After the processing is completed, the processor object `qp` will include @@ -101,8 +105,8 @@ PostgreSQL parsing is very similar to MySQL, except it requires importing the `PostgreSQLProcessor` class: ```python - from queryparser.postgresql import PostgreSQLQueryProcessor - qp = PostgreSQLQueryProcessor() +from queryparser.postgresql import PostgreSQLQueryProcessor +qp = PostgreSQLQueryProcessor() ``` The rest of the functionality remains the same. 
@@ -115,15 +119,15 @@ Translation of ADQL queries is done similarly by first creating an instance of the `ADQLQueryTranslator` class ```python - from queryparser.adql import ADQLQueryTranslator - adql = "SELECT TOP 100 POINT('ICRS', ra, de) FROM db.tab;" - adt = ADQLQueryTranslator(adql) +from queryparser.adql import ADQLQueryTranslator +adql = "SELECT TOP 100 POINT('ICRS', ra, de) FROM db.tab;" +adt = ADQLQueryTranslator(adql) ``` and calling ```python - adt.to_postgresql() +adt.to_postgresql() ``` which returns a translated string representing a valid MySQL query if @@ -133,16 +137,16 @@ the ADQL query had no errors. The PostgreSQL query can then be parsed with the Testing ------- -First, install `pytest` +First in the root directory of the project, install optional dependencies +(`PyYAML` and `pytest`) by running ```bash - pip install pytest +python -m pip install .[test] ``` -then run the test suite for a version of python you would like to test with +then run the test suite with ```bash - pytest lib/ +python -m pytest lib/ ``` -More elaborate testing procedures can be found in the development notes. diff --git a/docs/development.md b/docs/development.md index f2f96e3..46fde71 100644 --- a/docs/development.md +++ b/docs/development.md @@ -26,14 +26,15 @@ package) and activate it: ```bash python -m venv qpenv -source qpenv /bin/activate +source qpenv/bin/activate ``` -After the virtual environment has been activated we can install the package -from the root directory of the package with +After the virtual environment has been activated we can build and install +the package from the root directory of the package with ```bash -pip install -r requirements.txt . +make +python -m pip install . ``` ## Testing @@ -41,7 +42,7 @@ pip install -r requirements.txt . 
All tests from the test suite can be executed with ```bash -pytest lib +pytest lib/ ``` Individual dialect functionality (MySQL in this case) with increased verbosity @@ -65,9 +66,9 @@ can be generated with pytest --cov=queryparser --cov-report html lib ``` -Continuous integration is enabled through Travis CI. The configuration is -specified inside of `.travis.yml` file. Edit as necessary. Coverage exclusions -are defined within `.coveragerc`. +Continuous integration is enabled through GitHub Actions. The configuration is +specified inside of `.github/workflows/pytest.yml` file. Edit as necessary. +Coverage exclusions are defined within `.coveragerc`. ### Writing new tests @@ -148,53 +149,28 @@ The main queryparser class that includes this antlr functionality is called `process_query()` that binds the processing together. MySQL and PostgreSQL processors inherit from this class and extend it with their own listeners. -### Indexed objects -The need for indexed objects is easiest to explain through an example. Let us -consider the following fairly typical ADQL query, - -```SQL -SELECT ra, dec FROM gdr2.gaia_source -WHERE 1=CONTAINS(POINT('ICRS', ra, dec), CIRCLE('ICRS', 31, -19, 0.5)); -``` - -Translating it to PostgreSQL and using pgsphere functions yields - -```SQL -SELECT * FROM gdr2.gaia_source -WHERE spoint(RADIANS(ra), RADIANS(dec)) @ scircle(spoint(RADIANS(31.0), RADIANS(-19.0)), RADIANS(0.5)); -``` - -While the translated query is syntactically fine, it would take a very long time -to run since the first `spoint` in the translated query needs to be computed -for the whole catalog every time the query is executed. To avoid this drawback -we pre-compute its value across -the whole catalog (let us name it `pos`) and index it. 
Since we know the value -of the column `pos` was computed from columns `ra` and `dec` of the catalog, -we can pass this information to the PostgreSQL processor and it will replace -its part in the query: - -```python -adt = ADQLQueryTranslator(query) -pgq = adt.to_postgresql() - -iob = {'spoint': ((('gdr2', 'gaia_source', 'ra'), - ('gdr2', 'gaia_source', 'dec'), 'pos'),)} +## New releases -qp = PostgreSQLQueryProcessor() -qp.set_query(pgq) -qp.process_query(indexed_objects=iob) +### Requirements +Install `build` and `twine` with +```bash +pip install twine +pip install build ``` +Make sure you have accounts on `https://pypi.org/` and `https://test.pypi.org/`, +and you are `Maintainer` of the `queryparser-python3` project. -In the indexed object dictionary `iob` we define which columns in the database -should be replaced with which indexed column for each type of pgsphere object -functions (spoint, scircle, sbox...). + - https://pypi.org/project/queryparser-python3/ + - https://test.pypi.org/project/queryparser-python3/ -## New releases +### Publishing 1. Change the version number in `src/queryparser/__init__.py` -2. `python setup.py sdist bdist_wheel` +2. `python -m build .` 3. `twine check dist/*` 4. `twine upload --repository-url https://test.pypi.org/legacy/ dist/*` -5. `twine upload dist/*` -6. Create a new release on github. +5. Check whether the project was correctly uploaded on `test.pypi.org` by executing +`python3 -m pip install --index-url https://test.pypi.org/simple/ queryparser-python3` +6. `twine upload dist/*` +7. Create a new release on github. diff --git a/docs/examples.md b/docs/examples.md index f55205e..3abccfd 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -102,72 +102,3 @@ print(qp.keywords) ``` -### ADQL to PostgreSQL using indexed spoint object - -The need indexed objects is explained in the development document. Here we will -demonstrate how to use them. 
- -Let us start with the following ADQL query - -```SQL -SELECT gaia.source_id, gaia.ra, gaia.dec, gd.r_est -FROM gdr2.gaia_source gaia, gdr2_contrib.geometric_distance gd -WHERE 1 = CONTAINS(POINT('ICRS', gaia.ra, gaia.dec), - CIRCLE('ICRS',245.8962, -26.5222, 0.5)) -AND gaia.phot_g_mean_mag < 15 -AND gd.r_est > 1500 AND gd.r_est < 2300 -AND gaia.source_id = gd.source_id; -``` - -We first translate it to PostgreSQL - -```python -adt = ADQLQueryTranslator(query) -postgres_query = adt.to_postgresql() -``` - -which yields - -```SQL -SELECT gaia.source_id, gaia.ra, gaia.dec, gd.r_est -FROM gdr2.gaia_source gaia, gdr2_contrib.geometric_distance gd -WHERE spoint(RADIANS(gaia.ra), RADIANS(gaia.dec)) @ scircle(spoint(RADIANS(245.8962), RADIANS(-26.5222)), RADIANS(0.5)) -AND gaia.phot_g_mean_mag < 15 -AND gd.r_est > 1500 AND gd.r_est < 2300 -AND gaia.source_id = gd.source_id; -``` - -The issue with this query is that the computation of the - -```SQL -spoint(RADIANS(gaia.ra), RADIANS(gaia.dec)) -``` - -can take a very long time if the table we are querying on is large. To avoid -that we can pre-compute its value, however, in that case we need to replace -this `spoint` with the name of the pre-computed column. This can be achieved -by defining the `indexed_objects` dictionary and passing it to the processor. - -```python -iob = {'spoint': ((('gdr2', 'gaia_source', 'ra'), - ('gdr2', 'gaia_source', 'dec'), 'pos'),)} -qp = PostgreSQLQueryProcessor() -qp.set_query(postgres_query) -qp.process_query(indexed_objects=iob) -``` - -The `qp.query` string will now give us - -```SQL -SELECT gaia.source_id, gaia.ra, gaia.dec, gd.r_est -FROM gdr2.gaia_source gaia, gdr2_contrib.geometric_distance gd -WHERE gaia.pos @ scircle(spoint(RADIANS(245.8962), RADIANS(-26.5222)), RADIANS(0.5)) -AND gaia.phot_g_mean_mag < 15 -AND gd.r_est > 1500 AND gd.r_est < 2300 -AND gaia.source_id = gd.source_id; -``` - -We see that the `spoint` was replaced with the column `gaia.pos`. 
Although we -only defined the column as `pos`, we had to attach the alias to it since we -are using this alias for the table in the query. This is done automatically -by the processor. diff --git a/pyproject.toml b/pyproject.toml index 8a6afb3..85f0020 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "queryparser-python3" -description = "Parses PostgreSQL/MySQL and translates ADQL to PostgreSQL/MySQL." +description = "Package for parsing PostgreSQL/MySQL and translating ADQL to PostgreSQL/MySQL." readme = "README.md" dynamic = ["version"] license = {text = "Apache-2.0"}