-
Notifications
You must be signed in to change notification settings - Fork 125
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from macbre/initial-version
Initial version
- Loading branch information
Showing
8 changed files
with
289 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -99,3 +99,5 @@ ENV/ | |
|
||
# mypy | ||
.mypy_cache/ | ||
|
||
.idea/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
language: python | ||
python: | ||
- "2.7" | ||
- "3.4" | ||
- "3.5" | ||
- "3.6" | ||
install: make install | ||
script: make coverage && make lint |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Options shared by the coverage reporting commands: report only on the
# library module and leave the test suite out of the numbers.
coverage_options = --include='sql_metadata.py' --omit='test/*'

# Install the package (and its pinned dependencies) in development mode.
install:
	pip install -e .

# Run the test suite.
test:
	py.test

# Run the test suite under coverage and produce HTML, XML and console reports.
coverage:
	rm -f .coverage*
	rm -rf htmlcov/*
	coverage run -p -m py.test
	coverage combine
	coverage html -d htmlcov $(coverage_options)
	coverage xml -i
	coverage report $(coverage_options)

# Static analysis of the library module.
lint:
	pylint sql_metadata.py

publish:
	# run git tag -a v0.0.0 before running make publish
	python setup.py sdist upload -r pypi

# None of the targets produce a file named after themselves, so declare them
# all as phony; otherwise a file or directory with a matching name (e.g. the
# test/ directory) makes the target look up to date and it is silently skipped.
.PHONY: install test coverage lint publish
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,23 @@ | ||
# sql-metadata | ||
Uses tokenized query returned by [`python-sqlparse`](https://github.com/andialbrecht/sqlparse) and generates query metadata | ||
|
||
[![PyPI](https://img.shields.io/pypi/v/sql-metadata.svg)](https://pypi.python.org/pypi/sql-metadata) | ||
[![Build Status](https://travis-ci.org/macbre/sql-metadata.svg?branch=master)](https://travis-ci.org/macbre/sql-metadata) | ||
|
||
Uses tokenized query returned by [`python-sqlparse`](https://github.com/andialbrecht/sqlparse) and generates query metadata. Extracts column names and tables used by the query. | ||
|
||
### Usage | ||
|
||
```python | ||
>>> import sql_metadata | ||
|
||
>>> sql_metadata.get_query_tokens("SELECT * FROM foo") | ||
[<DML 'SELECT' at 0x7F14FFDEB808>, <Wildcard '*' at 0x7F14FFDEB940>, <Keyword 'FROM' at 0x7F14FFDEBBB0>, <Name 'foo' at 0x7F14FFDEB9A8>] | ||
|
||
>>> sql_metadata.get_query_columns("SELECT test, id FROM foo, bar") | ||
[u'test', u'id'] | ||
|
||
>>> sql_metadata.get_query_tables("SELECT test, id FROM foo, bar") | ||
[u'foo', u'bar'] | ||
``` | ||
|
||
> See the `test/test_query.py` file for examples of more complex queries. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from setuptools import setup | ||
|
||
VERSION = '1.0'

# Trove classifiers describing the package
# https://pypi.python.org/pypi?%3Aaction=list_classifiers
CLASSIFIERS = [
    # How mature is this project? Common values are
    #   3 - Alpha
    #   4 - Beta
    #   5 - Production/Stable
    'Development Status :: 5 - Production/Stable',

    # Indicate who your project is intended for
    'Intended Audience :: Developers',
    'Intended Audience :: System Administrators',
    'Topic :: Database',

    # Pick your license as you wish
    'License :: OSI Approved :: MIT License',

    # Specify the Python versions you support here. In particular, ensure
    # that you indicate whether you support Python 2, Python 3 or both.
    'Programming Language :: Python :: 2',
    'Programming Language :: Python :: 2.7',
    'Programming Language :: Python :: 3',
    'Programming Language :: Python :: 3.4',
    'Programming Language :: Python :: 3.5',
    'Programming Language :: Python :: 3.6',
]

# Pinned packages installed together with the module
REQUIREMENTS = [
    'coverage==4.4.2',
    'pylint==1.8.1',
    'pytest==3.2.3',
    'sqlparse==0.2.4',
]

# @see https://github.com/pypa/sampleproject/blob/master/setup.py
setup(
    name='sqlmetadata',
    version=VERSION,
    author='Maciej Brencz',
    author_email='maciej.brencz@gmail.com',
    license='MIT',
    description='Uses tokenized query returned by python-sqlparse and generates query metadata',
    url='https://github.com/macbre/sql-metadata',
    classifiers=CLASSIFIERS,
    py_modules=["sql_metadata"],
    install_requires=REQUIREMENTS
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
""" | ||
This module provides SQL query parsing functions | ||
""" | ||
import re | ||
|
||
import sqlparse | ||
|
||
from sqlparse.sql import TokenList | ||
from sqlparse.tokens import Name, Whitespace, Wildcard | ||
|
||
|
||
def preprocess_query(query):
    """
    Perform initial query cleanup

    Removes backtick-quoted table aliases that directly follow a
    backtick-quoted table name after FROM or JOIN, e.g.:

        FROM `dimension_wikis` `dw`          ->  FROM `dimension_wikis`
        INNER JOIN `fact_wam_scores` `fwN`   ->  INNER JOIN `fact_wam_scores`

    The keyword match is case-insensitive. Any run of whitespace (several
    spaces, tabs, newlines) is accepted between the keyword, the table name
    and the alias; the whitespace in front of the table name is kept as-is.

    :type query str
    :rtype str
    """
    # 1. remove aliases
    # group 1 holds everything that is kept: the keyword and the table name
    query = re.sub(
        r'(\s+(?:FROM|JOIN)\s+`[^`]+`)\s+`[^`]+`',
        r'\1',
        query,
        flags=re.IGNORECASE
    )

    return query
|
||
|
||
def get_query_tokens(query):
    """
    Tokenize the query and return its flat list of non-whitespace tokens

    The query is first cleaned up by preprocess_query(). Only the first
    statement returned by sqlparse is taken into account. An empty list is
    returned when sqlparse finds no statement at all (e.g. an empty query).

    :type query str
    :rtype: list[sqlparse.sql.Token]
    """
    query = preprocess_query(query)

    statements = sqlparse.parse(query)
    if not statements:
        # nothing was parsed, so there is no first statement to index into
        return []

    tokens = TokenList(statements[0].tokens).flatten()

    # "in" (instead of an identity check) also matches the sub-types of
    # Whitespace, e.g. Whitespace.Newline emitted for line breaks; otherwise
    # multi-line queries leak newline tokens to the callers
    return [token for token in tokens if token.ttype not in Whitespace]
|
||
|
||
def get_query_columns(query):
    """
    Return the names used as columns in the query, in order of appearance

    Name tokens are collected (without duplicates) when the most recent
    keyword is SELECT, WHERE or BY, unless the name directly follows AS.
    A wildcard is collected in the SELECT part unless it directly follows
    an opening parenthesis.

    :type query str
    :rtype: list[str]
    """
    found = []
    current_keyword = None
    previous_value = None

    for token in get_query_tokens(query):
        upper_value = token.value.upper()

        if token.is_keyword and upper_value not in ('AS', 'AND', 'OR'):
            # remember the most recent keyword, e.g. SELECT, FROM, WHERE, (ORDER) BY
            current_keyword = upper_value
        elif token.ttype is Name:
            # names in these clauses are column names, names right after AS are aliases
            in_column_clause = current_keyword in ('SELECT', 'WHERE', 'BY')
            is_alias = previous_value == 'AS'

            if in_column_clause and not is_alias and token.value not in found:
                found.append(token.value)
        elif token.ttype is Wildcard:
            # SELECT * counts as a column, the wildcard in count(*) does not
            if current_keyword == 'SELECT' and previous_value != '(':
                found.append(token.value)

        previous_value = upper_value

    return found
|
||
|
||
def get_query_tables(query):
    """
    Return the names of the tables used by the query, in order of appearance

    Table names are expected after FROM, INTO and any kind of JOIN. They are
    collected (without duplicates, surrounding backticks stripped) until a
    keyword that opens a different part of the query is found. The AS keyword
    and the alias that follows it are skipped, regardless of the letter case.

    :type query str
    :rtype: list[str]
    """
    tables = []
    expecting_table = False
    last_token = None

    # keywords followed by table name(s); JOIN variants are matched by suffix below
    table_keywords = ('FROM', 'INTO')

    # keywords that end the list of tables
    closing_keywords = (
        # SELECT queries
        'SELECT', 'WHERE', 'ON', 'USING',
        'GROUP', 'ORDER', 'BY', 'GROUP BY', 'ORDER BY',
        'HAVING', 'LIMIT', 'UNION', 'UNION ALL',
        # INSERT queries
        'VALUES',
    )

    for token in get_query_tokens(query):
        value = token.value.upper()

        if token.is_keyword and (value in table_keywords or value.endswith('JOIN')):
            # the suffix check covers JOIN, INNER JOIN, LEFT JOIN, RIGHT JOIN
            # as well as compound forms such as LEFT OUTER JOIN
            expecting_table = True
        elif token.is_keyword and value in closing_keywords:
            expecting_table = False
        elif token.ttype is Name or token.is_keyword:
            # keywords are accepted here too, presumably because some table
            # names (e.g. "events" in the tests) are tokenized as keywords
            if expecting_table and last_token != 'AS' and value != 'AS':
                table_name = token.value.strip('`')
                if table_name not in tables:
                    tables.append(table_name)

        last_token = value

    return tables
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
from unittest import TestCase | ||
|
||
from sql_metadata import preprocess_query, get_query_columns, get_query_tables | ||
|
||
|
||
class TestUtils(TestCase):
    """
    Test cases for the query parsing helpers from the sql_metadata module
    """

    def test_preprocess_query(self):
        """ Backtick-quoted table aliases are removed from FROM and JOIN parts """
        # assertEqual is used as assertEquals is a deprecated alias
        self.assertEqual(
            preprocess_query('SELECT DISTINCT dw.lang FROM `dimension_wikis` `dw` INNER JOIN `fact_wam_scores` `fwN` ON ((dw.wiki_id = fwN.wiki_id)) WHERE fwN.time_id = FROM_UNIXTIME(N) ORDER BY dw.lang ASC'),
            'SELECT DISTINCT dw.lang FROM `dimension_wikis` INNER JOIN `fact_wam_scores` ON ((dw.wiki_id = fwN.wiki_id)) WHERE fwN.time_id = FROM_UNIXTIME(N) ORDER BY dw.lang ASC'
        )

        self.assertEqual(
            preprocess_query("SELECT count(fwN.wiki_id) as wam_results_total FROM `fact_wam_scores` `fwN` left join `fact_wam_scores` `fwN` ON ((fwN.wiki_id = fwN.wiki_id) AND (fwN.time_id = FROM_UNIXTIME(N))) left join `dimension_wikis` `dw` ON ((fwN.wiki_id = dw.wiki_id)) WHERE (fwN.time_id = FROM_UNIXTIME(N)) AND (dw.url like X OR dw.title like X) AND fwN.vertical_id IN (XYZ) AND dw.lang = X AND (fwN.wiki_id NOT IN (XYZ)) AND ((dw.url IS NOT NULL AND dw.title IS NOT NULL))"),
            "SELECT count(fwN.wiki_id) as wam_results_total FROM `fact_wam_scores` left join `fact_wam_scores` ON ((fwN.wiki_id = fwN.wiki_id) AND (fwN.time_id = FROM_UNIXTIME(N))) left join `dimension_wikis` ON ((fwN.wiki_id = dw.wiki_id)) WHERE (fwN.time_id = FROM_UNIXTIME(N)) AND (dw.url like X OR dw.title like X) AND fwN.vertical_id IN (XYZ) AND dw.lang = X AND (fwN.wiki_id NOT IN (XYZ)) AND ((dw.url IS NOT NULL AND dw.title IS NOT NULL))"
        )

    def test_get_query_columns(self):
        """ Column names are taken from the SELECT and WHERE parts of the query """
        self.assertListEqual(
            ['*'],
            get_query_columns('SELECT * FROM `test_table`'))

        self.assertListEqual(
            ['foo'],
            get_query_columns('SELECT foo FROM `test_table`'))

        self.assertListEqual(
            ['id', 'foo'],
            get_query_columns('SELECT id, foo FROM test_table WHERE id = 3'))

        self.assertListEqual(
            ['foo', 'count', 'id'],
            get_query_columns('SELECT foo, count(*) as bar FROM `test_table` WHERE id = 3'))

        self.assertListEqual(
            ['foo', 'test'],
            get_query_columns('SELECT foo, test as bar FROM `test_table`'))

        self.assertListEqual(
            ['bar'],
            get_query_columns('SELECT /* a comment */ bar FROM test_table'))

    def test_get_query_tables(self):
        """ Table names are taken from FROM, JOIN and INTO parts of the query """
        self.assertListEqual(
            ['test_table'],
            get_query_tables('SELECT * FROM `test_table`'))

        self.assertListEqual(
            ['0001_test_table'],
            get_query_tables('SELECT * FROM `0001_test_table`'))

        self.assertListEqual(
            ['test_table'],
            get_query_tables('SELECT foo FROM `test_table`'))

        self.assertListEqual(
            ['test_table'],
            get_query_tables('SELECT foo FROM test_table WHERE id = 1'))

        self.assertListEqual(
            ['test_table', 'second_table'],
            get_query_tables('SELECT foo FROM test_table, second_table WHERE id = 1'))

        self.assertListEqual(
            ['revision', 'page', 'wikicities_user'],
            get_query_tables('SELECT rev_id,rev_page,rev_text_id,rev_timestamp,rev_comment,rev_user_text,rev_user,rev_minor_edit,rev_deleted,rev_len,rev_parent_id,rev_shaN,page_namespace,page_title,page_id,page_latest,user_name FROM `revision` INNER JOIN `page` ON ((page_id = rev_page)) LEFT JOIN `wikicities_user` ON ((rev_user != N) AND (user_id = rev_user)) WHERE rev_id = X LIMIT N'))

        self.assertListEqual(
            ['events'],
            get_query_tables("SELECT COUNT( 0 ) AS cnt, date_format(event_date, '%Y-%m-%d') AS date FROM events WHERE event_date BETWEEN '2017-10-18 00:00:00' AND '2017-10-24 23:59:59' AND wiki_id = '1289985' GROUP BY date WITH ROLLUP"))

        # complex queries
        # @see https://github.com/macbre/query-digest/issues/16
        self.assertListEqual(
            ['report_wiki_recent_pageviews', 'dimension_wikis'],
            get_query_tables("SELECT r.wiki_id AS id, pageviews_Nday AS pageviews FROM report_wiki_recent_pageviews AS r INNER JOIN dimension_wikis AS d ON r.wiki_id = d.wiki_id WHERE d.public = X AND r.lang = X AND r.hub_name = X ORDER BY pageviews DESC LIMIT N"))

        self.assertListEqual(
            ['dimension_wikis', 'fact_wam_scores'],
            get_query_tables("SELECT DISTINCT dw.lang FROM `dimension_wikis` `dw` INNER JOIN `fact_wam_scores` `fwN` ON ((dw.wiki_id = fwN.wiki_id)) WHERE fwN.time_id = FROM_UNIXTIME(N) ORDER BY dw.lang ASC"))

        self.assertListEqual(
            ['fact_wam_scores', 'dimension_wikis'],
            get_query_tables("SELECT count(fwN.wiki_id) as wam_results_total FROM `fact_wam_scores` `fwN` left join `fact_wam_scores` `fwN` ON ((fwN.wiki_id = fwN.wiki_id) AND (fwN.time_id = FROM_UNIXTIME(N))) left join `dimension_wikis` `dw` ON ((fwN.wiki_id = dw.wiki_id)) WHERE (fwN.time_id = FROM_UNIXTIME(N)) AND (dw.url like X OR dw.title like X) AND fwN.vertical_id IN (XYZ) AND dw.lang = X AND (fwN.wiki_id NOT IN (XYZ)) AND ((dw.url IS NOT NULL AND dw.title IS NOT NULL))"))

        # INSERT queries
        self.assertListEqual(
            ['0070_insert_ignore_table'],
            get_query_tables("INSERT IGNORE INTO `0070_insert_ignore_table` VALUES (9, '123', '2017-01-01');"))

        self.assertListEqual(
            ['0070_insert_ignore_table'],
            get_query_tables("INSERT into `0070_insert_ignore_table` VALUES (9, '123', '2017-01-01');"))