Skip to content

Commit

Permalink
Merge branch 'py3-support'
Browse files Browse the repository at this point in the history
  • Loading branch information
amercader committed Apr 9, 2020
2 parents e8395d6 + 9300fe7 commit 10ed098
Show file tree
Hide file tree
Showing 9 changed files with 91 additions and 85 deletions.
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
language: python
python:
- "2.7"
- "3.6"
- "3.7"
- "3.8"
install:
- pip install -r requirements-dev.txt
- pip install .
Expand Down
45 changes: 22 additions & 23 deletions datapusher/jobs.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals


import json
import socket
import requests
import urlparse
try:
from urllib.parse import urlsplit
except ImportError:
from urlparse import urlsplit

import itertools
import datetime
import locale
import pprint
import logging
import decimal
import hashlib
import time
import tempfile

import messytables
from slugify import slugify

import ckanserviceprovider.job as job
import ckanserviceprovider.util as util
Expand All @@ -29,7 +30,7 @@
locale.setlocale(locale.LC_ALL, '')

MAX_CONTENT_LENGTH = web.app.config.get('MAX_CONTENT_LENGTH') or 10485760
CHUNK_SIZE = 16 * 1024 # 16kb
CHUNK_SIZE = 16 * 1024 # 16kb
DOWNLOAD_TIMEOUT = 30

if web.app.config.get('SSL_VERIFY') in ['False', 'FALSE', '0', False, 0]:
Expand Down Expand Up @@ -107,7 +108,7 @@ def as_dict(self):
}

def __str__(self):
return u'{} status={} url={} response={}'.format(
return '{} status={} url={} response={}'.format(
self.message, self.status_code, self.request_url, self.response) \
.encode('ascii', 'replace')

Expand All @@ -116,7 +117,7 @@ def get_url(action, ckan_url):
"""
Get url for ckan action
"""
if not urlparse.urlsplit(ckan_url).scheme:
if not urlsplit(ckan_url).scheme:
ckan_url = 'http://' + ckan_url.lstrip('/')
ckan_url = ckan_url.rstrip('/')
return '{ckan_url}/api/3/action/{action}'.format(
Expand All @@ -139,7 +140,7 @@ def check_response(response, request_url, who, good_status=(201, 200), ignore_no

message = '{who} bad response. Status code: {code} {reason}. At: {url}.'
try:
if not response.status_code in good_status:
if response.status_code not in good_status:
json_response = response.json()
if not ignore_no_success or json_response.get('success'):
try:
Expand All @@ -151,7 +152,7 @@ def check_response(response, request_url, who, good_status=(201, 200), ignore_no
raise HTTPError(
message, status_code=response.status_code,
request_url=request_url, response=response.text)
except ValueError as err:
except ValueError:
message = message.format(
who=who, code=response.status_code, reason=response.reason,
url=request_url, resp=response.text[:200])
Expand Down Expand Up @@ -182,7 +183,6 @@ def chunky(items, num_items_per_chunk):
chunk = next_chunk



class DatastoreEncoder(json.JSONEncoder):
# Custom JSON encoder
def default(self, obj):
Expand Down Expand Up @@ -245,7 +245,6 @@ def send_resource_to_datastore(resource, headers, records,
'records': records,
'calculate_record_count': is_it_the_last_chunk}

name = resource.get('name')
url = get_url('datastore_create', ckan_url)
r = requests.post(url,
verify=SSL_VERIFY,
Expand Down Expand Up @@ -293,20 +292,20 @@ def get_resource(resource_id, ckan_url, api_key):

def validate_input(input):
# Especially validate metadata which is provided by the user
if not 'metadata' in input:
if 'metadata' not in input:
raise util.JobError('Metadata missing')

data = input['metadata']

if not 'resource_id' in data:
if 'resource_id' not in data:
raise util.JobError('No id provided.')
if not 'ckan_url' in data:
if 'ckan_url' not in data:
raise util.JobError('No ckan_url provided.')
if not input.get('api_key'):
raise util.JobError('No CKAN API key provided')


@job.async
@job.asynchronous
def push_to_datastore(task_id, input, dry_run=False):
'''Download and parse a resource, then push its data into CKAN's DataStore.
Expand Down Expand Up @@ -335,8 +334,8 @@ def push_to_datastore(task_id, input, dry_run=False):

try:
resource = get_resource(resource_id, ckan_url, api_key)
except util.JobError, e:
#try again in 5 seconds just incase CKAN is slow at adding resource
except util.JobError as e:
# try again in 5 seconds just in case CKAN is slow at adding resource
time.sleep(5)
resource = get_resource(resource_id, ckan_url, api_key)

Expand All @@ -347,7 +346,7 @@ def push_to_datastore(task_id, input, dry_run=False):

# check scheme
url = resource.get('url')
scheme = urlparse.urlsplit(url).scheme
scheme = urlsplit(url).scheme
if scheme not in ('http', 'https', 'ftp'):
raise util.JobError(
'Only http, https, and ftp resources may be fetched.'
Expand All @@ -367,7 +366,7 @@ def push_to_datastore(task_id, input, dry_run=False):
timeout=DOWNLOAD_TIMEOUT,
verify=SSL_VERIFY,
stream=True, # just gets the headers for now
)
)
response.raise_for_status()

cl = response.headers.get('content-length')
Expand Down Expand Up @@ -417,7 +416,7 @@ def push_to_datastore(task_id, input, dry_run=False):
try:
table_set = messytables.any_tableset(tmp, mimetype=ct, extension=ct)
except messytables.ReadError as e:
## try again with format
# try again with format
tmp.seek(0)
try:
format = resource.get('format')
Expand All @@ -437,7 +436,7 @@ def push_to_datastore(task_id, input, dry_run=False):
for f in existing.get('fields', []) if 'info' in f)

# Some headers might have been converted from strings to floats and such.
headers = [unicode(header) for header in headers]
headers = [str(header) for header in headers]

row_set.register_processor(messytables.headers_processor(headers))
row_set.register_processor(messytables.offset_processor(offset + 1))
Expand Down Expand Up @@ -488,7 +487,7 @@ def row_iterator():
h['info'] = existing_info[h['id']]
# create columns with types user requested
type_override = existing_info[h['id']].get('type_override')
if type_override in _TYPE_MAPPING.values():
if type_override in list(_TYPE_MAPPING.values()):
h['type'] = type_override

logger.info('Determined headers and types: {headers}'.format(
Expand Down
2 changes: 1 addition & 1 deletion datapusher/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import ckanserviceprovider.web as web

import jobs
from . import jobs

# check whether jobs have been imported properly
assert(jobs.push_to_datastore)
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
-r requirements.txt
httpretty==0.9.4
nose==1.2.1
nose
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
argparse
ckanserviceprovider==0.0.9
ckanserviceprovider==0.0.10
html5lib==1.0.1
messytables==0.15.2
python-slugify==1.2.1
certifi
requests[security]==2.20.0
5 changes: 5 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

description="A standalone web service that parses the contents of a CKAN site's data files and pushes them into its DataStore",
long_description=long_description,
long_description_content_type='text/markdown',

# The project's main homepage.
url='https://github.com/ckan/datapusher',
Expand All @@ -42,6 +43,10 @@
# Specify the Python versions you support here. In particular, ensure
# that you indicate whether you support Python 2, Python 3 or both.
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',

],

# What does your project relate to?
Expand Down
88 changes: 44 additions & 44 deletions tests/test_acceptance.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def join_static_path(filename):


def get_static_file(filename):
return open(join_static_path(filename)).read()
return open(join_static_path(filename), 'rb').read()


class TestImport(unittest.TestCase):
Expand Down Expand Up @@ -246,14 +246,14 @@ def test_simple_csv(self):

headers, results = jobs.push_to_datastore('fake_id', data, True)
results = list(results)
assert_equal(headers, [{'type': 'timestamp', 'id': u'date'},
{'type': 'numeric', 'id': u'temperature'},
{'type': 'text', 'id': u'place'}])
assert_equal(headers, [{'type': 'timestamp', 'id': 'date'},
{'type': 'numeric', 'id': 'temperature'},
{'type': 'text', 'id': 'place'}])
assert_equal(len(results), 6)
assert_equal(
results[0],
{u'date': datetime.datetime(2011, 1, 1, 0, 0), u'place': u'Galway',
u'temperature': 1})
{'date': datetime.datetime(2011, 1, 1, 0, 0), 'place': 'Galway',
'temperature': 1})

@httpretty.activate
def test_simple_tsv(self):
Expand All @@ -277,13 +277,13 @@ def test_simple_tsv(self):

headers, results = jobs.push_to_datastore('fake_id', data, True)
results = list(results)
assert_equal(headers, [{'type': 'timestamp', 'id': u'date'},
{'type': 'numeric', 'id': u'temperature'},
{'type': 'text', 'id': u'place'}])
assert_equal(headers, [{'type': 'timestamp', 'id': 'date'},
{'type': 'numeric', 'id': 'temperature'},
{'type': 'text', 'id': 'place'}])
assert_equal(len(results), 6)
assert_equal(results[0],
{u'date': datetime.datetime(2011, 1, 1, 0, 0),
u'place': u'Galway', u'temperature': 1})
{'date': datetime.datetime(2011, 1, 1, 0, 0),
'place': 'Galway', 'temperature': 1})

@httpretty.activate
def test_simple_ssv(self):
Expand All @@ -307,13 +307,13 @@ def test_simple_ssv(self):

headers, results = jobs.push_to_datastore('fake_id', data, True)
results = list(results)
assert_equal(headers, [{'type': 'timestamp', 'id': u'date'},
{'type': 'numeric', 'id': u'temperature'},
{'type': 'text', 'id': u'place'}])
assert_equal(headers, [{'type': 'timestamp', 'id': 'date'},
{'type': 'numeric', 'id': 'temperature'},
{'type': 'text', 'id': 'place'}])
assert_equal(len(results), 6)
assert_equal(results[0],
{u'date': datetime.datetime(2011, 1, 1, 0, 0),
u'place': u'Galway', u'temperature': 1})
{'date': datetime.datetime(2011, 1, 1, 0, 0),
'place': 'Galway', 'temperature': 1})

@httpretty.activate
def test_simple_xls(self):
Expand All @@ -336,13 +336,13 @@ def test_simple_xls(self):

headers, results = jobs.push_to_datastore('fake_id', data, True)
results = list(results)
assert_equal(headers, [{'type': 'timestamp', 'id': u'date'},
{'type': 'numeric', 'id': u'temperature'},
{'type': 'text', 'id': u'place'}])
assert_equal(headers, [{'type': 'timestamp', 'id': 'date'},
{'type': 'numeric', 'id': 'temperature'},
{'type': 'text', 'id': 'place'}])
assert_equal(len(results), 6)
assert_equal(results[0],
{u'date': datetime.datetime(2011, 1, 1, 0, 0),
u'place': u'Galway', u'temperature': 1})
{'date': datetime.datetime(2011, 1, 1, 0, 0),
'place': 'Galway', 'temperature': 1})

@httpretty.activate
def test_real_csv(self):
Expand All @@ -365,30 +365,30 @@ def test_real_csv(self):

headers, results = jobs.push_to_datastore('fake_id', data, True)
results = list(results)
assert_equal(headers, [{'type': 'text', 'id': u'Directorate'},
{'type': 'text', 'id': u'Service Area'},
{'type': 'text', 'id': u'Expenditure Category'},
{'type': 'timestamp', 'id': u'Payment Date'},
{'type': 'text', 'id': u'Supplier Name'},
{'type': 'numeric', 'id': u'Internal Ref'},
{'type': 'text', 'id': u'Capital/ Revenue'},
{'type': 'text', 'id': u'Cost Centre'},
assert_equal(headers, [{'type': 'text', 'id': 'Directorate'},
{'type': 'text', 'id': 'Service Area'},
{'type': 'text', 'id': 'Expenditure Category'},
{'type': 'timestamp', 'id': 'Payment Date'},
{'type': 'text', 'id': 'Supplier Name'},
{'type': 'numeric', 'id': 'Internal Ref'},
{'type': 'text', 'id': 'Capital/ Revenue'},
{'type': 'text', 'id': 'Cost Centre'},
{'type': 'text',
'id': u'Cost Centre Description'},
{'type': 'numeric', 'id': u'Grand Total'}])
'id': 'Cost Centre Description'},
{'type': 'numeric', 'id': 'Grand Total'}])
assert_equal(len(results), 230)
assert_equal(results[0],
{u'Directorate': u'Adult and Culture',
u'Service Area': u'Ad Serv-Welfare Rights- ',
u'Expenditure Category': u'Supplies & Services',
u'Cost Centre Description':
u'WELFARE RIGHTS WORKERS M',
u'Capital/ Revenue': u'Revenue',
u'Grand Total': 828.0,
u'Payment Date': datetime.datetime(2011, 10, 24, 0, 0),
u'Internal Ref': 5277184,
u'Cost Centre': u'1MR48',
u'Supplier Name': u'ALBANY OFFICE FURNITURE SOLUTIONS'})
{'Directorate': 'Adult and Culture',
'Service Area': 'Ad Serv-Welfare Rights- ',
'Expenditure Category': 'Supplies & Services',
'Cost Centre Description':
'WELFARE RIGHTS WORKERS M',
'Capital/ Revenue': 'Revenue',
'Grand Total': 828.0,
'Payment Date': datetime.datetime(2011, 10, 24, 0, 0),
'Internal Ref': 5277184,
'Cost Centre': '1MR48',
'Supplier Name': 'ALBANY OFFICE FURNITURE SOLUTIONS'})

@httpretty.activate
def test_weird_header(self):
Expand All @@ -413,8 +413,8 @@ def test_weird_header(self):
results = list(results)
assert_equal(len(headers), 9)
assert_equal(len(results), 82)
assert_equal(headers[0]['id'].strip(), u'1985')
assert_equal(results[1]['1993'].strip(), u'379')
assert_equal(headers[0]['id'].strip(), '1985')
assert_equal(results[1]['1993'].strip(), '379')

@raises(util.JobError)
@httpretty.activate
Expand Down
6 changes: 3 additions & 3 deletions tests/test_mocked.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,17 @@ def register_urls(self):

resource_update_url = 'http://www.ckan.org/api/3/action/resource_update'
httpretty.register_uri(httpretty.POST, resource_update_url,
body=u'{"success": true}',
body='{"success": true}',
content_type="application/json")

datastore_del_url = 'http://www.ckan.org/api/3/action/datastore_delete'
httpretty.register_uri(httpretty.POST, datastore_del_url,
body=u'{"success": true}',
body='{"success": true}',
content_type="application/json")

datastore_url = 'http://www.ckan.org/api/3/action/datastore_create'
httpretty.register_uri(httpretty.POST, datastore_url,
body=u'{"success": true}',
body='{"success": true}',
content_type="application/json")

datastore_check_url = 'http://www.ckan.org/api/3/action/datastore_search'
Expand Down
Loading

0 comments on commit 10ed098

Please sign in to comment.