Skip to content

Commit

Permalink
[QOLDEV-490] extend job timeout if type guessing is needed
Browse files Browse the repository at this point in the history
- Also refactor: move the function that checks whether a datastore entry exists from the loader module to utils,
so different modules can check it.
  • Loading branch information
ThrawnCA committed Aug 1, 2023
1 parent bef37d2 commit 9200968
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 22 deletions.
4 changes: 4 additions & 0 deletions ckanext/xloader/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ def xloader_submit(context, data_dict):
}
}
timeout = config.get('ckanext.xloader.job_timeout', '3600')
if not utils.datastore_resource_exists(res_id):
# Expand timeout for resources that have to be type-guessed
timeout = timeout * 3

try:
job = enqueue_job(
jobs.xloader_data_into_datastore, [data], rq_kwargs=dict(timeout=timeout)
Expand Down
14 changes: 5 additions & 9 deletions ckanext/xloader/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@
from ckan import model
from ckan.plugins.toolkit import get_action, asbool, ObjectNotFound, config

from . import loader
from . import db
from . import db, loader
from .job_exceptions import JobError, HTTPError, DataTooBigError, FileCouldNotBeLoadedError
from .utils import set_resource_metadata
from .utils import set_resource_metadata, should_guess_types

try:
from ckan.lib.api_token import get_user_from_token
Expand Down Expand Up @@ -206,13 +205,10 @@ def tabulator_load():
logger.info('Loading CSV')
# If ckanext.xloader.use_type_guessing is not configured, fall back to
# deprecated ckanext.xloader.just_load_with_messytables
use_type_guessing = asbool(config.get(
'ckanext.xloader.use_type_guessing', config.get(
'ckanext.xloader.just_load_with_messytables', False)))
logger.info("'use_type_guessing' mode is: %s",
use_type_guessing)
use_type_guessing = should_guess_types(resource['id'])
logger.info("'use_type_guessing' mode is: %s", use_type_guessing)
try:
if use_type_guessing and not loader.datastore_resource_exists(resource['id']):
if use_type_guessing:
tabulator_load()
else:
try:
Expand Down
13 changes: 1 addition & 12 deletions ckanext/xloader/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from .job_exceptions import FileCouldNotBeLoadedError, LoaderError
from .parser import CSV_SAMPLE_LINES, XloaderCSVParser
from .utils import headers_guess, type_guess
from .utils import datastore_resource_exists, headers_guess, type_guess

from ckan.plugins.toolkit import config

Expand Down Expand Up @@ -402,17 +402,6 @@ def send_resource_to_datastore(resource_id, headers, records):
.format(str(e)))


def datastore_resource_exists(resource_id):
    """Look up the datastore entry for ``resource_id``.

    Calls the ``datastore_search`` action with ``limit=0`` and auth checks
    disabled.

    Returns ``False`` when the action raises ``ObjectNotFound``; otherwise
    returns the action's response, or ``{'fields': []}`` if that response
    is falsy.
    """
    # Imported locally, as in the original implementation.
    from ckan import model

    search_context = {'model': model, 'ignore_auth': True}
    search_params = {'id': resource_id, 'limit': 0}
    try:
        result = p.toolkit.get_action('datastore_search')(
            search_context, search_params)
    except p.toolkit.ObjectNotFound:
        return False
    if result:
        return result
    return {'fields': []}


def delete_datastore_resource(resource_id):
from ckan import model
context = {'model': model, 'user': '', 'ignore_auth': True}
Expand Down
19 changes: 18 additions & 1 deletion ckanext/xloader/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from decimal import Decimal

import ckan.plugins as p
from ckan.plugins.toolkit import config
from ckan.plugins.toolkit import asbool, config

# resource.formats accepted by ckanext-xloader. Must be lowercase here.
DEFAULT_FORMATS = [
Expand Down Expand Up @@ -245,3 +245,20 @@ def type_guess(rows, types=TYPES, strict=False):
guesses_tuples = [(t, guess[t]) for t in types if t in guess]
_columns.append(max(guesses_tuples, key=lambda t_n: t_n[1])[0])
return _columns


def datastore_resource_exists(resource_id):
    """Check whether ``resource_id`` has an entry in the datastore.

    Runs ``datastore_search`` with ``limit=0`` and ``ignore_auth`` set, so
    no rows are requested.

    Returns ``False`` if the search raises ``ObjectNotFound``. Otherwise the
    search response is returned as-is, with ``{'fields': []}`` substituted
    for a falsy response.
    """
    lookup_context = {'model': model, 'ignore_auth': True}
    try:
        found = p.toolkit.get_action('datastore_search')(
            lookup_context, {'id': resource_id, 'limit': 0})
    except p.toolkit.ObjectNotFound:
        return False
    return found if found else {'fields': []}


def should_guess_types(resource_id):
    """Decide whether ``resource_id`` should be loaded with type guessing.

    Type guessing applies only when both conditions hold:

    * it is enabled via ``ckanext.xloader.use_type_guessing`` (falling back
      to the deprecated ``ckanext.xloader.just_load_with_messytables``), and
    * the resource does NOT already have a datastore entry.

    Returns a ``bool``.
    """
    guessing_enabled = asbool(config.get(
        'ckanext.xloader.use_type_guessing',
        config.get('ckanext.xloader.just_load_with_messytables', False)))
    # The existence check must be negated: the previous inline logic was
    # "use_type_guessing and not datastore_resource_exists(...)". Without the
    # "not", guessing ran only for resources that already existed.
    # Short-circuiting keeps the datastore query from running when guessing
    # is disabled.
    return guessing_enabled and not datastore_resource_exists(resource_id)

0 comments on commit 9200968

Please sign in to comment.