Skip to content

Commit 40a8187

Browse files
authored
Merge pull request #1773 from getredash/patches
Split refresh schemas into separate tasks and add a timeout.
2 parents 3650617 + 3807510 commit 40a8187

File tree

3 files changed

+44
-34
lines changed

3 files changed

+44
-34
lines changed

redash/models.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,20 @@ def create_with_group(cls, *args, **kwargs):
479479
db.session.add_all([data_source, data_source_group])
480480
return data_source
481481

482+
@classmethod
483+
def all(cls, org, group_ids=None):
484+
data_sources = cls.query.filter(cls.org == org).order_by(cls.id.asc())
485+
486+
if group_ids:
487+
data_sources = data_sources.join(DataSourceGroup).filter(
488+
DataSourceGroup.group_id.in_(group_ids))
489+
490+
return data_sources
491+
492+
@classmethod
493+
def get_by_id(cls, _id):
494+
return cls.query.filter(cls.id == _id).one()
495+
482496
def get_schema(self, refresh=False):
483497
key = "data_source:schema:{}".format(self.id)
484498

@@ -536,24 +550,10 @@ def update_group_permission(self, group, view_only):
536550
def query_runner(self):
537551
return get_query_runner(self.type, self.options)
538552

539-
@classmethod
540-
def get_by_id(cls, _id):
541-
return cls.query.filter(cls.id == _id).one()
542-
543553
@classmethod
544554
def get_by_name(cls, name):
545555
return cls.query.filter(cls.name == name).one()
546556

547-
@classmethod
548-
def all(cls, org, group_ids=None):
549-
data_sources = cls.query.filter(cls.org == org).order_by(cls.id.asc())
550-
551-
if group_ids:
552-
data_sources = data_sources.join(DataSourceGroup).filter(
553-
DataSourceGroup.group_id.in_(group_ids))
554-
555-
return data_sources
556-
557557
#XXX examine call sites to see if a regular SQLA collection would work better
558558
@property
559559
def groups(self):

redash/tasks/queries.py

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import redis
77

8+
from celery.exceptions import SoftTimeLimitExceeded
89
from celery.result import AsyncResult
910
from celery.utils.log import get_task_logger
1011
from redash import models, redis_connection, settings, statsd_client, utils
@@ -234,9 +235,7 @@ def enqueue_query(query, data_source, user_id, scheduled_query=None, metadata={}
234235
queue_name = data_source.queue_name
235236
scheduled_query_id = None
236237

237-
result = execute_query.apply_async(args=(
238-
query, data_source.id, metadata, user_id,
239-
scheduled_query_id),
238+
result = execute_query.apply_async(args=(query, data_source.id, metadata, user_id, scheduled_query_id),
240239
queue=queue_name)
241240
job = QueryTask(async_result=result)
242241
tracker = QueryTaskTracker.create(
@@ -342,14 +341,30 @@ def cleanup_query_results():
342341
logger.info("Deleted %d unused query results.", deleted_count)
343342

344343

344+
@celery.task(name="redash.tasks.refresh_schema", soft_time_limit=60)
345+
def refresh_schema(data_source_id):
346+
ds = models.DataSource.get_by_id(data_source_id)
347+
logger.info(u"task=refresh_schema state=start ds_id=%s", ds.id)
348+
start_time = time.time()
349+
try:
350+
ds.get_schema(refresh=True)
351+
logger.info(u"task=refresh_schema state=finished ds_id=%s runtime=%.2f", ds.id, time.time() - start_time)
352+
statsd_client.incr('refresh_schema.success')
353+
except SoftTimeLimitExceeded:
354+
logger.info(u"task=refresh_schema state=timeout ds_id=%s runtime=%.2f", ds.id, time.time() - start_time)
355+
statsd_client.incr('refresh_schema.timeout')
356+
except Exception:
357+
logger.warning(u"Failed refreshing schema for the data source: %s", ds.name, exc_info=1)
358+
statsd_client.incr('refresh_schema.error')
359+
logger.info(u"task=refresh_schema state=failed ds_id=%s runtime=%.2f", ds.id, time.time() - start_time)
360+
361+
345362
@celery.task(name="redash.tasks.refresh_schemas")
346363
def refresh_schemas():
347364
"""
348365
Refreshes the data sources schemas.
349366
"""
350-
351367
blacklist = [int(ds_id) for ds_id in redis_connection.smembers('data_sources:schema:blacklist') if ds_id]
352-
353368
global_start_time = time.time()
354369

355370
logger.info(u"task=refresh_schemas state=start")
@@ -360,14 +375,7 @@ def refresh_schemas():
360375
elif ds.id in blacklist:
361376
logger.info(u"task=refresh_schema state=skip ds_id=%s reason=blacklist", ds.id)
362377
else:
363-
logger.info(u"task=refresh_schema state=start ds_id=%s", ds.id)
364-
start_time = time.time()
365-
try:
366-
ds.get_schema(refresh=True)
367-
logger.info(u"task=refresh_schema state=finished ds_id=%s runtime=%.2f", ds.id, time.time() - start_time)
368-
except Exception:
369-
logger.exception(u"Failed refreshing schema for the data source: %s", ds.name)
370-
logger.info(u"task=refresh_schema state=failed ds_id=%s runtime=%.2f", ds.id, time.time() - start_time)
378+
refresh_schema.apply_async(args=(ds.id,), queue="schemas")
371379

372380
logger.info(u"task=refresh_schemas state=finish total_runtime=%.2f", time.time() - global_start_time)
373381

tests/tasks/test_refresh_schemas.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,27 @@
11
import datetime
2-
from mock import patch, call, ANY
2+
3+
from mock import ANY, call, patch
34
from tests import BaseTestCase
5+
46
from redash.tasks import refresh_schemas
57

68

79
class TestRefreshSchemas(BaseTestCase):
810
def test_calls_refresh_of_all_data_sources(self):
911
self.factory.data_source # trigger creation
10-
with patch('redash.models.DataSource.get_schema') as get_schema:
12+
with patch('redash.tasks.queries.refresh_schema.apply_async') as refresh_job:
1113
refresh_schemas()
12-
get_schema.assert_called_with(refresh=True)
14+
refresh_job.assert_called()
1315

1416
def test_skips_paused_data_sources(self):
1517
self.factory.data_source.pause()
1618

17-
with patch('redash.models.DataSource.get_schema') as get_schema:
19+
with patch('redash.tasks.queries.refresh_schema.apply_async') as refresh_job:
1820
refresh_schemas()
19-
get_schema.assert_not_called()
21+
refresh_job.assert_not_called()
2022

2123
self.factory.data_source.resume()
2224

23-
with patch('redash.models.DataSource.get_schema') as get_schema:
25+
with patch('redash.tasks.queries.refresh_schema.apply_async') as refresh_job:
2426
refresh_schemas()
25-
get_schema.assert_called_with(refresh=True)
27+
refresh_job.assert_called()

0 commit comments

Comments
 (0)