|
1 | 1 | import logging
|
| 2 | +import operator |
| 3 | +import time |
| 4 | +from datetime import datetime |
2 | 5 |
|
3 | 6 | from civis import APIClient
|
4 | 7 | from civis.futures import CivisFuture
|
5 | 8 |
|
6 | 9 | log = logging.getLogger(__name__)
|
7 | 10 |
|
# Seconds to sleep between polls of the logs endpoint while following a run.
_FOLLOW_POLL_INTERVAL_SEC = 5
# Log entries older than this many seconds are considered safe from
# late-arriving messages with lower IDs (see _compute_effective_max_log_id).
_LOG_REFETCH_CUTOFF_SECONDS = 300
# How many of the most recent log entries to re-fetch when recent
# messages may still be arriving out of ID order.
_LOG_REFETCH_COUNT = 100
# Maximum number of log messages requested per list_runs_logs call.
_LOGS_PER_QUERY = 250
| 15 | + |
8 | 16 |
|
9 | 17 | def run_job(job_id, client=None, polling_interval=None):
|
10 | 18 | """Run a job.
|
@@ -96,3 +104,144 @@ def run_template(id, arguments, JSONValue=False, client=None):
|
96 | 104 | else:
|
97 | 105 | file_ids = {o.name: o.object_id for o in outputs}
|
98 | 106 | return file_ids
|
| 107 | + |
| 108 | + |
| 109 | +def _timestamp_from_iso_str(s): |
| 110 | + """Return an integer POSIX timestamp for a given ISO date string. |
| 111 | +
|
| 112 | + Note: Until Python 3.11, datetime.fromisoformat doesn't work |
| 113 | + with the format returned by Civis Platform. |
| 114 | + """ |
| 115 | + try: |
| 116 | + return datetime.fromisoformat(s).timestamp() |
| 117 | + except ValueError: |
| 118 | + try: |
| 119 | + # This is the format that Civis Platform returns. |
| 120 | + return datetime.strptime(s, "%Y-%m-%dT%H:%M:%S.%f%z").timestamp() |
| 121 | + except ValueError: |
| 122 | + # Another format, just in case. |
| 123 | + return datetime.strptime(s, "%Y-%m-%dT%H:%M:%S%z").timestamp() |
| 124 | + |
| 125 | + |
| 126 | +def _compute_effective_max_log_id(logs): |
| 127 | + """Find a max log ID use in order to avoid missing late messages. |
| 128 | +
|
| 129 | + The order of log IDs may not be consistent with "created at" times |
| 130 | + since log entries are created by Civis Platform as well as the code |
| 131 | + for the job itself. This function looks through recent logs |
| 132 | + and finds a maximum ID that is at least as old as a set cutoff period, |
| 133 | + so that messages with lower IDs that show up a bit late won't be skipped. |
| 134 | + With this, it is still theoretically possible but extremely unlikely |
| 135 | + for some late log messages to be skipped in the job_logs function. |
| 136 | + """ |
| 137 | + if not logs: |
| 138 | + return 0 |
| 139 | + |
| 140 | + sorted_logs = sorted(logs, key=operator.itemgetter("id")) |
| 141 | + |
| 142 | + max_created_at_timestamp = _timestamp_from_iso_str(sorted_logs[-1]["createdAt"]) |
| 143 | + cutoff = time.time() - _LOG_REFETCH_CUTOFF_SECONDS |
| 144 | + if max_created_at_timestamp < cutoff: |
| 145 | + return sorted_logs[-1]["id"] |
| 146 | + elif len(sorted_logs) >= _LOG_REFETCH_COUNT: |
| 147 | + return sorted_logs[-_LOG_REFETCH_COUNT]["id"] |
| 148 | + |
| 149 | + return 0 |
| 150 | + |
| 151 | + |
| 152 | +def _job_finished_past_timeout(job_id, run_id, finished_timeout, raw_client): |
| 153 | + """Return true if the run finished more than so many seconds ago.""" |
| 154 | + if finished_timeout is None: |
| 155 | + return False |
| 156 | + |
| 157 | + run = raw_client.jobs.get_runs(job_id, run_id) |
| 158 | + finished_at = run.json()["finishedAt"] |
| 159 | + if finished_at is None: |
| 160 | + return False |
| 161 | + finished_at_ts = _timestamp_from_iso_str(finished_at) |
| 162 | + result = finished_at_ts < time.time() - finished_timeout |
| 163 | + return result |
| 164 | + |
| 165 | + |
def job_logs(job_id, run_id=None, finished_timeout=None):
    """Return a generator of log message dictionaries for a given run.

    Parameters
    ----------
    job_id : int
        The ID of the job to retrieve log messages for.
    run_id : int or None
        The ID of the run to retrieve log messages for.
        If None, the ID for the most recent run will be used.
    finished_timeout : int or None
        If not None, then this function will return once the run has
        been finished for the specified number of seconds.
        If None, then this function will wait until the API says there
        will be no more new log messages, which may take a few minutes.
        A timeout of 30-60 seconds is usually enough to retrieve all
        log messages.

    Yields
    ------
    dict
        A log message dictionary with "message", "createdAt" and other attributes
        provided by the job logs endpoint. Note that this will block execution
        until the job has stopped and all log messages are retrieved.
    """
    # The return_type for the client is "raw" in order to check
    # the "civis-cache-control" and "civis-max-id" headers when
    # list_runs_logs returns an empty list of new messages.
    # Caching of the endpoint information in
    # civis.resources.generate_classes_maybe_cached avoids extra API calls.
    raw_client = APIClient(return_type="raw")

    if run_id is None:
        run_id = raw_client.jobs.list_runs(
            job_id, limit=1, order="id", order_dir="desc"
        ).json()[0]["id"]

    local_max_log_id = 0
    continue_polling = True

    # IDs already yielded, so re-fetched messages aren't yielded twice.
    known_log_ids = set()

    while continue_polling:
        # This call gets a limited number of log messages since last_id,
        # ordered by log ID.
        response = raw_client.jobs.list_runs_logs(
            job_id,
            run_id,
            last_id=local_max_log_id,
            limit=_LOGS_PER_QUERY,
        )
        if "civis-max-id" in response.headers:
            remote_max_log_id = int(response.headers["civis-max-id"])
        else:
            # Platform hasn't seen any logs at all yet.
            remote_max_log_id = None
        logs = response.json()
        if logs:
            local_max_log_id = max(entry["id"] for entry in logs)
            logs.sort(key=operator.itemgetter("createdAt", "id"))
            # The loop variable is deliberately named ``entry`` rather than
            # ``log`` so it doesn't shadow the module-level logger.
            for entry in logs:
                if entry["id"] in known_log_ids:
                    continue
                known_log_ids.add(entry["id"])
                yield entry

        # Platform signals that no more log messages will arrive by making
        # the response cacheable (anything other than "no-store").
        log_finished = response.headers["civis-cache-control"] != "no-store"

        if remote_max_log_id is None:
            remote_has_more_logs_to_get_now = False
        elif local_max_log_id == remote_max_log_id:
            remote_has_more_logs_to_get_now = False
            # Back off the high-water mark so late-arriving messages with
            # lower IDs are re-fetched (and deduplicated) on the next poll.
            local_max_log_id = _compute_effective_max_log_id(logs)
            if log_finished or _job_finished_past_timeout(
                job_id, run_id, finished_timeout, raw_client
            ):
                continue_polling = False
        else:
            remote_has_more_logs_to_get_now = True

        if continue_polling and not remote_has_more_logs_to_get_now:
            time.sleep(_FOLLOW_POLL_INTERVAL_SEC)
0 commit comments