diff --git a/doc/api_endpoints.md b/doc/api_endpoints.md index c00145e..fe56965 100644 --- a/doc/api_endpoints.md +++ b/doc/api_endpoints.md @@ -35,6 +35,31 @@ with id OEIS_ID. Since some sequence values correspond to extremely large numbers, strings are used to avoid the limitations of any particular numeric datatype. +### URL: `api/get_oeis_chunk/<OEIS_ID>/<CHUNK_NUMBER>` + +Similar to the `get_oeis_values` endpoint, but allows some level of random +access. Every sequence has a "chunk size" that can be obtained via the +`get_oeis_header` endpoint described below. This endpoint returns (just) the +sequence values in the CHUNK_NUMBERth chunk, in other words, all of the entries +with index no smaller than CHUNK_NUMBER×chunk_size and less than +(CHUNK_NUMBER+1)×(chunk_size). Note that CHUNK_NUMBER is allowed to be negative. + +If you are running the server to test it on your local host, a full URL would +be http://127.0.0.1:5000/api/get_oeis_chunk/A000030/1 which will return the +first digits of the numbers 1024 through 2047 (so 976 '1's followed by 48 '2's). + +#### Key: name + +A string giving the official name of the OEIS sequence with id OEIS_ID, +if already known to backscope, or a temporary name if not. + +#### Key: values + +An array of _strings_ (of digits) giving the values in chunk CHUNK_NUMBER of +the sequence with id OEIS_ID. Since some sequence values correspond to +extremely large numbers, strings are used to avoid the limitations of any +particular numeric datatype. + ### URL: `api/get_oeis_name_and_values/` This one is potentially a bit slower than the above URL, as it may make @@ -76,6 +101,34 @@ OEIS text "xref" records for the sequence with id OEIS_ID. An array of strings giving all OEIS ids that mention the given OEIS_ID. +### URL: `api/get_oeis_header/<OEIS_ID>` + +A lighter-weight endpoint that provides some summary information about the +sequence designated by OEIS_ID.
If you are running the server on your local +machine, a full URL would be http://127.0.0.1:5000/api/get_oeis_header/A028444 +which will show the full name of the Busy Beaver sequence, the first and last +indices of the sequence as available through oeis.org, and the chunk size that +will be used if values are requested in chunks. + +#### Key: name + +A string giving the official name of the OEIS sequence with id OEIS_ID. + +#### Key: first + +The index of the first entry in the sequence available for download from +oeis.org. + +#### Key: last + +The index of the last entry in the sequence available for download from +oeis.org. + +#### Key: chunk_size + +The maximum number of entries provided per chunk if chunked access is used +to download entries for this sequence. + ### URL: `api/get_oeis_factors//` This could take a long time. It internally does everything that the endpoint diff --git a/flaskr/nscope/models.py b/flaskr/nscope/models.py index fee9205..0f8e965 100644 --- a/flaskr/nscope/models.py +++ b/flaskr/nscope/models.py @@ -35,7 +35,13 @@ class Sequence(db.Model): name = db.Column(db.String, unique=False, nullable=True) # The following is called the "offset" in the OEIS, but that is a # Postgres reserved word, so we use a different name. - shift = db.Column(db.Integer, unique=False, nullable=False, default=0) + # Moreover, it is a string, not a number, because some OEIS sequences have + # shifts too large to fit in an integer. + shift = db.Column(db.String, unique=False, nullable=True) + # Similarly, the last index must be a string as well.
+ last_index = db.Column(db.String, unique=False, nullable=True) + # Should still work to keep values in an array, since there can't be so + # many of them, and we store the shift separately values = db.Column(db.ARRAY(db.String), unique=False, nullable=True) values_requested = db.Column(db.Boolean, nullable=False, default=False) raw_refs = db.Column(db.String, unique=False, nullable=True) diff --git a/flaskr/nscope/test/test_get_oeis_chunk.py b/flaskr/nscope/test/test_get_oeis_chunk.py new file mode 100644 index 0000000..b9452fd --- /dev/null +++ b/flaskr/nscope/test/test_get_oeis_chunk.py @@ -0,0 +1,369 @@ +import unittest +import flaskr.nscope.test.abstract_endpoint_test as abstract_endpoint_test + +a93178 = {str(i): str(1 if i%2 == 0 else i) for i in range(1024)} + +class TestGetOEISChunkSmallZero(abstract_endpoint_test.AbstractEndpointTest): + endpoint = 'http://localhost:5000/api/get_oeis_chunk/A093178/0' + + # we choose A093178 because: + # - it has zero shift, to make sure that is in chunk 0 + # - it currently has small values and few references, which speeds up the + # background work triggered by the request + # - it is one for even indices and i for odd indices, so easy to compute the + # (long) expected value + expected_response = { + 'id': 'A093178', + 'name': 'A093178 [name not yet loaded]', + 'values': a93178 + } + +class TestGetOEISChunkSmallOne(abstract_endpoint_test.AbstractEndpointTest): + endpoint = 'http://localhost:5000/api/get_oeis_chunk/A160480/0' + + # we choose A160480 because: + # - it has a positive shift, so we can make sure that chunk 0 starts at + # positive index + # - it currently has small values and few references, which speeds up the + # background work triggered by the request + expected_response = { + 'id': 'A160480', + 'name': ' A160480 (b-file synthesized from sequence entry)', + 'values': { + '2': '-1', + '3': '-11', + '4': '1', + '5': '-299', + '6': '36', + '7': '-1', + '8': '-15371', + '9': '2063', + '10': '-85', + '11': 
'1', + '12': '-1285371', + '13': '182474', + '14': '-8948', + '15': '166', + '16': '-1', + '17': '-159158691', + '18': '23364725', + '19': '-1265182', + '20': '29034', + '21': '-287', + '22': '1', + '23': '-27376820379', + '24': '4107797216', + '25': '-237180483', + '26': '6171928', + '27': '-77537', + '28': '456', + '29': '-1' + } + } + +class TestGetOEISChunkNegative(abstract_endpoint_test.AbstractEndpointTest): + endpoint = 'http://localhost:5000/api/get_oeis_chunk/A078302/-1' + + # we choose A078302 because: + # - it has a negative shift, so we can make sure the values come out in + # chunk -1 + # - it currently has small values and few references, which speeds up the + # background work triggered by the request + expected_response = { + 'id': 'A078302', + 'name': ' A078302 (b-file synthesized from sequence entry)', + 'values': { + '-43': '5', + '-42': '3', + '-41': '9', + '-40': '1' + } + } + +class TestGetOEISChunkNegEmpty(abstract_endpoint_test.AbstractEndpointTest): + endpoint = 'http://localhost:5000/api/get_oeis_chunk/A078302/0' + + # we choose A078302 because: + # - it has a negative shift, and no values with positive indices, so + # we can make sure that chunk 0 is empty + # - it currently has small values and few references, which speeds up the + # background work triggered by the request + expected_response = { + 'id': 'A078302', + 'name': ' A078302 (b-file synthesized from sequence entry)', + 'values': {} + } + +class TestGetOEISChunkBig(abstract_endpoint_test.AbstractEndpointTest): + endpoint = 'http://localhost:5000/api/get_oeis_chunk/A241298/361028' + + # we choose A241298 because: + # - it has a large positive shift, so we can make sure that the values show + # up in the correct chunk + # - it currently has small values and few references, which speeds up the + # background work triggered by the request + expected_response = { + 'id': 'A241298', + 'name': ' A241298 (b-file synthesized from sequence entry)', + 'values': { + '369693100': '4', + 
'369693101': '2', + '369693102': '8', + '369693103': '1', + '369693104': '2', + '369693105': '4', + '369693106': '7', + '369693107': '7', + '369693108': '3', + '369693109': '1', + '369693110': '7', + '369693111': '5', + '369693112': '7', + '369693113': '4', + '369693114': '7', + '369693115': '0', + '369693116': '4', + '369693117': '8', + '369693118': '0', + '369693119': '3', + '369693120': '6', + '369693121': '9', + '369693122': '8', + '369693123': '7', + '369693124': '1', + '369693125': '1', + '369693126': '5', + '369693127': '9', + '369693128': '3', + '369693129': '0', + '369693130': '5', + '369693131': '6', + '369693132': '3', + '369693133': '5', + '369693134': '2', + '369693135': '1', + '369693136': '3', + '369693137': '3', + '369693138': '9', + '369693139': '0', + '369693140': '5', + '369693141': '5', + '369693142': '4', + '369693143': '8', + '369693144': '2', + '369693145': '2', + '369693146': '4', + '369693147': '1', + '369693148': '4', + '369693149': '4', + '369693150': '3', + '369693151': '5', + '369693152': '1', + '369693153': '4', + '369693154': '1', + '369693155': '7', + '369693156': '4', + '369693157': '7', + '369693158': '5', + '369693159': '3', + '369693160': '7', + '369693161': '2', + '369693162': '3', + '369693163': '0', + '369693164': '5', + '369693165': '3', + '369693166': '5', + '369693167': '2', + '369693168': '3', + '369693169': '8', + '369693170': '8', + '369693171': '7', + '369693172': '4', + '369693173': '7', + '369693174': '1', + '369693175': '7', + '369693176': '3', + '369693177': '5', + '369693178': '0', + '369693179': '4', + '369693180': '8', + '369693181': '3', + '369693182': '5', + '369693183': '3', + '369693184': '1', + '369693185': '9', + '369693186': '3', + '369693187': '6', + '369693188': '6', + '369693189': '5', + '369693190': '2', + '369693191': '9', + '369693192': '9', + '369693193': '4', + '369693194': '3', + '369693195': '2', + '369693196': '0', + '369693197': '3', + '369693198': '3', + '369693199': '3', + '369693200': '7', + 
'369693201': '5', + '369693202': '0', + '369693203': '6', + '369693204': '0' + } + } + +class TestGetOEISChunkBigWrong(abstract_endpoint_test.AbstractEndpointTest): + endpoint = 'http://localhost:5000/api/get_oeis_chunk/A241298/17' + + # we choose A241298 because: + # - it has a large positive shift, so we can make sure that the values show + # up in the correct chunk (and nowhere else) + # - it currently has small values and few references, which speeds up the + # background work triggered by the request + expected_response = { + 'id': 'A241298', + 'name': ' A241298 (b-file synthesized from sequence entry)', + 'values': {} + } + +class TestGetOEISChunkHugeFirst(abstract_endpoint_test.AbstractEndpointTest): + endpoint = 'http://localhost:5000/api/get_oeis_chunk/A241292/3553061171' + + # we choose A241292 because: + # - it has a positive shift larger than JavaScript's MAX_SAFE_INTEGER, so + # we can make sure that the values still show up in the correct chunk + # - its values are by fortune split across two chunks + # - it currently has small values and few references, which speeds up the + # background work triggered by the request + expected_response = { + 'id': 'A241292', + 'name': ' A241292 (b-file synthesized from sequence entry)', + 'values': { + '3638334640025': '1', + '3638334640026': '2', + '3638334640027': '5', + '3638334640028': '8', + '3638334640029': '0', + '3638334640030': '1', + '3638334640031': '4', + '3638334640032': '2', + '3638334640033': '9', + '3638334640034': '0', + '3638334640035': '6', + '3638334640036': '2', + '3638334640037': '7', + '3638334640038': '4', + '3638334640039': '9', + '3638334640040': '1', + '3638334640041': '3', + '3638334640042': '1', + '3638334640043': '7', + '3638334640044': '8', + '3638334640045': '6', + '3638334640046': '0', + '3638334640047': '3', + '3638334640048': '9', + '3638334640049': '0', + '3638334640050': '6', + '3638334640051': '9', + '3638334640052': '8', + '3638334640053': '2', + '3638334640054': '0', + 
'3638334640055': '3', + '3638334640056': '2', + '3638334640057': '8', + '3638334640058': '1', + '3638334640059': '2', + '3638334640060': '1', + '3638334640061': '5', + '3638334640062': '5', + '3638334640063': '1', + '3638334640064': '8', + '3638334640065': '0', + '3638334640066': '4', + '3638334640067': '6', + '3638334640068': '7', + '3638334640069': '1', + '3638334640070': '4', + '3638334640071': '3', + '3638334640072': '1', + '3638334640073': '6', + '3638334640074': '5', + '3638334640075': '9', + '3638334640076': '6', + '3638334640077': '0', + '3638334640078': '1', + '3638334640079': '5', + '3638334640080': '1', + '3638334640081': '8', + '3638334640082': '9', + '3638334640083': '6', + '3638334640084': '7', + '3638334640085': '4', + '3638334640086': '9', + '3638334640087': '4', + '3638334640088': '4', + '3638334640089': '3', + '3638334640090': '8', + '3638334640091': '1', + '3638334640092': '2', + '3638334640093': '1', + '3638334640094': '1', + '3638334640095': '0', + '3638334640096': '1', + '3638334640097': '1', + '3638334640098': '3', + '3638334640099': '0', + '3638334640100': '0', + '3638334640101': '0', + '3638334640102': '1', + '3638334640103': '7', + '3638334640104': '7', + '3638334640105': '8', + '3638334640106': '5', + '3638334640107': '3', + '3638334640108': '1', + '3638334640109': '0', + '3638334640110': '8', + '3638334640111': '0', + '3638334640112': '3', + '3638334640113': '9', + '3638334640114': '0', + '3638334640115': '3', + '3638334640116': '2', + '3638334640117': '9', + '3638334640118': '6', + '3638334640119': '2', + '3638334640120': '4', + '3638334640121': '0', + '3638334640122': '1', + '3638334640123': '1', + '3638334640124': '5', + '3638334640125': '6', + '3638334640126': '9', + '3638334640127': '5' + } + } + +class TestGetOEISChunkHugeRest(abstract_endpoint_test.AbstractEndpointTest): + endpoint = 'http://localhost:5000/api/get_oeis_chunk/A241292/3553061172' + + # we choose A241292 because: + # - it has a positive shift larger than JavaScript's 
MAX_SAFE_INTEGER, so + # we can make sure that the values still show up in the correct chunk + # - its values are by fortune split across two chunks + # - it currently has small values and few references, which speeds up the + # background work triggered by the request + expected_response = { + 'id': 'A241292', + 'name': ' A241292 (b-file synthesized from sequence entry)', + 'values': { + '3638334640128': '8', + '3638334640129': '5' + } + } + +if __name__ == "__main__": + unittest.main() diff --git a/flaskr/nscope/test/test_get_oeis_header.py b/flaskr/nscope/test/test_get_oeis_header.py new file mode 100644 index 0000000..ab4e5ef --- /dev/null +++ b/flaskr/nscope/test/test_get_oeis_header.py @@ -0,0 +1,36 @@ +import unittest +import flaskr.nscope.test.abstract_endpoint_test as abstract_endpoint_test + + +class TestGetOEISHeadersPositive(abstract_endpoint_test.AbstractEndpointTest): + endpoint = 'http://localhost:5000/api/get_oeis_header/A003173' + + # we choose A003173 because: + # - it has positive shift + # - it has small values and few references, which speeds up the + # background work triggered by the request + expected_response = { + 'id': 'A003173', + 'name': 'Heegner numbers: imaginary quadratic fields with unique factorization (or class number 1).', + 'first': '1', + 'last': '9', + 'chunk_size': 1024 + } + +class TestGetOEISHeadersNegative(abstract_endpoint_test.AbstractEndpointTest): + endpoint = 'http://localhost:5000/api/get_oeis_header/A000521' + + # we choose A000521 because: + # - it has a negative shift + # - it has many long values, so a full data request would be very large + expected_response = { + 'id': 'A000521', + 'name': 'Coefficients of modular function j as power series in q = e^(2 Pi i t). 
Another name is the elliptic modular invariant J(tau).', + 'first': '-1', + 'last': '10000', + 'chunk_size': 1024 + } + + +if __name__ == "__main__": + unittest.main() diff --git a/flaskr/nscope/test/test_wrong_response_type.py b/flaskr/nscope/test/test_wrong_response_type.py index a67109c..f247cc3 100644 --- a/flaskr/nscope/test/test_wrong_response_type.py +++ b/flaskr/nscope/test/test_wrong_response_type.py @@ -15,9 +15,9 @@ class TestUnavailableValuesInNameAndValues(abstract_mock_oeis_test.AbstractMockO class TestUnavailableSearchInNameAndValues(abstract_mock_oeis_test.AbstractMockOEISTest): search_available = False endpoint = 'http://localhost:5000/api/get_oeis_name_and_values/A153080' - expected_response = 'Error: 503 Server Error: SERVICE UNAVAILABLE for url: http://localhost:5001/search?q=id%3AA153080&fmt=json' + expected_response = 'Error: 400 Client Error: BAD REQUEST for url: http://localhost:5001/search?q=id:A153080&fmt=text' class TestUnavailableSearch(abstract_mock_oeis_test.AbstractMockOEISTest): search_available = False endpoint = 'http://localhost:5000/api/get_oeis_metadata/A153080' - expected_response = 'Error: 503 Server Error: SERVICE UNAVAILABLE for url: http://localhost:5001/search?q=A153080&fmt=json' \ No newline at end of file + expected_response = 'Error: 400 Client Error: BAD REQUEST for url: http://localhost:5001/search?q=id:A153080&fmt=text' diff --git a/flaskr/nscope/views.py b/flaskr/nscope/views.py index 0bfcbd3..c301f3f 100644 --- a/flaskr/nscope/views.py +++ b/flaskr/nscope/views.py @@ -118,7 +118,11 @@ def fetch_metadata(oeis_id): if seq.backrefs is not None: # We've cached all the metadata already, so we just return it return seq + # Start with the basic data on the main page: + seq = fetch_oeis_name_etc(oeis_id) + if isinstance(seq, Exception): return seq + # Now hunt for the backrefs our_req_time = time.time_ns() last_req_time = seq.meta_req_time if last_req_time is not None: @@ -167,14 +171,7 @@ def fetch_metadata(oeis_id): saw 
= 0 while (saw < ref_count): for result in search_response['results']: - if result['number'] == target_number: - # Write the sequence's name and raw references as soon as we - # find them - if seq.raw_refs is None: - seq.name = result['name'] - seq.raw_refs = "\n".join(result.get('xref', [])) - db.session.commit() - else: + if result['number'] != target_number: # Don't add self-backref backrefs.append(oeis_a_id(result['number'])) saw += 1 if saw < ref_count: @@ -184,6 +181,7 @@ def fetch_metadata(oeis_id): return search_response if search_response['results'] == None: break + # Do we need to break out of this loop if we fail to make progress? seq.backrefs = backrefs else: # We didn't find any metadata @@ -279,10 +277,11 @@ def fetch_values(oeis_id): seq_vals[index] = column[1] if last < first: return IndexError(f"No terms found for ID '{oeis_id}'.") - seq.values = [seq_vals[i] for i in range(first,last+1)] + seq.values = [seq_vals[i] for i in range(first, last+1)] if not seq.name: seq.name = name or placeholder_name(oeis_id) - seq.shift = first + seq.shift = str(first) + seq.last_index = str(last) db.session.commit() return seq @@ -429,10 +428,71 @@ def get_oeis_values(oeis_id, num_elements): wants = int(num_elements) if wants and wants < len(raw_vals): raw_vals = raw_vals[0:wants] - vals = {(i+seq.shift):raw_vals[i] for i in range(len(raw_vals))} + first = int(seq.shift) + vals = {(i+first): raw_vals[i] for i in range(len(raw_vals))} return jsonify({'id': seq.id, 'name': seq.name, 'values': vals}) +chunk_size = 1024 # A constant now; may someday want it to depend on sequence + +@bp.route("/api/get_oeis_chunk//", methods=["GET"]) +def get_oeis_chunk(oeis_id, chunk_string): + valid_oeis_id = get_valid_oeis_id(oeis_id) + if isinstance(valid_oeis_id, Exception): + return f"Error: {valid_oeis_id}" + seq = fetch_values(valid_oeis_id) + if isinstance(seq, Exception): + return f"Error: {seq}" + # OK, got valid sequence, so schedule grabbing of metadata and factors: + 
executor.submit(fetch_metadata, valid_oeis_id) + executor.submit(fetch_factors, valid_oeis_id, timeout=1000) + # Finally, trim return sequence as requested: + raw_vals = seq.values + chunk = int(chunk_string) + first = int(seq.shift) + least = chunk*chunk_size + toobig = (chunk+1)*chunk_size + leasti = max(least - first, 0) + toobigi = min(toobig - first, len(raw_vals)) + vals = {(i+first): raw_vals[i] for i in range(leasti, toobigi)} + return jsonify({'id': seq.id, 'name': seq.name, 'values': vals}) + +# We grab and parse the text format of the OEIS. If we don't find a link, +# go ahead and fetch the values and use the info from there. +def fetch_oeis_name_etc(valid_oeis_id): + seq = find_oeis_sequence(valid_oeis_id) + if (not seq.name or seq.raw_refs is None + or seq.name == placeholder_name(valid_oeis_id)): + text = oeis_get(f"/search?q=id:{valid_oeis_id}&fmt=text", json=False) + if isinstance(text, Exception): + return text + seenLink = False + needLast = True + xrefs = '' + for line in text.split("\n"): + line = line.strip() + if not line.startswith('%'): continue + value = line[11:] # Value starts in column 11 + match line[1]: + case 'N': seq.name = value + case 'O': seq.shift = value.split(',')[0] + case 'H': + if seenLink: continue + seenLink = True + found = re.search( + r'Table of.*=\s*([-\d]+)[.][.]([-\d]+)', value) + if found: + if seq.shift: assert seq.shift == found[1] + seq.last_index = found[2] + needLast = False + case 'Y': + xrefs += value + "\n" + if xrefs: seq.raw_refs = xrefs + db.session.commit() + if needLast: + seq = fetch_values(valid_oeis_id) + return seq + @bp.route("/api/get_oeis_name_and_values/", methods=["GET"]) def get_oeis_name_and_values(oeis_id): valid_oeis_id = get_valid_oeis_id(oeis_id) @@ -442,19 +502,30 @@ def get_oeis_name_and_values(oeis_id): if isinstance(seq, Exception): return f"Error: {seq}" raw_vals = seq.values - vals = {(i + seq.shift): raw_vals[i] for i in range(len(raw_vals))} - # Now get the name - seq = 
find_oeis_sequence(valid_oeis_id) - if not seq.name or seq.name == placeholder_name(oeis_id): - search_response = oeis_get('/search', {'q': f'id:{oeis_id}', 'fmt': 'json'}) - if isinstance(search_response, Exception): - return f"Error: {search_response}" - if search_response['results'] != None: - seq.name = search_response['results'][0]['name'] - db.session.commit() + first = int(seq.shift) + vals = {(i + first): raw_vals[i] for i in range(len(raw_vals))} + seq = fetch_oeis_name_etc(valid_oeis_id) + if isinstance(seq, Exception): + return f"Error: {seq}" executor.submit(fetch_factors, valid_oeis_id, timeout=1000) return jsonify({'id': seq.id, 'name': seq.name, 'values': vals}) +@bp.route("/api/get_oeis_header/", methods=["GET"]) +def get_oeis_header(oeis_id): + valid_oeis_id = get_valid_oeis_id(oeis_id) + if isinstance(valid_oeis_id, Exception): + return f"Error: {valid_oeis_id}" + seq = fetch_oeis_name_etc(valid_oeis_id) + if isinstance(seq, Exception): + return f"Error: {seq}" + executor.submit(fetch_factors, valid_oeis_id, timeout=1000) + return jsonify({ + 'id': seq.id, + 'name': seq.name, + 'first': seq.shift, + 'last': seq.last_index, + 'chunk_size': chunk_size}) + @bp.route("/api/get_oeis_metadata/", methods=["GET"]) def get_oeis_metadata(oeis_id): valid_oeis_id = get_valid_oeis_id(oeis_id) @@ -486,7 +557,8 @@ def get_oeis_factors(oeis_id, num_elements): raw_fac = seq.factors if wants and wants < len(raw_fac): raw_fac = raw_fac[0:wants] - facs = {(i+seq.shift):raw_fac[i] for i in range(len(raw_fac))} + first = int(seq.shift) + facs = {(i+first):raw_fac[i] for i in range(len(raw_fac))} executor.submit(fetch_metadata, valid_oeis_id) return jsonify({ 'id': seq.id,