From c91725818a2fb43d88132f632efcb1328f7dc6bf Mon Sep 17 00:00:00 2001
From: Glen Whitney <glen@studioinfinity.org>
Date: Sun, 8 Sep 2024 16:00:38 -0700
Subject: [PATCH 1/3] feat: add get_oeis_header endpoint

---
 doc/api_endpoints.md                          |  28 +++++
 flaskr/nscope/models.py                       |   8 +-
 .../nscope/test/test_wrong_response_type.py   |   4 +-
 flaskr/nscope/views.py                        | 110 ++++++++++++++----
 4 files changed, 125 insertions(+), 25 deletions(-)
diff --git a/doc/api_endpoints.md b/doc/api_endpoints.md
index c00145e..dfbd399 100644
--- a/doc/api_endpoints.md
+++ b/doc/api_endpoints.md
@@ -76,6 +76,34 @@ OEIS text "xref" records for the sequence with id OEIS_ID.
 
 An array of strings giving all OEIS ids that mention the given OEIS_ID.
 
+### URL: `api/get_oeis_header/<OEIS_ID>`
+
+A lighter-weight endpoint that provides some summary information about the
+sequence designated by <OEIS_ID>. If you are running the server on your local
+machine, a full URL would be http://127.0.0.1:5000/api/get_oeis_header/A028444
+which will show the full name of the Busy Beaver sequence, the first and last
+indices of the sequence as available through oeis.org, and the chunk size that
+will be used if values are requested in chunks.
+
+#### Key: name
+
+A string giving the official name of the OEIS sequence with id OEIS_ID.
+
+#### Key: first
+
+The index of the first entry in the sequence available for download from
+oeis.org.
+
+#### Key: last
+
+The index of the last entry in the sequence available for download from
+oeis.org.
+
+#### Key: chunk_size
+
+The maximum number of entries provided per chunk if chunked access is used
+to download entries for this sequence.
+
 ### URL: `api/get_oeis_factors/<OEIS_ID>/<COUNT>`
 
 This could take a long time. It internally does everything that the endpoint
diff --git a/flaskr/nscope/models.py b/flaskr/nscope/models.py
index fee9205..0f8e965 100644
--- a/flaskr/nscope/models.py
+++ b/flaskr/nscope/models.py
@@ -35,7 +35,13 @@ class Sequence(db.Model):
     name = db.Column(db.String, unique=False, nullable=True)
     # The following is called the "offset" in the OEIS, but that is a
     # Postgres reserved word, so we use a different name.
-    shift = db.Column(db.Integer, unique=False, nullable=False, default=0)
+    # Moreover, it is a string, not a number, because some OEIS sequences have
+    # shifts too large to fit in an integer.
+    shift = db.Column(db.String, unique=False, nullable=True)
+    # Similarly, the last index must be a string as well.
+    last_index = db.Column(db.String, unique=False, nullable=True)
+    # Should still work to keep values in an array, since there can't be so
+    # many of them, and we store the shift separately
     values = db.Column(db.ARRAY(db.String), unique=False, nullable=True)
     values_requested = db.Column(db.Boolean, nullable=False, default=False)
     raw_refs = db.Column(db.String, unique=False, nullable=True)
diff --git a/flaskr/nscope/test/test_wrong_response_type.py b/flaskr/nscope/test/test_wrong_response_type.py
index a67109c..98e6735 100644
--- a/flaskr/nscope/test/test_wrong_response_type.py
+++ b/flaskr/nscope/test/test_wrong_response_type.py
@@ -15,9 +15,9 @@ class TestUnavailableValuesInNameAndValues(abstract_mock_oeis_test.AbstractMockO
 class TestUnavailableSearchInNameAndValues(abstract_mock_oeis_test.AbstractMockOEISTest):
   search_available = False
   endpoint = 'http://localhost:5000/api/get_oeis_name_and_values/A153080'
-  expected_response = 'Error: 503 Server Error: SERVICE UNAVAILABLE for url: http://localhost:5001/search?q=id%3AA153080&fmt=json'
+  expected_response = 'Error: 404 Client Error: NOT FOUND for url: http://localhost:5001/A153080'
 
 class TestUnavailableSearch(abstract_mock_oeis_test.AbstractMockOEISTest):
   search_available = False
   endpoint = 'http://localhost:5000/api/get_oeis_metadata/A153080'
-  expected_response = 'Error: 503 Server Error: SERVICE UNAVAILABLE for url: http://localhost:5001/search?q=A153080&fmt=json'
\ No newline at end of file
+  expected_response = 'Error: 404 Client Error: NOT FOUND for url: http://localhost:5001/A153080'
diff --git a/flaskr/nscope/views.py b/flaskr/nscope/views.py
index 0bfcbd3..9e8cb35 100644
--- a/flaskr/nscope/views.py
+++ b/flaskr/nscope/views.py
@@ -118,7 +118,11 @@ def fetch_metadata(oeis_id):
     if seq.backrefs is not None:
         # We've cached all the metadata already, so we just return it
         return seq
+    # Start with the basic data on the main page:
+    seq = fetch_oeis_name_etc(oeis_id)
+    if isinstance(seq, Exception): return seq
 
+    # Now hunt for the backrefs
     our_req_time = time.time_ns()
     last_req_time = seq.meta_req_time
     if last_req_time is not None:
@@ -167,14 +171,7 @@ def fetch_metadata(oeis_id):
         saw = 0
         while (saw < ref_count):
             for result in search_response['results']:
-                if result['number'] == target_number:
-                    # Write the sequence's name and raw references as soon as we
-                    # find them
-                    if seq.raw_refs is None:
-                        seq.name = result['name']
-                        seq.raw_refs = "\n".join(result.get('xref', []))
-                        db.session.commit()
-                else:
+                if result['number'] != target_number: # Don't add self-backref
                     backrefs.append(oeis_a_id(result['number']))
                 saw += 1
             if saw < ref_count:
@@ -184,6 +181,7 @@ def fetch_metadata(oeis_id):
                     return search_response
                 if search_response['results'] == None:
                     break
+            # Do we need to break out of this loop if we fail to make progress?
         seq.backrefs = backrefs
     else:
         # We didn't find any metadata
@@ -279,10 +277,10 @@ def fetch_values(oeis_id):
         seq_vals[index] = column[1]
     if last < first:
         return IndexError(f"No terms found for ID '{oeis_id}'.")
-    seq.values = [seq_vals[i] for i in range(first,last+1)]
+    seq.values = [seq_vals[i] for i in range(first, last+1)]
     if not seq.name:
         seq.name = name or placeholder_name(oeis_id)
-    seq.shift = first
+    seq.shift = str(first)
     db.session.commit()
     return seq
 
@@ -429,10 +427,64 @@ def get_oeis_values(oeis_id, num_elements):
     wants = int(num_elements)
     if wants and wants < len(raw_vals):
         raw_vals = raw_vals[0:wants]
-    vals = {(i+seq.shift):raw_vals[i] for i in range(len(raw_vals))}
+    first = int(seq.shift)
+    vals = {(i+first):raw_vals[i] for i in range(len(raw_vals))}
 
     return jsonify({'id': seq.id, 'name': seq.name, 'values': vals})
 
+# We grab the actual page rather than the json data because the auto-generated
+# link describing the b-file is not in the json data =(
+def fetch_oeis_name_etc(valid_oeis_id):
+    seq = find_oeis_sequence(valid_oeis_id)
+    if (not seq.name or seq.raw_refs is None
+            or seq.name == placeholder_name(valid_oeis_id)):
+        seq_page = oeis_get('/' + valid_oeis_id, json=False)
+        if isinstance(seq_page, Exception):
+            return seq_page
+        body = seq_page.split('<body')[1]
+        belowform = body.split('</form')[1]
+        lines = belowform.split("\n")
+        name = ''
+        seekingName = False
+        xrefs = ''
+        ix = -1
+        limit = len(lines)
+        while (ix := ix+1) < limit:
+            current = lines[ix].strip()
+            # Name occurs a little below the OEIS ID:
+            if current == valid_oeis_id:
+                seekingName = True
+                continue
+            if name == '' and seekingName and current and current[0] != '<':
+                name = current
+                continue
+            # Otherwise, we are searching for various fields
+            if 'OFFSET' in current:
+                ix += 3
+                current = lines[ix].strip()
+                offsets = current.split('<tt>')[1]
+                seq.shift = offsets.split(',')[0]
+                continue
+            if 'LINKS' in current:
+                ix += 3
+                current = lines[ix].strip()
+                lastetc = current.split('..')[1]
+                seq.last_index = re.search(r'[-\d]*', lastetc)[0]
+                continue
+            if 'CROSSREFS' in current:
+                ix += 2
+                while (ix := ix+1) < limit:
+                    current = lines[ix].strip()
+                    if current == '<tr>': break
+                    if current == '': continue
+                    current = current.removeprefix('<div class="Seq SeqY"><tt>')
+                    current = current.removesuffix('</tt></div>')
+                    xrefs += current + "\n"
+        if name: seq.name = name
+        if xrefs: seq.raw_refs = xrefs
+        db.session.commit()
+    return seq
+
 @bp.route("/api/get_oeis_name_and_values/<oeis_id>", methods=["GET"])
 def get_oeis_name_and_values(oeis_id):
     valid_oeis_id = get_valid_oeis_id(oeis_id)
@@ -442,19 +494,32 @@ def get_oeis_name_and_values(oeis_id):
     if isinstance(seq, Exception):
         return f"Error: {seq}"
     raw_vals = seq.values
-    vals = {(i + seq.shift): raw_vals[i] for i in range(len(raw_vals))}
-    # Now get the name
-    seq = find_oeis_sequence(valid_oeis_id)
-    if not seq.name or seq.name == placeholder_name(oeis_id):
-        search_response = oeis_get('/search', {'q': f'id:{oeis_id}', 'fmt': 'json'})
-        if isinstance(search_response, Exception):
-            return f"Error: {search_response}"
-        if search_response['results'] != None:
-            seq.name = search_response['results'][0]['name']
-            db.session.commit()
+    first = int(seq.shift)
+    vals = {(i + first): raw_vals[i] for i in range(len(raw_vals))}
+    seq = fetch_oeis_name_etc(valid_oeis_id)
+    if isinstance(seq, Exception):
+        return f"Error: {seq}"
     executor.submit(fetch_factors, valid_oeis_id, timeout=1000)
     return jsonify({'id': seq.id, 'name': seq.name, 'values': vals})
 
+chunk_size = 1024 # A constant now; may someday want it to depend on sequence
+
+@bp.route("/api/get_oeis_header/<oeis_id>", methods=["GET"])
+def get_oeis_header(oeis_id):
+    valid_oeis_id = get_valid_oeis_id(oeis_id)
+    if isinstance(valid_oeis_id, Exception):
+        return f"Error: {valid_oeis_id}"
+    seq = fetch_oeis_name_etc(valid_oeis_id)
+    if isinstance(seq, Exception):
+        return f"Error: {seq}"
+    executor.submit(fetch_factors, valid_oeis_id, timeout=1000)
+    return jsonify({
+        'id': seq.id,
+        'name': seq.name,
+        'first': seq.shift,
+        'last': seq.last_index,
+        'chunk_size': chunk_size})
+
 @bp.route("/api/get_oeis_metadata/<oeis_id>", methods=["GET"])
 def get_oeis_metadata(oeis_id):
     valid_oeis_id = get_valid_oeis_id(oeis_id)
@@ -486,7 +551,8 @@ def get_oeis_factors(oeis_id, num_elements):
     raw_fac = seq.factors
     if wants and wants < len(raw_fac):
         raw_fac = raw_fac[0:wants]
-    facs = {(i+seq.shift):raw_fac[i] for i in range(len(raw_fac))}
+    first = int(seq.shift)
+    facs = {(i+first):raw_fac[i] for i in range(len(raw_fac))}
     executor.submit(fetch_metadata, valid_oeis_id)
     return jsonify({
         'id': seq.id,

From 4476f43ded80a0d022f19b76b59a3b9b06956cea Mon Sep 17 00:00:00 2001
From: Glen Whitney <glen@studioinfinity.org>
Date: Sun, 8 Sep 2024 19:02:28 -0700
Subject: [PATCH 2/3] feat: add get_oeis_chunk endpoint

---
 doc/api_endpoints.md                       |  25 ++
 flaskr/nscope/test/test_get_oeis_chunk.py  | 369 +++++++++++++++++++++
 flaskr/nscope/test/test_get_oeis_header.py |  36 ++
 flaskr/nscope/views.py                     |  28 +-
 4 files changed, 455 insertions(+), 3 deletions(-)
 create mode 100644 flaskr/nscope/test/test_get_oeis_chunk.py
 create mode 100644 flaskr/nscope/test/test_get_oeis_header.py

diff --git a/doc/api_endpoints.md b/doc/api_endpoints.md
index dfbd399..fe56965 100644
--- a/doc/api_endpoints.md
+++ b/doc/api_endpoints.md
@@ -35,6 +35,31 @@ with id OEIS_ID. Since some sequence values correspond to extremely large
 numbers, strings are used to avoid the limitations of any particular numeric
 datatype.
 
+### URL: `api/get_oeis_chunk/<OEIS_ID>/<CHUNK_NUMBER>`
+
+Similar to the `get_oeis_values` endpoint, but allows some level of random
+access. Every sequence has a "chunk size" that can be obtained via the
+`get_oeis_header` endpoint described below. This endpoint returns (just) the
+sequence values in the CHUNK_NUMBERth chunk, in other words, all of the entries
+with index no smaller than CHUNK_NUMBER×chunk_size and less than
+(CHUNK_NUMBER+1)×(chunk_size). Note that CHUNK_NUMBER is allowed to be negative.
+
+If you are running the server to test it on your local host, a full URL would
+be http://127.0.0.1:5000/api/get_oeis_chunk/A000030/1 which will return the
+first digits of the numbers 1024 through 2047 (so 976 '1's followed by 48 '2's).
+
+#### Key: name
+
+A string giving the official name of the OEIS sequence with id OEIS_ID,
+if already known to backscope, or a temporary name if not.
+
+#### Key: values
+
+A mapping from each index in chunk CHUNK_NUMBER to the _string_ (of digits)
+giving the value of the sequence with id OEIS_ID at that index. Since some
+sequence values correspond to extremely large numbers, strings are used to
+avoid the limitations of any particular numeric datatype.
+
 ### URL: `api/get_oeis_name_and_values/<OEIS_ID>`
 
 This one is potentially a bit slower than the above URL, as it may make
diff --git a/flaskr/nscope/test/test_get_oeis_chunk.py b/flaskr/nscope/test/test_get_oeis_chunk.py
new file mode 100644
index 0000000..b9452fd
--- /dev/null
+++ b/flaskr/nscope/test/test_get_oeis_chunk.py
@@ -0,0 +1,369 @@
+import unittest
+import flaskr.nscope.test.abstract_endpoint_test as abstract_endpoint_test
+
+a93178 = {str(i): str(1 if i%2 == 0 else i) for i in range(1024)}
+
+class TestGetOEISChunkSmallZero(abstract_endpoint_test.AbstractEndpointTest):
+  endpoint = 'http://localhost:5000/api/get_oeis_chunk/A093178/0'
+  
+  # we choose A093178 because:
+  # - it has zero shift, to make sure that is in chunk 0
+  # - it currently has small values and few references, which speeds up the
+  #   background work triggered by the request
+  # - it is one for even indices and i for odd indices, so easy to compute the
+  #   (long) expected value
+  expected_response = {
+    'id': 'A093178',
+    'name': 'A093178 [name not yet loaded]',
+    'values': a93178
+  }
+
+class TestGetOEISChunkSmallOne(abstract_endpoint_test.AbstractEndpointTest):
+  endpoint = 'http://localhost:5000/api/get_oeis_chunk/A160480/0'
+  
+  # we choose A160480 because:
+  # - it has a positive shift, so we can make sure that chunk 0 starts at
+  #   positive index
+  # - it currently has small values and few references, which speeds up the
+  #   background work triggered by the request
+  expected_response = {
+    'id': 'A160480',
+    'name': ' A160480 (b-file synthesized from sequence entry)',
+    'values': {
+      '2': '-1',
+      '3': '-11',
+      '4': '1',
+      '5': '-299',
+      '6': '36',
+      '7': '-1',
+      '8': '-15371',
+      '9': '2063',
+      '10': '-85',
+      '11': '1',
+      '12': '-1285371',
+      '13': '182474',
+      '14': '-8948',
+      '15': '166',
+      '16': '-1',
+      '17': '-159158691',
+      '18': '23364725',
+      '19': '-1265182',
+      '20': '29034',
+      '21': '-287',
+      '22': '1',
+      '23': '-27376820379',
+      '24': '4107797216',
+      '25': '-237180483',
+      '26': '6171928',
+      '27': '-77537',
+      '28': '456',
+      '29': '-1'
+    }
+  }
+
+class TestGetOEISChunkNegative(abstract_endpoint_test.AbstractEndpointTest):
+  endpoint = 'http://localhost:5000/api/get_oeis_chunk/A078302/-1'
+
+  # we choose A078302 because:
+  # - it has a negative shift, so we can make sure the values come out in
+  #   chunk -1
+  # - it currently has small values and few references, which speeds up the
+  #   background work triggered by the request
+  expected_response = {
+    'id': 'A078302',
+    'name': ' A078302 (b-file synthesized from sequence entry)',
+    'values': {
+      '-43': '5',
+      '-42': '3',
+      '-41': '9',
+      '-40': '1'
+    }
+  }
+
+class TestGetOEISChunkNegEmpty(abstract_endpoint_test.AbstractEndpointTest):
+  endpoint = 'http://localhost:5000/api/get_oeis_chunk/A078302/0'
+
+  # we choose A078302 because:
+  # - it has a negative shift, and no values with positive indices, so
+  #   we can make sure that chunk 0 is empty
+  # - it currently has small values and few references, which speeds up the
+  #   background work triggered by the request
+  expected_response = {
+    'id': 'A078302',
+    'name': ' A078302 (b-file synthesized from sequence entry)',
+    'values': {}
+  }
+
+class TestGetOEISChunkBig(abstract_endpoint_test.AbstractEndpointTest):
+  endpoint = 'http://localhost:5000/api/get_oeis_chunk/A241298/361028'
+
+  # we choose A241298 because:
+  # - it has a large positive shift, so we can make sure that the values show
+  #   up in the correct chunk
+  # - it currently has small values and few references, which speeds up the
+  #   background work triggered by the request
+  expected_response = {
+    'id': 'A241298',
+    'name': ' A241298 (b-file synthesized from sequence entry)',
+    'values': {
+      '369693100': '4',
+      '369693101': '2',
+      '369693102': '8',
+      '369693103': '1',
+      '369693104': '2',
+      '369693105': '4',
+      '369693106': '7',
+      '369693107': '7',
+      '369693108': '3',
+      '369693109': '1',
+      '369693110': '7',
+      '369693111': '5',
+      '369693112': '7',
+      '369693113': '4',
+      '369693114': '7',
+      '369693115': '0',
+      '369693116': '4',
+      '369693117': '8',
+      '369693118': '0',
+      '369693119': '3',
+      '369693120': '6',
+      '369693121': '9',
+      '369693122': '8',
+      '369693123': '7',
+      '369693124': '1',
+      '369693125': '1',
+      '369693126': '5',
+      '369693127': '9',
+      '369693128': '3',
+      '369693129': '0',
+      '369693130': '5',
+      '369693131': '6',
+      '369693132': '3',
+      '369693133': '5',
+      '369693134': '2',
+      '369693135': '1',
+      '369693136': '3',
+      '369693137': '3',
+      '369693138': '9',
+      '369693139': '0',
+      '369693140': '5',
+      '369693141': '5',
+      '369693142': '4',
+      '369693143': '8',
+      '369693144': '2',
+      '369693145': '2',
+      '369693146': '4',
+      '369693147': '1',
+      '369693148': '4',
+      '369693149': '4',
+      '369693150': '3',
+      '369693151': '5',
+      '369693152': '1',
+      '369693153': '4',
+      '369693154': '1',
+      '369693155': '7',
+      '369693156': '4',
+      '369693157': '7',
+      '369693158': '5',
+      '369693159': '3',
+      '369693160': '7',
+      '369693161': '2',
+      '369693162': '3',
+      '369693163': '0',
+      '369693164': '5',
+      '369693165': '3',
+      '369693166': '5',
+      '369693167': '2',
+      '369693168': '3',
+      '369693169': '8',
+      '369693170': '8',
+      '369693171': '7',
+      '369693172': '4',
+      '369693173': '7',
+      '369693174': '1',
+      '369693175': '7',
+      '369693176': '3',
+      '369693177': '5',
+      '369693178': '0',
+      '369693179': '4',
+      '369693180': '8',
+      '369693181': '3',
+      '369693182': '5',
+      '369693183': '3',
+      '369693184': '1',
+      '369693185': '9',
+      '369693186': '3',
+      '369693187': '6',
+      '369693188': '6',
+      '369693189': '5',
+      '369693190': '2',
+      '369693191': '9',
+      '369693192': '9',
+      '369693193': '4',
+      '369693194': '3',
+      '369693195': '2',
+      '369693196': '0',
+      '369693197': '3',
+      '369693198': '3',
+      '369693199': '3',
+      '369693200': '7',
+      '369693201': '5',
+      '369693202': '0',
+      '369693203': '6',
+      '369693204': '0'
+    }
+  }
+
+class TestGetOEISChunkBigWrong(abstract_endpoint_test.AbstractEndpointTest):
+  endpoint = 'http://localhost:5000/api/get_oeis_chunk/A241298/17'
+
+  # we choose A241298 because:
+  # - it has a large positive shift, so we can make sure that the values show
+  #   up in the correct chunk (and nowhere else)
+  # - it currently has small values and few references, which speeds up the
+  #   background work triggered by the request
+  expected_response = {
+    'id': 'A241298',
+    'name': ' A241298 (b-file synthesized from sequence entry)',
+    'values': {}
+  }
+
+class TestGetOEISChunkHugeFirst(abstract_endpoint_test.AbstractEndpointTest):
+  endpoint = 'http://localhost:5000/api/get_oeis_chunk/A241292/3553061171'
+
+  # we choose A241292 because:
+  # - it has a positive shift too large for a 32-bit integer (about 3.6e12), so
+  #   we can make sure that the values still show up in the correct chunk
+  # - its values are by fortune split across two chunks
+  # - it currently has small values and few references, which speeds up the
+  #   background work triggered by the request
+  expected_response = {
+    'id': 'A241292',
+    'name': ' A241292 (b-file synthesized from sequence entry)',
+    'values': {
+      '3638334640025': '1',
+      '3638334640026': '2',
+      '3638334640027': '5',
+      '3638334640028': '8',
+      '3638334640029': '0',
+      '3638334640030': '1',
+      '3638334640031': '4',
+      '3638334640032': '2',
+      '3638334640033': '9',
+      '3638334640034': '0',
+      '3638334640035': '6',
+      '3638334640036': '2',
+      '3638334640037': '7',
+      '3638334640038': '4',
+      '3638334640039': '9',
+      '3638334640040': '1',
+      '3638334640041': '3',
+      '3638334640042': '1',
+      '3638334640043': '7',
+      '3638334640044': '8',
+      '3638334640045': '6',
+      '3638334640046': '0',
+      '3638334640047': '3',
+      '3638334640048': '9',
+      '3638334640049': '0',
+      '3638334640050': '6',
+      '3638334640051': '9',
+      '3638334640052': '8',
+      '3638334640053': '2',
+      '3638334640054': '0',
+      '3638334640055': '3',
+      '3638334640056': '2',
+      '3638334640057': '8',
+      '3638334640058': '1',
+      '3638334640059': '2',
+      '3638334640060': '1',
+      '3638334640061': '5',
+      '3638334640062': '5',
+      '3638334640063': '1',
+      '3638334640064': '8',
+      '3638334640065': '0',
+      '3638334640066': '4',
+      '3638334640067': '6',
+      '3638334640068': '7',
+      '3638334640069': '1',
+      '3638334640070': '4',
+      '3638334640071': '3',
+      '3638334640072': '1',
+      '3638334640073': '6',
+      '3638334640074': '5',
+      '3638334640075': '9',
+      '3638334640076': '6',
+      '3638334640077': '0',
+      '3638334640078': '1',
+      '3638334640079': '5',
+      '3638334640080': '1',
+      '3638334640081': '8',
+      '3638334640082': '9',
+      '3638334640083': '6',
+      '3638334640084': '7',
+      '3638334640085': '4',
+      '3638334640086': '9',
+      '3638334640087': '4',
+      '3638334640088': '4',
+      '3638334640089': '3',
+      '3638334640090': '8',
+      '3638334640091': '1',
+      '3638334640092': '2',
+      '3638334640093': '1',
+      '3638334640094': '1',
+      '3638334640095': '0',
+      '3638334640096': '1',
+      '3638334640097': '1',
+      '3638334640098': '3',
+      '3638334640099': '0',
+      '3638334640100': '0',
+      '3638334640101': '0',
+      '3638334640102': '1',
+      '3638334640103': '7',
+      '3638334640104': '7',
+      '3638334640105': '8',
+      '3638334640106': '5',
+      '3638334640107': '3',
+      '3638334640108': '1',
+      '3638334640109': '0',
+      '3638334640110': '8',
+      '3638334640111': '0',
+      '3638334640112': '3',
+      '3638334640113': '9',
+      '3638334640114': '0',
+      '3638334640115': '3',
+      '3638334640116': '2',
+      '3638334640117': '9',
+      '3638334640118': '6',
+      '3638334640119': '2',
+      '3638334640120': '4',
+      '3638334640121': '0',
+      '3638334640122': '1',
+      '3638334640123': '1',
+      '3638334640124': '5',
+      '3638334640125': '6',
+      '3638334640126': '9',
+      '3638334640127': '5'
+    }
+  }
+
+class TestGetOEISChunkHugeRest(abstract_endpoint_test.AbstractEndpointTest):
+  endpoint = 'http://localhost:5000/api/get_oeis_chunk/A241292/3553061172'
+
+  # we choose A241292 because:
+  # - it has a positive shift too large for a 32-bit integer (about 3.6e12), so
+  #   we can make sure that the values still show up in the correct chunk
+  # - its values are by fortune split across two chunks
+  # - it currently has small values and few references, which speeds up the
+  #   background work triggered by the request
+  expected_response = {
+    'id': 'A241292',
+    'name': ' A241292 (b-file synthesized from sequence entry)',
+    'values': {  
+      '3638334640128': '8',
+      '3638334640129': '5'
+    }
+  }
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/flaskr/nscope/test/test_get_oeis_header.py b/flaskr/nscope/test/test_get_oeis_header.py
new file mode 100644
index 0000000..ab4e5ef
--- /dev/null
+++ b/flaskr/nscope/test/test_get_oeis_header.py
@@ -0,0 +1,36 @@
+import unittest
+import flaskr.nscope.test.abstract_endpoint_test as abstract_endpoint_test
+
+
+class TestGetOEISHeadersPositive(abstract_endpoint_test.AbstractEndpointTest):
+  endpoint = 'http://localhost:5000/api/get_oeis_header/A003173'
+  
+  # we choose A003173 because:
+  # - it has positive shift
+  # - it has small values and few references, which speeds up the
+  #   background work triggered by the request
+  expected_response = {
+    'id': 'A003173',
+    'name': 'Heegner numbers: imaginary quadratic fields with unique factorization (or class number 1).',
+    'first': '1',
+    'last': '9',
+    'chunk_size': 1024
+  }
+
+class TestGetOEISHeadersNegative(abstract_endpoint_test.AbstractEndpointTest):
+  endpoint = 'http://localhost:5000/api/get_oeis_header/A000521'
+  
+  # we choose A000521 because:
+  # - it has a negative shift
+  # - it has many long values, so a full data request would be very large
+  expected_response = {
+    'id': 'A000521',
+    'name': 'Coefficients of modular function j as power series in q = e^(2 Pi i t). Another name is the elliptic modular invariant J(tau).',
+    'first': '-1',
+    'last': '10000',
+    'chunk_size': 1024
+  }
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/flaskr/nscope/views.py b/flaskr/nscope/views.py
index 9e8cb35..8b2e5cf 100644
--- a/flaskr/nscope/views.py
+++ b/flaskr/nscope/views.py
@@ -428,10 +428,34 @@ def get_oeis_values(oeis_id, num_elements):
     if wants and wants < len(raw_vals):
         raw_vals = raw_vals[0:wants]
     first = int(seq.shift)
-    vals = {(i+first):raw_vals[i] for i in range(len(raw_vals))}
+    vals = {(i+first): raw_vals[i] for i in range(len(raw_vals))}
 
     return jsonify({'id': seq.id, 'name': seq.name, 'values': vals})
 
+chunk_size = 1024 # A constant now; may someday want it to depend on sequence
+
+@bp.route("/api/get_oeis_chunk/<oeis_id>/<chunk_string>", methods=["GET"])
+def get_oeis_chunk(oeis_id, chunk_string):
+    valid_oeis_id = get_valid_oeis_id(oeis_id)
+    if isinstance(valid_oeis_id, Exception):
+        return f"Error: {valid_oeis_id}"
+    seq = fetch_values(valid_oeis_id)
+    if isinstance(seq, Exception):
+        return f"Error: {seq}"
+    # OK, got valid sequence, so schedule grabbing of metadata and factors:
+    executor.submit(fetch_metadata, valid_oeis_id)
+    executor.submit(fetch_factors, valid_oeis_id, timeout=1000)
+    # Finally, trim return sequence as requested:
+    raw_vals = seq.values
+    chunk = int(chunk_string)
+    first = int(seq.shift)
+    least = chunk*chunk_size
+    toobig = (chunk+1)*chunk_size
+    leasti = max(least - first, 0)
+    toobigi = min(toobig - first, len(raw_vals))
+    vals = {(i+first): raw_vals[i] for i in range(leasti, toobigi)}
+    return jsonify({'id': seq.id, 'name': seq.name, 'values': vals})
+
 # We grab the actual page rather than the json data because the auto-generated
 # link describing the b-file is not in the json data =(
 def fetch_oeis_name_etc(valid_oeis_id):
@@ -502,8 +526,6 @@ def get_oeis_name_and_values(oeis_id):
     executor.submit(fetch_factors, valid_oeis_id, timeout=1000)
     return jsonify({'id': seq.id, 'name': seq.name, 'values': vals})
 
-chunk_size = 1024 # A constant now; may someday want it to depend on sequence
-
 @bp.route("/api/get_oeis_header/<oeis_id>", methods=["GET"])
 def get_oeis_header(oeis_id):
     valid_oeis_id = get_valid_oeis_id(oeis_id)

From 7aec22bb64926876f8c89d9836ed024d06af53b6 Mon Sep 17 00:00:00 2001
From: Glen Whitney <glen@studioinfinity.org>
Date: Wed, 18 Sep 2024 16:53:40 -0700
Subject: [PATCH 3/3] refactor: Switch to parsing OEIS text format for header
 information

  This will be more useful when we change over to having a clone of the
  git repository version of the OEIS, anyway.
  Sometimes there is no link describing the bounds of the sequence. We believe
  such cases only occur for sequences short enough that the only known terms
  are the ones in the sequence entry itself. I.e., these are very short
  sequences. Hence in such cases we fall back to just fetching all of the
  values.
---
 .../nscope/test/test_wrong_response_type.py   |  4 +-
 flaskr/nscope/views.py                        | 72 ++++++++-----------
 2 files changed, 30 insertions(+), 46 deletions(-)

diff --git a/flaskr/nscope/test/test_wrong_response_type.py b/flaskr/nscope/test/test_wrong_response_type.py
index 98e6735..f247cc3 100644
--- a/flaskr/nscope/test/test_wrong_response_type.py
+++ b/flaskr/nscope/test/test_wrong_response_type.py
@@ -15,9 +15,9 @@ class TestUnavailableValuesInNameAndValues(abstract_mock_oeis_test.AbstractMockO
 class TestUnavailableSearchInNameAndValues(abstract_mock_oeis_test.AbstractMockOEISTest):
   search_available = False
   endpoint = 'http://localhost:5000/api/get_oeis_name_and_values/A153080'
-  expected_response = 'Error: 404 Client Error: NOT FOUND for url: http://localhost:5001/A153080'
+  expected_response = 'Error: 400 Client Error: BAD REQUEST for url: http://localhost:5001/search?q=id:A153080&fmt=text'
 
 class TestUnavailableSearch(abstract_mock_oeis_test.AbstractMockOEISTest):
   search_available = False
   endpoint = 'http://localhost:5000/api/get_oeis_metadata/A153080'
-  expected_response = 'Error: 404 Client Error: NOT FOUND for url: http://localhost:5001/A153080'
+  expected_response = 'Error: 400 Client Error: BAD REQUEST for url: http://localhost:5001/search?q=id:A153080&fmt=text'
diff --git a/flaskr/nscope/views.py b/flaskr/nscope/views.py
index 8b2e5cf..c301f3f 100644
--- a/flaskr/nscope/views.py
+++ b/flaskr/nscope/views.py
@@ -281,6 +281,7 @@ def fetch_values(oeis_id):
     if not seq.name:
         seq.name = name or placeholder_name(oeis_id)
     seq.shift = str(first)
+    seq.last_index = str(last)
     db.session.commit()
     return seq
 
@@ -456,57 +457,40 @@ def get_oeis_chunk(oeis_id, chunk_string):
     vals = {(i+first): raw_vals[i] for i in range(leasti, toobigi)}
     return jsonify({'id': seq.id, 'name': seq.name, 'values': vals})
 
-# We grab the actual page rather than the json data because the auto-generated
-# link describing the b-file is not in the json data =(
+# We grab and parse the text format of the OEIS. If we don't find a link,
+# go ahead and fetch the values and use the info from there.
 def fetch_oeis_name_etc(valid_oeis_id):
     seq = find_oeis_sequence(valid_oeis_id)
     if (not seq.name or seq.raw_refs is None
             or seq.name == placeholder_name(valid_oeis_id)):
-        seq_page = oeis_get('/' + valid_oeis_id, json=False)
-        if isinstance(seq_page, Exception):
-            return seq_page
-        body = seq_page.split('<body')[1]
-        belowform = body.split('</form')[1]
-        lines = belowform.split("\n")
-        name = ''
-        seekingName = False
+        text = oeis_get(f"/search?q=id:{valid_oeis_id}&fmt=text", json=False)
+        if isinstance(text, Exception):
+            return text
+        seenLink = False
+        needLast = True
         xrefs = ''
-        ix = -1
-        limit = len(lines)
-        while (ix := ix+1) < limit:
-            current = lines[ix].strip()
-            # Name occurs a little below the OEIS ID:
-            if current == valid_oeis_id:
-                seekingName = True
-                continue
-            if name == '' and seekingName and current and current[0] != '<':
-                name = current
-                continue
-            # Otherwise, we are searching for various fields
-            if 'OFFSET' in current:
-                ix += 3
-                current = lines[ix].strip()
-                offsets = current.split('<tt>')[1]
-                seq.shift = offsets.split(',')[0]
-                continue
-            if 'LINKS' in current:
-                ix += 3
-                current = lines[ix].strip()
-                lastetc = current.split('..')[1]
-                seq.last_index = re.search(r'[-\d]*', lastetc)[0]
-                continue
-            if 'CROSSREFS' in current:
-                ix += 2
-                while (ix := ix+1) < limit:
-                    current = lines[ix].strip()
-                    if current == '<tr>': break
-                    if current == '': continue
-                    current = current.removeprefix('<div class="Seq SeqY"><tt>')
-                    current = current.removesuffix('</tt></div>')
-                    xrefs += current + "\n"
-        if name: seq.name = name
+        for line in text.split("\n"):
+            line = line.strip()
+            if not line.startswith('%'): continue
+            value = line[11:] # Value starts in column 11
+            match line[1]:
+                case 'N': seq.name = value
+                case 'O': seq.shift = value.split(',')[0]
+                case 'H':
+                    if seenLink: continue
+                    seenLink = True
+                    found = re.search(
+                        r'Table of.*=\s*([-\d]+)[.][.]([-\d]+)', value)
+                    if found:
+                        if seq.shift: assert seq.shift == found[1]
+                        seq.last_index = found[2]
+                        needLast = False
+                case 'Y':
+                    xrefs += value + "\n"
         if xrefs: seq.raw_refs = xrefs
         db.session.commit()
+        if needLast:
+            seq = fetch_values(valid_oeis_id)
     return seq
 
 @bp.route("/api/get_oeis_name_and_values/<oeis_id>", methods=["GET"])