Skip to content

Remove couchdb env vars #1149

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions dags/shared_tasks/mapping_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,11 +151,7 @@ def validate_collection_task(
"[3433/vernacular_metadata_v1/mapped_metadata_v1/3.jsonl]"
]
"""
validate = bool(
os.environ.get("UCLDC_SOLR_URL") and
os.environ.get("UCLDC_COUCH_URL")
)
if validate:
if os.environ.get("UCLDC_SOLR_URL"):
mapped_page_batches = [json.loads(batch) for batch in mapped_page_batches]
mapped_pages = list(chain.from_iterable(mapped_page_batches))
mapped_pages = [path for path in mapped_pages if 'children' not in path]
Expand Down Expand Up @@ -266,7 +262,7 @@ def validate_endpoint_task(url, mapped_versions, params=None, **context):
print(f"please validate manually: {list(errored_collections.keys())}")
print("*" * 60)

if not len(errored_collections) == len(validations):
if len(errored_collections) == len(validations):
Copy link
Collaborator Author

@barbarahui barbarahui Nov 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@amywieliczka This looks like a logic error, but I may be misunderstanding what this check is meant to achieve. As it stands, if we validate a single collection and the validation succeeds, a ValueError is raised and the task fails.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh, yeah, that looks like an error.

print("-", file=sys.stderr)
raise ValueError("No collections successfully validated, exiting.")

Expand Down
2 changes: 0 additions & 2 deletions env.example
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ export SKIP_UNDEFINED_ENRICHMENTS=True
# export UCLDC_SOLR_URL="https://harvest-stg.cdlib.org/solr_api" # this is solr stage
export UCLDC_SOLR_URL="https://solr.calisphere.org/solr" # this is solr prod
export UCLDC_SOLR_API_KEY= # ask for a key
# export UCLDC_COUCH_URL="https://harvest-stg.cdlib.org/" # this is couch stage
export UCLDC_COUCH_URL="https://harvest-prd.cdlib.org/" # this is couch prod

# content_harvester when run locally via aws_mwaa_local_runner
# export METADATA_MOUNT=/<path on local host>/rikolti_data # required to run content harvester as docker operator in mwaa-local-runner
Expand Down
3 changes: 0 additions & 3 deletions metadata_mapper/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,3 @@

SOLR_URL = os.environ.get('UCLDC_SOLR_URL', False)
SOLR_API_KEY = os.environ.get('UCLDC_SOLR_API_KEY', False)
COUCH_URL = os.environ.get('UCLDC_COUCH_URL', False)

COUCH_TIMEOUT = int(os.environ.get('UCLDC_COUCH_TIMEOUT', 60))
19 changes: 4 additions & 15 deletions metadata_mapper/validate_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,21 +289,10 @@ def couch_db_request(collection_id: int, field_name: str) -> list[dict[str, str]

Returns: list[dict]
"""
url = f"{settings.COUCH_URL}/" \
"couchdb/ucldc/_design/all_provider_docs/" \
"_list/has_field_value/by_provider_name_wdoc" \
f"?key=\"{collection_id}\"&field={field_name}&limit=100000"

try:
response = requests.get(url, verify=False, timeout=settings.COUCH_TIMEOUT)
return json.loads(response.content)
except requests.exceptions.Timeout as e:
print(e)
print(f"Request to Couchdb has timed out after {settings.COUCH_TIMEOUT} \
seconds. Continuing without isShownAt and isShownBy values, \
which may result in increased/inaccurate validation errors.")
return []

print("Couchdb is no longer running. "
"Continuing without isShownAt and isShownBy values, "
"which may result in increased/inaccurate validation errors.")
return []

def get_couch_db_data(collection_id: int,
harvest_ids: list[str]) -> dict[str, dict[str, str]]:
Expand Down
Loading