diff --git a/src/py_semantic_taxonomy/adapters/routers/web_router.py b/src/py_semantic_taxonomy/adapters/routers/web_router.py
index 03988a7..8fcbd1a 100644
--- a/src/py_semantic_taxonomy/adapters/routers/web_router.py
+++ b/src/py_semantic_taxonomy/adapters/routers/web_router.py
@@ -2,6 +2,7 @@
 from pathlib import Path as PathLib
 from urllib.parse import quote, unquote, urlencode
 
+import rfc3987
 import structlog
 from fastapi import APIRouter, Depends, HTTPException, Path, Request
 from fastapi.responses import HTMLResponse, RedirectResponse
@@ -22,6 +23,15 @@
 router = APIRouter(prefix="/web", include_in_schema=False)
 
 
+def _is_iri(query: str) -> bool:
+    """Check if query string is a valid HTTP/HTTPS IRI."""
+    try:
+        parsed = rfc3987.parse(query.strip(), rule="IRI")
+        return parsed.get("scheme") in ("http", "https")
+    except ValueError:
+        return False
+
+
 def value_for_language(value: list[dict[str, str]], lang: str) -> str:
     """Get the `@value` for a list of multilingual strings with correct `@language` value"""
     for dct in value:
@@ -189,10 +199,16 @@ async def web_concept_scheme_view(
             },
         )
     except de.ConceptSchemeNotFoundError:
-        raise HTTPException(status_code=404, detail=f"Concept Scheme with IRI `{iri}` not found")
+        raise HTTPException(
+            status_code=404, detail=f"Concept Scheme with IRI `{iri}` not found"
+        )
     except de.ConceptSchemesNotInDatabase as e:
-        logger.error("Database error while fetching concept scheme", iri=iri, error=str(e))
-        raise HTTPException(status_code=500, detail="Database error while fetching concept scheme")
+        logger.error(
+            "Database error while fetching concept scheme", iri=iri, error=str(e)
+        )
+        raise HTTPException(
+            status_code=500, detail="Database error while fetching concept scheme"
+        )
 
 
 def concept_view_url(
@@ -264,7 +280,9 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str):
             except de.ConceptNotFoundError:
                 return iri, iri
 
-        relationships = await service.relationships_get(iri=decoded_iri, source=True, target=True)
+        relationships = await service.relationships_get(
+            iri=decoded_iri, source=True, target=True
+        )
         broader = [
             (await get_concept_and_link(obj.target))
             for obj in relationships
@@ -277,7 +295,8 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str):
         ]
 
         scheme_list = [
-            (request.url_for("web_concept_view", iri=quote(s["@id"])), s) for s in concept.schemes
+            (request.url_for("web_concept_view", iri=quote(s["@id"])), s)
+            for s in concept.schemes
         ]
 
         associations = await service.association_get_all(source_concept_iri=concept.id_)
@@ -286,29 +305,27 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str):
             for target in obj.target_concepts:
                 try:
                     url, assoc_concept = await get_concept_and_link(target["@id"])
-                    formatted_associations.append(
-                        {
-                            "url": url,
-                            "obj": assoc_concept,
-                            "conditional": None,
-                            "conversion": target.get(
-                                "http://qudt.org/3.0.0/schema/qudt/conversionMultiplier"
-                            ),
-                        }
-                    )
+                    formatted_associations.append({
+                        "url": url,
+                        "obj": assoc_concept,
+                        "conditional": None,
+                        "conversion": target.get(
+                            "http://qudt.org/3.0.0/schema/qudt/conversionMultiplier"
+                        ),
+                    })
                 except de.ConceptNotFoundError:
-                    formatted_associations.append(
-                        {
-                            "url": target["@id"],
-                            "obj": target["@id"],
-                            "conditional": None,
-                            "conversion": target.get(
-                                "http://qudt.org/3.0.0/schema/qudt/conversionMultiplier"
-                            ),
-                        }
-                    )
-
-        languages = [(request.url, Language.get(language).display_name(language).title())] + [
+                    formatted_associations.append({
+                        "url": target["@id"],
+                        "obj": target["@id"],
+                        "conditional": None,
+                        "conversion": target.get(
+                            "http://qudt.org/3.0.0/schema/qudt/conversionMultiplier"
+                        ),
+                    })
+
+        languages = [
+            (request.url, Language.get(language).display_name(language).title())
+        ] + [
             (
                 concept_view_url(
                     request,
@@ -343,7 +360,9 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str):
     except de.ConceptNotFoundError:
         raise HTTPException(status_code=404, detail=f"Concept with IRI `{iri}` not found")
     except de.ConceptSchemesNotInDatabase as e:
-        logger.error("Database error while fetching concept", iri=decoded_iri, error=str(e))
+        logger.error(
+            "Database error while fetching concept", iri=decoded_iri, error=str(e)
+        )
         raise HTTPException(status_code=500, detail="Database error while fetching concept")
 
 
@@ -357,9 +376,41 @@ async def web_search(
     language: str = "en",
     semantic: bool = True,
     search_service=Depends(get_search_service),
+    graph_service=Depends(get_graph_service),
     settings=Depends(get_settings),
 ) -> HTMLResponse:
     """Search for concepts."""
+    # Check if query is an IRI and attempt direct lookup
+    if query and _is_iri(query):
+        # `_is_iri` validates the stripped query, so look up that same stripped
+        # value; otherwise an IRI pasted with stray whitespace never matches.
+        query_iri = query.strip()
+        # Try to get concept directly
+        try:
+            concept = await graph_service.concept_get(iri=query_iri)
+            # If found, redirect to concept page
+            return RedirectResponse(
+                url=concept_view_url(
+                    request,
+                    concept.id_,
+                    concept.schemes[0]["@id"],
+                    language,
+                ),
+                status_code=303,  # See Other
+            )
+        except de.ConceptNotFoundError:
+            # Not a concept, try concept scheme
+            try:
+                concept_scheme = await graph_service.concept_scheme_get(iri=query_iri)
+                # If found, redirect to concept scheme page
+                return RedirectResponse(
+                    url=concept_scheme_view_url(request, concept_scheme.id_, language),
+                    status_code=303,  # See Other
+                )
+            except de.ConceptSchemeNotFoundError:
+                # IRI not found in database, fall through to regular search
+                pass
+
     try:
         results = []
         if query:
diff --git a/tests/integration/test_web_ui.py b/tests/integration/test_web_ui.py
index 6ecf40d..cbbbb75 100644
--- a/tests/integration/test_web_ui.py
+++ b/tests/integration/test_web_ui.py
@@ -85,3 +85,107 @@ async def test_web_search_empty_query(sqlite, anonymous_client):
     html_content = response.text
 
     assert "Start searching" in html_content or "Search" in html_content
+
+
+@pytest.mark.postgres
+async def test_web_search_with_concept_iri_redirects_to_concept(
+    postgres, anonymous_client, cn_db_engine, cn
+):
+    """Test that searching for a concept IRI redirects directly to the concept page."""
+    concept_iri = cn.concept_top["@id"]
+
+    response = await anonymous_client.get(
+        "/web/search/",
+        params={"query": concept_iri, "language": "de"},
+        follow_redirects=False,
+    )
+
+    # Should redirect with 303 See Other
+    assert response.status_code == 303
+
+    # Should redirect to the concept view page with language preserved
+    redirect_url = response.headers["location"]
+    assert "/web/concept/" in redirect_url
+    assert quote(concept_iri) in redirect_url
+    assert "language=de" in redirect_url
+
+
+@pytest.mark.postgres
+async def test_web_search_with_concept_scheme_iri_redirects(
+    postgres, anonymous_client, cn_db_engine, cn
+):
+    """Test that searching for a concept scheme IRI redirects to the concept scheme page."""
+    scheme_iri = cn.scheme["@id"]
+
+    response = await anonymous_client.get(
+        "/web/search/",
+        params={"query": scheme_iri, "language": "en"},
+        follow_redirects=False,
+    )
+
+    # Should redirect with 303 See Other
+    assert response.status_code == 303
+
+    # Should redirect to the concept scheme view page
+    redirect_url = response.headers["location"]
+    assert "/web/concept_scheme/" in redirect_url
+    assert quote(scheme_iri) in redirect_url
+    assert "language=en" in redirect_url
+
+
+@pytest.mark.postgres
+async def test_web_search_with_nonexistent_iri_shows_search_page(
+    postgres, anonymous_client, cn_db_engine
+):
+    """Test that searching for an IRI that doesn't exist falls back to search (or error if not configured)."""
+    nonexistent_iri = "http://example.com/nonexistent/concept/12345"
+
+    response = await anonymous_client.get(
+        "/web/search/",
+        params={"query": nonexistent_iri, "language": "en"},
+        follow_redirects=False,  # following a 303 would hide a wrong redirect
+    )
+
+    # Should show search page (200) if search engine configured
+    # or error (503) if search engine not configured
+    # Important: should NOT redirect (303) since concept doesn't exist
+    assert response.status_code in (200, 503)
+    if response.status_code == 503:
+        assert "Search engine not available" in response.text or "503" in response.text
+
+
+@pytest.mark.postgres
+@pytest.mark.typesense
+async def test_web_search_with_nonexistent_iri_falls_back_to_search(
+    postgres, typesense, anonymous_client, cn_db_engine, cn
+):
+    """Test that searching for an IRI that doesn't exist falls back to text search."""
+    nonexistent_iri = "http://example.com/nonexistent/concept/12345"
+
+    response = await anonymous_client.get(
+        "/web/search/",
+        params={"query": nonexistent_iri, "language": "en"},
+        follow_redirects=False,  # following a 303 would hide a wrong redirect
+    )
+
+    # Should show search page (no redirect) since concept doesn't exist
+    assert response.status_code == 200
+    # Should show the search interface (may or may not have results from text search)
+    assert "Search" in response.text or "search" in response.text
+
+
+async def test_web_search_with_regular_text_not_treated_as_iri(anonymous_client):
+    """Test that regular search text is not treated as an IRI (no database required)."""
+    response = await anonymous_client.get(
+        "/web/search/",
+        params={"query": "test query", "language": "en"},
+        follow_redirects=False,  # following a 303 would hide a wrong redirect
+    )
+
+    # Should get error because search engine not configured (503)
+    # or show search page if engine is configured (200)
+    # The important thing is it doesn't try to treat it as an IRI and redirect
+    assert response.status_code in (200, 503)
+    if response.status_code == 503:
+        assert "Search engine not available" in response.text or "503" in response.text
+