From 6ff986027bdda8258ccaa1b28874e73b35c5113b Mon Sep 17 00:00:00 2001 From: Valentin Starlinger Date: Mon, 13 Oct 2025 12:18:08 +0200 Subject: [PATCH 1/4] Fix ModuleNotFoundError for url_utils in PyPI distribution --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 2292ba7..bfc6066 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,6 +87,10 @@ dev = [ include-package-data = true package-dir = {"" = "src"} +[tool.setuptools.packages.find] +where = ["src"] +include = ["py_semantic_taxonomy*"] + [tool.setuptools.dynamic] version = {attr = "py_semantic_taxonomy.__version__"} From c402645a92794f99f137c9418555d01d61b56488 Mon Sep 17 00:00:00 2001 From: Valentin Starlinger Date: Mon, 13 Oct 2025 16:31:44 +0200 Subject: [PATCH 2/4] Fixed search not working respecting languages --- .../adapters/routers/templates/search.html | 4 +- tests/integration/test_web_ui.py | 87 +++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 tests/integration/test_web_ui.py diff --git a/src/py_semantic_taxonomy/adapters/routers/templates/search.html b/src/py_semantic_taxonomy/adapters/routers/templates/search.html index a27059d..c5b69e2 100644 --- a/src/py_semantic_taxonomy/adapters/routers/templates/search.html +++ b/src/py_semantic_taxonomy/adapters/routers/templates/search.html @@ -25,7 +25,7 @@

Search Results

diff --git a/tests/integration/test_web_ui.py b/tests/integration/test_web_ui.py new file mode 100644 index 0000000..6ecf40d --- /dev/null +++ b/tests/integration/test_web_ui.py @@ -0,0 +1,87 @@ +"""Integration tests for Web UI functionality.""" +import pytest +from urllib.parse import quote + + +@pytest.mark.typesense +async def test_web_search_preserves_language_in_results( + sqlite, typesense, anonymous_client, cn +): + """Test that search results include language parameter in concept links.""" + # Perform a search in German + response = await anonymous_client.get( + "/web/search/", params={"query": "Esel", "language": "de"} + ) + assert response.status_code == 200 + + # Check that the HTML contains links with language parameter + html_content = response.text + + # Search results should link to concepts with language parameter + # The link format should be: /web/concept/{iri}?language=de + concept_iri = cn.concept_2023_low["@id"] + expected_link_pattern = f"/web/concept/{quote(concept_iri)}?language=de" + + assert expected_link_pattern in html_content, ( + f"Expected to find link '{expected_link_pattern}' in search results, " + f"but it was not present. This means clicking on a search result " + f"will not preserve the language setting." + ) + + +@pytest.mark.typesense +async def test_web_search_multiple_languages(sqlite, typesense, anonymous_client, cn): + """Test that different languages produce correctly parameterized links.""" + languages = ["en", "de"] + + for lang in languages: + response = await anonymous_client.get( + "/web/search/", params={"query": "test", "language": lang} + ) + assert response.status_code == 200 + + # Verify the hidden language input has the correct value + html_content = response.text + assert f' Date: Tue, 14 Oct 2025 09:54:55 +0200 Subject: [PATCH 3/4] IRI in search doesn't use search but goes directly to concept --- .../adapters/routers/web_router.py | 155 +++++++++++++----- tests/integration/test_web_ui.py | 103 ++++++++++++ 2 files changed, 218 insertions(+), 40 deletions(-) diff --git a/src/py_semantic_taxonomy/adapters/routers/web_router.py b/src/py_semantic_taxonomy/adapters/routers/web_router.py index 03988a7..d596ecc 100644 --- a/src/py_semantic_taxonomy/adapters/routers/web_router.py +++ b/src/py_semantic_taxonomy/adapters/routers/web_router.py @@ -2,6 +2,7 @@ from pathlib import Path as PathLib from urllib.parse import quote, unquote, urlencode +import rfc3987 import structlog from fastapi import APIRouter, Depends, HTTPException, Path, Request from fastapi.responses import HTMLResponse, RedirectResponse @@ -22,6 +23,15 @@ router = APIRouter(prefix="/web", include_in_schema=False) +def _is_iri(query: str) -> bool: + """Check if query string is a valid HTTP/HTTPS IRI.""" + try: + parsed = rfc3987.parse(query.strip(), rule="IRI") + return parsed.get("scheme") in ("http", "https") + except ValueError: + return False + + def value_for_language(value: list[dict[str, str]], lang: str) -> str: """Get the `@value` for a list of multilingual strings with correct `@language` value""" for dct in value: @@ -90,7 +100,9 @@ async def redirect_blank_web_page( return RedirectResponse(request.url_for("web_concept_schemes")) -def concept_scheme_view_url(request: Request, concept_scheme_iri: str, language: str) -> str: +def concept_scheme_view_url( + request: Request, concept_scheme_iri: str, language: str +) -> str: params = {"language": language} return ( str(request.url_for("web_concept_scheme_view", iri=quote(concept_scheme_iri))) @@ -121,8 +133,13 @@ async def web_concept_schemes( for scheme in concept_schemes: scheme.url = concept_scheme_view_url(request, scheme.id_, language) - languages = [(request.url, Language.get(language).display_name(language).title())] + [ - (str(request.url_for("web_concept_schemes")) + "?language=" + quote(code), label) + languages = [ + (request.url, Language.get(language).display_name(language).title()) + ] + [ + ( + str(request.url_for("web_concept_schemes")) + "?language=" + quote(code), + label, + ) for code, label in format_languages(settings.languages) if code != language ] @@ -164,9 +181,13 @@ async def web_concept_scheme_view( concept_scheme_iri=decoded_iri, top_concepts_only=True ) for concept in concepts: - concept.url = concept_view_url(request, concept.id_, concept_scheme.id_, language) + concept.url = concept_view_url( + request, concept.id_, concept_scheme.id_, language + ) - languages = [(request.url, Language.get(language).display_name(language).title())] + [ + languages = [ + (request.url, Language.get(language).display_name(language).title()) + ] + [ ( str(request.url_for("web_concept_scheme_view", iri=iri)) + "?language=" @@ -189,10 +210,16 @@ async def web_concept_scheme_view( }, ) except de.ConceptSchemeNotFoundError: - raise HTTPException(status_code=404, detail=f"Concept Scheme with IRI `{iri}` not found") + raise HTTPException( + status_code=404, detail=f"Concept Scheme with IRI `{iri}` not found" + ) except de.ConceptSchemesNotInDatabase as e: - logger.error("Database error while fetching concept scheme", iri=iri, error=str(e)) - raise HTTPException(status_code=500, detail="Database error while fetching concept scheme") + logger.error( + "Database error while fetching concept scheme", iri=iri, error=str(e) + ) + raise HTTPException( + status_code=500, detail="Database error while fetching concept scheme" + ) def concept_view_url( @@ -200,7 +227,9 @@ def concept_view_url( ) -> str: params = {"concept_scheme": concept_scheme_iri, "language": language} return ( - str(request.url_for("web_concept_view", iri=quote(concept_iri))) + "?" + urlencode(params) + str(request.url_for("web_concept_view", iri=quote(concept_iri))) + + "?" + + urlencode(params) ) @@ -233,7 +262,10 @@ async def web_concept_view( if not language: return RedirectResponse( concept_view_url( - request, concept.id_, concept.schemes[0]["@id"], settings.languages[0] + request, + concept.id_, + concept.schemes[0]["@id"], + settings.languages[0], ) ) concept = concept.filter_language(language) @@ -245,7 +277,10 @@ async def web_concept_view( concept_iri=concept.id_, concept_scheme_iri=scheme.id_ ) )[::-1] - hierarchy = [(concept_view_url(request, c.id_, scheme.id_, language), c) for c in hierarchy] + hierarchy = [ + (concept_view_url(request, c.id_, scheme.id_, language), c) + for c in hierarchy + ] async def get_concept_and_link(iri: str) -> (str, de.Concept | str): try: @@ -264,7 +299,9 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str): except de.ConceptNotFoundError: return iri, iri - relationships = await service.relationships_get(iri=decoded_iri, source=True, target=True) + relationships = await service.relationships_get( + iri=decoded_iri, source=True, target=True + ) broader = [ (await get_concept_and_link(obj.target)) for obj in relationships @@ -277,7 +314,8 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str): ] scheme_list = [ - (request.url_for("web_concept_view", iri=quote(s["@id"])), s) for s in concept.schemes + (request.url_for("web_concept_view", iri=quote(s["@id"])), s) + for s in concept.schemes ] associations = await service.association_get_all(source_concept_iri=concept.id_) @@ -286,29 +324,27 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str): for target in obj.target_concepts: try: url, assoc_concept = await get_concept_and_link(target["@id"]) - formatted_associations.append( - { - "url": url, - "obj": assoc_concept, - "conditional": None, - "conversion": target.get( - "http://qudt.org/3.0.0/schema/qudt/conversionMultiplier" - ), - } - ) + formatted_associations.append({ + "url": url, + "obj": assoc_concept, + "conditional": None, + "conversion": target.get( + "http://qudt.org/3.0.0/schema/qudt/conversionMultiplier" + ), + }) except de.ConceptNotFoundError: - formatted_associations.append( - { - "url": target["@id"], - "obj": target["@id"], - "conditional": None, - "conversion": target.get( - "http://qudt.org/3.0.0/schema/qudt/conversionMultiplier" - ), - } - ) - - languages = [(request.url, Language.get(language).display_name(language).title())] + [ + formatted_associations.append({ + "url": target["@id"], + "obj": target["@id"], + "conditional": None, + "conversion": target.get( + "http://qudt.org/3.0.0/schema/qudt/conversionMultiplier" + ), + }) + + languages = [ + (request.url, Language.get(language).display_name(language).title()) + ] + [ ( concept_view_url( request, @@ -341,10 +377,16 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str): }, ) except de.ConceptNotFoundError: - raise HTTPException(status_code=404, detail=f"Concept with IRI `{iri}` not found") + raise HTTPException( + status_code=404, detail=f"Concept with IRI `{iri}` not found" + ) except de.ConceptSchemesNotInDatabase as e: - logger.error("Database error while fetching concept", iri=decoded_iri, error=str(e)) - raise HTTPException(status_code=500, detail="Database error while fetching concept") + logger.error( + "Database error while fetching concept", iri=decoded_iri, error=str(e) + ) + raise HTTPException( + status_code=500, detail="Database error while fetching concept" + ) @router.get( @@ -357,15 +399,48 @@ async def web_search( language: str = "en", semantic: bool = True, search_service=Depends(get_search_service), + graph_service=Depends(get_graph_service), settings=Depends(get_settings), ) -> HTMLResponse: """Search for concepts.""" + # Check if query is an IRI and attempt direct lookup + if query and _is_iri(query): + # Try to get concept directly + try: + concept = await graph_service.concept_get(iri=query) + # If found, redirect to concept page + return RedirectResponse( + url=concept_view_url( + request, + concept.id_, + concept.schemes[0]["@id"], + language, + ), + status_code=303, # See Other + ) + except de.ConceptNotFoundError: + # Not a concept, try concept scheme + try: + concept_scheme = await graph_service.concept_scheme_get(iri=query) + # If found, redirect to concept scheme page + return RedirectResponse( + url=concept_scheme_view_url(request, concept_scheme.id_, language), + status_code=303, # See Other + ) + except de.ConceptSchemeNotFoundError: + # IRI not found in database, fall through to regular search + pass + try: results = [] if query: - results = await search_service.search(query=query, language=language, semantic=semantic) + results = await search_service.search( + query=query, language=language, semantic=semantic + ) - languages = [(request.url, Language.get(language).display_name(language).title())] + [ + languages = [ + (request.url, Language.get(language).display_name(language).title()) + ] + [ ( str(request.url_for("web_search")) + "?" diff --git a/tests/integration/test_web_ui.py b/tests/integration/test_web_ui.py index 6ecf40d..f06067d 100644 --- a/tests/integration/test_web_ui.py +++ b/tests/integration/test_web_ui.py @@ -85,3 +85,106 @@ async def test_web_search_empty_query(sqlite, anonymous_client): html_content = response.text assert "Start searching" in html_content or "Search" in html_content + + +@pytest.mark.postgres +async def test_web_search_with_concept_iri_redirects_to_concept( + postgres, anonymous_client, cn_db_engine, cn +): + """Test that searching for a concept IRI redirects directly to the concept page.""" + concept_iri = cn.concept_top["@id"] + + response = await anonymous_client.get( + "/web/search/", + params={"query": concept_iri, "language": "de"}, + follow_redirects=False, + ) + + # Should redirect with 303 See Other + assert response.status_code == 303 + + # Should redirect to the concept view page with language preserved + redirect_url = response.headers["location"] + assert "/web/concept/" in redirect_url + assert quote(concept_iri) in redirect_url + assert "language=de" in redirect_url + + +@pytest.mark.postgres +async def test_web_search_with_concept_scheme_iri_redirects( + postgres, anonymous_client, cn_db_engine, cn +): + """Test that searching for a concept scheme IRI redirects to the concept scheme page.""" + scheme_iri = cn.scheme["@id"] + + response = await anonymous_client.get( + "/web/search/", + params={"query": scheme_iri, "language": "en"}, + follow_redirects=False, + ) + + # Should redirect with 303 See Other + assert response.status_code == 303 + + # Should redirect to the concept scheme view page + redirect_url = response.headers["location"] + assert "/web/concept_scheme/" in redirect_url + assert quote(scheme_iri) in redirect_url + assert "language=en" in redirect_url + + +@pytest.mark.postgres +async def test_web_search_with_nonexistent_iri_shows_search_page( + postgres, anonymous_client, cn_db_engine +): + """Test that searching for an IRI that doesn't exist falls back to search (or error if not configured).""" + nonexistent_iri = "http://example.com/nonexistent/concept/12345" + + response = await anonymous_client.get( + "/web/search/", + params={"query": nonexistent_iri, "language": "en"}, + follow_redirects=True, + ) + + # Should show search page (200) if search engine configured + # or error (503) if search engine not configured + # Important: should NOT redirect (303) since concept doesn't exist + assert response.status_code in (200, 503) + if response.status_code == 503: + assert "Search engine not available" in response.text or "503" in response.text + + +@pytest.mark.postgres +@pytest.mark.typesense +async def test_web_search_with_nonexistent_iri_falls_back_to_search( + postgres, typesense, anonymous_client, cn_db_engine, cn +): + """Test that searching for an IRI that doesn't exist falls back to text search.""" + nonexistent_iri = "http://example.com/nonexistent/concept/12345" + + response = await anonymous_client.get( + "/web/search/", + params={"query": nonexistent_iri, "language": "en"}, + follow_redirects=True, + ) + + # Should show search page (no redirect) since concept doesn't exist + assert response.status_code == 200 + # Should show the search interface (may or may not have results from text search) + assert "Search" in response.text or "search" in response.text + + +async def test_web_search_with_regular_text_not_treated_as_iri(anonymous_client): + """Test that regular search text is not treated as an IRI (no database required).""" + response = await anonymous_client.get( + "/web/search/", + params={"query": "test query", "language": "en"}, + follow_redirects=True, + ) + + # Should get error because search engine not configured (503) + # or show search page if engine is configured (200) + # The important thing is it doesn't try to treat it as an IRI and redirect + assert response.status_code in (200, 503) + if response.status_code == 503: + assert "Search engine not available" in response.text or "503" in response.text From 396a1d38ceec4acf2c5ce9cd40c56e4a0b3b1769 Mon Sep 17 00:00:00 2001 From: Valentin Starlinger Date: Tue, 14 Oct 2025 10:22:31 +0200 Subject: [PATCH 4/4] formatting --- .../adapters/routers/web_router.py | 51 +++++-------------- 1 file changed, 12 insertions(+), 39 deletions(-) diff --git a/src/py_semantic_taxonomy/adapters/routers/web_router.py b/src/py_semantic_taxonomy/adapters/routers/web_router.py index d596ecc..8fcbd1a 100644 --- a/src/py_semantic_taxonomy/adapters/routers/web_router.py +++ b/src/py_semantic_taxonomy/adapters/routers/web_router.py @@ -100,9 +100,7 @@ async def redirect_blank_web_page( return RedirectResponse(request.url_for("web_concept_schemes")) -def concept_scheme_view_url( - request: Request, concept_scheme_iri: str, language: str -) -> str: +def concept_scheme_view_url(request: Request, concept_scheme_iri: str, language: str) -> str: params = {"language": language} return ( str(request.url_for("web_concept_scheme_view", iri=quote(concept_scheme_iri))) @@ -133,13 +131,8 @@ async def web_concept_schemes( for scheme in concept_schemes: scheme.url = concept_scheme_view_url(request, scheme.id_, language) - languages = [ - (request.url, Language.get(language).display_name(language).title()) - ] + [ - ( - str(request.url_for("web_concept_schemes")) + "?language=" + quote(code), - label, - ) + languages = [(request.url, Language.get(language).display_name(language).title())] + [ + (str(request.url_for("web_concept_schemes")) + "?language=" + quote(code), label) for code, label in format_languages(settings.languages) if code != language ] @@ -181,13 +174,9 @@ async def web_concept_scheme_view( concept_scheme_iri=decoded_iri, top_concepts_only=True ) for concept in concepts: - concept.url = concept_view_url( - request, concept.id_, concept_scheme.id_, language - ) + concept.url = concept_view_url(request, concept.id_, concept_scheme.id_, language) - languages = [ - (request.url, Language.get(language).display_name(language).title()) - ] + [ + languages = [(request.url, Language.get(language).display_name(language).title())] + [ ( str(request.url_for("web_concept_scheme_view", iri=iri)) + "?language=" @@ -227,9 +216,7 @@ def concept_view_url( ) -> str: params = {"concept_scheme": concept_scheme_iri, "language": language} return ( - str(request.url_for("web_concept_view", iri=quote(concept_iri))) - + "?" - + urlencode(params) + str(request.url_for("web_concept_view", iri=quote(concept_iri))) + "?" + urlencode(params) ) @@ -262,10 +249,7 @@ async def web_concept_view( if not language: return RedirectResponse( concept_view_url( - request, - concept.id_, - concept.schemes[0]["@id"], - settings.languages[0], + request, concept.id_, concept.schemes[0]["@id"], settings.languages[0] ) ) concept = concept.filter_language(language) @@ -277,10 +261,7 @@ async def web_concept_view( concept_iri=concept.id_, concept_scheme_iri=scheme.id_ ) )[::-1] - hierarchy = [ - (concept_view_url(request, c.id_, scheme.id_, language), c) - for c in hierarchy - ] + hierarchy = [(concept_view_url(request, c.id_, scheme.id_, language), c) for c in hierarchy] async def get_concept_and_link(iri: str) -> (str, de.Concept | str): try: @@ -377,16 +358,12 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str): }, ) except de.ConceptNotFoundError: - raise HTTPException( - status_code=404, detail=f"Concept with IRI `{iri}` not found" - ) + raise HTTPException(status_code=404, detail=f"Concept with IRI `{iri}` not found") except de.ConceptSchemesNotInDatabase as e: logger.error( "Database error while fetching concept", iri=decoded_iri, error=str(e) ) - raise HTTPException( - status_code=500, detail="Database error while fetching concept" - ) + raise HTTPException(status_code=500, detail="Database error while fetching concept") @router.get( @@ -434,13 +411,9 @@ async def web_search( try: results = [] if query: - results = await search_service.search( - query=query, language=language, semantic=semantic - ) + results = await search_service.search(query=query, language=language, semantic=semantic) - languages = [ - (request.url, Language.get(language).display_name(language).title()) - ] + [ + languages = [(request.url, Language.get(language).display_name(language).title())] + [ ( str(request.url_for("web_search")) + "?"