Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Add image_count and infobox_count columns to submissions table

Revision ID: f1a2b3c4d5e6
Revises: e4e56960f418

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import inspect


revision = "f1a2b3c4d5e6"
down_revision = "e4e56960f418"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add the ``image_count`` and ``infobox_count`` columns to ``submissions``.

    Both columns are nullable integers. The table's current columns are read
    once through the SQLAlchemy inspector, and a column is only added when it
    is not already reported, so a schema that already carries one or both
    columns is left untouched for those names.
    """
    bind = op.get_bind()
    existing = {column["name"] for column in inspect(bind).get_columns("submissions")}

    # image_count is handled before infobox_count, matching the downgrade's
    # reverse drop order.
    for column_name in ("image_count", "infobox_count"):
        if column_name in existing:
            continue
        op.add_column(
            "submissions",
            sa.Column(column_name, sa.Integer(), nullable=True),
        )


def downgrade() -> None:
    """Drop the ``infobox_count`` and ``image_count`` columns from ``submissions``.

    The columns are removed in the reverse of the order ``upgrade`` adds them.
    Each drop is skipped when the inspector does not report the column, so the
    downgrade does not fail on a schema where a column is already missing.
    """
    bind = op.get_bind()
    present = {column["name"] for column in inspect(bind).get_columns("submissions")}

    for column_name in ("infobox_count", "image_count"):
        if column_name in present:
            op.drop_column("submissions", column_name)
14 changes: 14 additions & 0 deletions backend/app/models/submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ class Submission(BaseModel):
# Can be negative if article was reduced in size
article_expansion_bytes = db.Column(db.Integer, nullable=True)

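# Approximate number of [[File:...]] / [[Image:...]] links found in the
# article wikitext at submission time; NULL when the count was not obtained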
image_count = db.Column(db.Integer, nullable=True)

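# Approximate number of {{Infobox ...}} templates found in the article
# wikitext at submission time; NULL when the count was not obtained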
infobox_count = db.Column(db.Integer, nullable=True)

# Template enforcement tracking
# True if template was automatically added to the article during submission
template_added = db.Column(db.Boolean, nullable=True, default=False)
Expand Down Expand Up @@ -173,6 +179,8 @@ def __init__(
template_added=False,
categories_added=None,
category_error=None,
image_count=None,
infobox_count=None,
):
"""
Initialize a new Submission instance
Expand All @@ -192,6 +200,8 @@ def __init__(
template_added: Whether template was automatically added to article (optional)
categories_added: List of category names that were automatically added (optional, stored as JSON)
category_error: Error message if category attachment failed (optional)
image_count: Number of images in the article (optional)
infobox_count: Number of infoboxes in the article (optional)
"""
# Set required fields
self.user_id = user_id
Expand Down Expand Up @@ -219,6 +229,8 @@ def __init__(
else:
self.categories_added = None
self.category_error = category_error
self.image_count = image_count
self.infobox_count = infobox_count
self.reviewed_by = None
self.reviewed_at = None
self.review_comment = None
Expand Down Expand Up @@ -530,6 +542,8 @@ def to_dict(self, include_user_info=False):
"article_page_id": self.article_page_id,
"article_size_at_start": self.article_size_at_start,
"article_expansion_bytes": self.article_expansion_bytes,
"image_count": self.image_count,
"infobox_count": self.infobox_count,
"template_added": self.template_added,
"categories_added": self.get_categories_added(),
"category_error": self.category_error,
Expand Down
31 changes: 31 additions & 0 deletions backend/app/routes/contest_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
check_article_has_category,
append_categories_to_article,
get_article_reference_count,
get_article_image_count,
get_article_infobox_count,
MEDIAWIKI_API_TIMEOUT,
)
from app.services.outreach_dashboard import (
Expand Down Expand Up @@ -1314,6 +1316,8 @@ def submit_to_contest(contest_id): # pylint: disable=too-many-return-statements
article_size_at_start = None
article_expansion_bytes = None
article_reference_count = None
image_count = None
infobox_count = None

# --- Fetch Article Information from MediaWiki API ---
# MediaWiki API fetching has deep nesting due to complex error handling
Expand Down Expand Up @@ -1699,6 +1703,31 @@ def submit_to_contest(contest_id): # pylint: disable=too-many-return-statements
pass
article_reference_count = None

# --- Fetch Image and Infobox Counts ---
# These richness metrics are stored for future use but are not used in
# validation or scoring yet.
try:
image_count = get_article_image_count(article_link)
except Exception as img_error: # pylint: disable=broad-exception-caught
try:
current_app.logger.warning(
"Failed to fetch image count: %s", str(img_error)
)
except Exception: # pylint: disable=broad-exception-caught
pass
image_count = None

try:
infobox_count = get_article_infobox_count(article_link)
except Exception as ibx_error: # pylint: disable=broad-exception-caught
try:
current_app.logger.warning(
"Failed to fetch infobox count: %s", str(ibx_error)
)
except Exception: # pylint: disable=broad-exception-caught
pass
infobox_count = None

# --- Validate Article Requirements ---
# Validate article byte count against contest requirements
# This check happens after fetching article information from MediaWiki API
Expand Down Expand Up @@ -2060,6 +2089,8 @@ def submit_to_contest(contest_id): # pylint: disable=too-many-return-statements
template_added=template_added,
categories_added=categories_added,
category_error=category_error,
image_count=image_count,
infobox_count=infobox_count,
)

submission.save()
Expand Down
59 changes: 59 additions & 0 deletions backend/app/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
"append_categories_to_article",
"get_article_reference_count",
"get_mediawiki_user_edit_count",
"get_article_image_count",
"get_article_infobox_count",
]


Expand Down Expand Up @@ -948,6 +950,63 @@ def _fetch_footnotes_count(api_url: str, page_title: str, headers: dict) -> int:
return 0


def _log_warning(message: str, error: Exception) -> None:
    """Log ``"<message>: <error>"`` at WARNING level without ever raising.

    The Flask application logger is preferred so the warning lands next to the
    rest of the application's logs. When Flask cannot be imported, or
    ``current_app`` is unusable (for example outside an application context),
    the warning is sent to this module's standard-library logger instead of
    being discarded, so failures stay visible in scripts, workers and tests.

    Args:
        message: Short description of what failed.
        error: The exception that triggered the warning; rendered with ``str``.

    Returns:
        None. Any exception raised while logging is suppressed.
    """
    try:
        from flask import current_app

        current_app.logger.warning("%s: %s", message, str(error))
        return
    except Exception:  # pylint: disable=broad-exception-caught
        # Flask missing or no usable application context: use the fallback.
        pass

    try:
        import logging

        logging.getLogger(__name__).warning("%s: %s", message, str(error))
    except Exception:  # pylint: disable=broad-exception-caught
        # Logging must never break core logic, so ignore any logging failures
        pass


def get_article_image_count(article_url: str) -> Optional[int]:
    """Count approximate number of image links in article wikitext.

    The wikitext is obtained through ``get_article_wikitext`` and scanned,
    case-insensitively, for link openers of the form ``[[File:`` or
    ``[[Image:``. The result is an approximation based purely on wikitext
    patterns: a match is not guaranteed to produce a rendered image.

    Args:
        article_url: URL of the article, passed unchanged to
            ``get_article_wikitext``.

    Returns:
        The number of matches, or None when no wikitext was returned or any
        exception occurred (the exception is reported via ``_log_warning``).
    """
    try:
        source_text = get_article_wikitext(article_url)
        if source_text is not None:
            # Explicit file/image links such as [[File:Example.jpg|...]] or
            # [[Image:Example.png|...]]; only the opener is matched.
            file_links = re.finditer(
                r'\[\[(?:File|Image):', source_text, flags=re.IGNORECASE
            )
            return sum(1 for _ in file_links)
        return None

    except Exception as error:  # pylint: disable=broad-exception-caught
        _log_warning("Failed to fetch image count", error)
        return None


def get_article_infobox_count(article_url: str) -> Optional[int]:
    """Return an approximate count of infobox templates in an article.

    The raw wikitext from ``get_article_wikitext`` is scanned,
    case-insensitively, for template openers of the form ``{{infobox`` with
    optional whitespace after the braces. Being a plain regex scan, it can
    over-count or under-count in edge cases such as nested templates or
    unusual formatting; it is meant for high-level richness metrics only.

    Args:
        article_url: URL of the article, passed unchanged to
            ``get_article_wikitext``.

    Returns:
        The number of matches, or None when no wikitext was returned or any
        exception occurred (the exception is reported via ``_log_warning``).
    """
    infobox_opener = r"\{\{\s*infobox\b"
    try:
        raw_text = get_article_wikitext(article_url)
        if raw_text is None:
            return None
        return len(re.findall(infobox_opener, raw_text, flags=re.IGNORECASE))

    except Exception as error:  # pylint: disable=broad-exception-caught
        _log_warning("Failed to fetch infobox count", error)
        return None


def get_article_reference_count(article_url: str) -> Optional[int]:
"""
Get the total number of references in a MediaWiki article.
Expand Down
Loading