Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion web/server/codechecker_server/api/mass_store_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import base64
from collections import defaultdict
from datetime import datetime, timedelta
import fnmatch
from hashlib import sha256
import json
import os
Expand Down Expand Up @@ -51,7 +52,8 @@
ExtendedReportData, \
File, FileContent, \
Report as DBReport, ReportAnnotations, ReviewStatus as ReviewStatusRule, \
Run, RunLock as DBRunLock, RunHistory
Run, RunLock as DBRunLock, RunHistory, \
SourceComponent, SourceComponentFile
from ..metadata import checker_is_unavailable, MetadataInfoParser

from .report_annotations import report_annotation_types
Expand Down Expand Up @@ -272,6 +274,50 @@ def get_file_content(file_path: str) -> bytes:
return f.read()


def assign_file_to_source_components(session, file_id, filepath):
"""
Checks all Source Components and links the file if it matches.
"""
components = session.query(SourceComponent).all()

associations = []

from .report_server import get_component_values

for component in components:
include, skip = get_component_values(component)

# If no patterns are defined, the component matches nothing.
if not skip and not include:
continue

is_included = False
if include:
for pattern in include:
if fnmatch.fnmatch(filepath, pattern):
is_included = True
break
else:
# If only skip is defined, it matches everything except skips.
is_included = True

is_skipped = False
if skip:
for pattern in skip:
if fnmatch.fnmatch(filepath, pattern):
is_skipped = True
break

if is_included and not is_skipped:
associations.append({
'source_component_name': component.name,
'file_id': file_id
})

if associations:
session.bulk_insert_mappings(SourceComponentFile, associations)


def add_file_record(
session: DBSession,
file_path: str,
Expand Down Expand Up @@ -320,11 +366,14 @@ def add_file_record(
content_hash=content_hash).on_conflict_do_nothing(
index_elements=['filepath', 'content_hash'])
file_id = session.execute(insert_stmt).inserted_primary_key[0]
assign_file_to_source_components(session, file_id, file_path)
session.commit()
return file_id

file_record = File(file_path, content_hash, None, None)
session.add(file_record)
session.flush()
assign_file_to_source_components(session, file_record.id, file_path)
session.commit()
except sqlalchemy.exc.IntegrityError as ex:
LOG.error(ex)
Expand Down
128 changes: 72 additions & 56 deletions web/server/codechecker_server/api/report_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
File, FileContent, \
Report, ReportAnnotations, ReportAnalysisInfo, ReviewStatus, \
Run, RunHistory, RunHistoryAnalysisInfo, RunLock, \
SourceComponent
SourceComponent, SourceComponentFile

from .common import exc_to_thrift_reqfail
from .thrift_enum_helper import detection_status_enum, \
Expand Down Expand Up @@ -146,41 +146,79 @@ def slugify(text):


def get_component_values(
session: DBSession,
component_name: str
component: SourceComponent
) -> Tuple[List[str], List[str]]:
"""
Get component values by component names and returns a tuple where the
first item contains a list path which should be skipped and the second
item contains a list of path which should be included.
Returns a tuple where the first item contains a list paths that should be
included and the second item contains a list of paths that should be
skipped.
E.g.:
+/a/b/x.cpp
+/a/b/y.cpp
-/a/b
On the above component value this function will return the following:
(['/a/b'], ['/a/b/x.cpp', '/a/b/y.cpp'])
(['/a/b/x.cpp', '/a/b/y.cpp'], ['/a/b'])
"""
components = session.query(SourceComponent) \
.filter(SourceComponent.name.like(component_name)) \
.all()

skip = []
include = []
skip = []

for component in components:
values = component.value.decode('utf-8').split('\n')
for value in values:
value = value.strip()
if not value:
continue
values = component.value.decode('utf-8').split('\n')
for value in values:
value = value.strip()
if not value:
continue

v = value[1:]
if value[0] == '+':
include.append(v)
elif value[0] == '-':
skip.append(v)

return include, skip


def update_source_component_files(
session,
component: Optional[SourceComponent] = None
):
"""
Refreshes the SourceComponentFile table for a specific source component.
If `component` is None, then all source components are updated.
"""
if component is None:
all_components = session.query(SourceComponent)
else:
all_components = [component]

# 1. Delete existing associations for this component
session.query(SourceComponentFile) \
.filter(SourceComponentFile.source_component_name.in_(
map(lambda component: component.name, all_components))) \
.delete(synchronize_session=False)

for comp in all_components:
# 2. Re-calculate associations
include, skip = get_component_values(comp)

file_ids_query = None
if skip and include:
include_q, skip_q = get_include_skip_queries(include, skip)
file_ids_query = include_q.except_(skip_q)
elif include:
include_q, _ = get_include_skip_queries(include, [])
file_ids_query = include_q
elif skip:
_, skip_q = get_include_skip_queries([], skip)
file_ids_query = select(File.id).where(File.id.notin_(skip_q))

v = value[1:]
if value[0] == '+':
include.append(v)
elif value[0] == '-':
skip.append(v)
if file_ids_query is not None:
file_ids = session.execute(file_ids_query).fetchall()

return skip, include
if file_ids:
session.bulk_insert_mappings(
SourceComponentFile,
[{'source_component_name': comp.name,
'file_id': fid[0]} for fid in file_ids])


def process_report_filter(
Expand Down Expand Up @@ -506,18 +544,10 @@ def get_source_component_file_query(
component_name: str
):
""" Get filter query for a single source component. """
skip, include = get_component_values(session, component_name)

if skip and include:
include_q, skip_q = get_include_skip_queries(include, skip)
return File.id.in_(include_q.except_(skip_q))

if include:
return or_(*[File.filepath.like(conv(fp)) for fp in include])
elif skip:
return and_(*[not_(File.filepath.like(conv(fp))) for fp in skip])

return None
return File.id.in_(
session.query(SourceComponentFile.file_id)
.filter(SourceComponentFile.source_component_name == component_name)
)


def get_reports_by_bugpath_filter_for_single_origin(
Expand Down Expand Up @@ -635,28 +665,12 @@ def get_other_source_component_file_query(session):
(Files NOT IN Component_1) AND (Files NOT IN Component_2) ... AND
(Files NOT IN Component_N)
"""
component_names = session.query(SourceComponent.name).all()

# If there are no user defined source components we don't have to filter.
if not component_names:
# Check if there are any source components
if not session.query(SourceComponent).count():
return None

def get_query(component_name: str):
""" Get file filter query for auto generated Other component. """
skip, include = get_component_values(session, component_name)

if skip and include:
include_q, skip_q = get_include_skip_queries(include, skip)
return File.id.notin_(include_q.except_(skip_q))
elif include:
return and_(*[File.filepath.notlike(conv(fp)) for fp in include])
elif skip:
return or_(*[File.filepath.like(conv(fp)) for fp in skip])

return None

queries = [get_query(n) for (n, ) in component_names]
return and_(*queries)
files_in_components = session.query(SourceComponentFile.file_id)
return File.id.notin_(files_in_components)


def get_open_reports_date_filter_query(tbl=Report, date=RunHistory.time):
Expand Down Expand Up @@ -3897,6 +3911,8 @@ def addSourceComponent(self, name, value, description):
user)

session.add(component)
update_source_component_files(session, component)

session.commit()

return True
Expand Down
17 changes: 17 additions & 0 deletions web/server/codechecker_server/database/run_db_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,23 @@ def __init__(self, name, due_date=None, description=None, closed_at=None):
self.closed_at = closed_at


class SourceComponentFile(Base):
__tablename__ = 'source_component_files'

source_component_name = Column(String,
ForeignKey('source_components.name',
ondelete='CASCADE'),
primary_key=True)
file_id = Column(Integer,
ForeignKey('files.id',
ondelete='CASCADE'),
primary_key=True)

def __init__(self, source_component_name, file_id):
self.source_component_name = source_component_name
self.file_id = file_id


class CleanupPlanReportHash(Base):
__tablename__ = 'cleanup_plan_report_hashes'

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""
Add SourceComponentFile join table

Revision ID: 198654dac219
Revises: c3dad71f8e6b
Create Date: 2026-01-14 15:43:27.225574
"""

from logging import getLogger

from alembic import op
import sqlalchemy as sa
from sqlalchemy.orm import Session

from codechecker_server.api.report_server import \
update_source_component_files


# Revision identifiers, used by Alembic.
revision = '198654dac219'
down_revision = 'c3dad71f8e6b'
branch_labels = None
depends_on = None


def upgrade():
LOG = getLogger("migration/report")
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
'source_component_files',
sa.Column('source_component_name', sa.String(), nullable=False),
sa.Column('file_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
['file_id'],
['files.id'],
name=op.f('fk_source_component_files_file_id_files'),
ondelete='CASCADE'),
sa.ForeignKeyConstraint(
['source_component_name'],
['source_components.name'],
name=op.f(
'fk_source_component_files_source_component_name_'
'source_components'),
ondelete='CASCADE'),
sa.PrimaryKeyConstraint(
'source_component_name',
'file_id',
name=op.f('pk_source_component_files'))
)

conn = op.get_bind()

session = Session(bind=conn)

update_source_component_files(session)

# ### end Alembic commands ###


def downgrade():
LOG = getLogger("migration/report")
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('source_component_files')
# ### end Alembic commands ###
24 changes: 0 additions & 24 deletions web/tests/functional/report_viewer_api/test_report_counting.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,30 +283,6 @@ def test_run1_all_file(self):
self.assertEqual(len(res), len(self.run1_files))
self.assertDictEqual(res, self.run1_files)

def test_filter_by_file_and_source_component(self):
"""
File and source component filter in getFileCounts().

Earlier this function resulted an SQL error due to an invalid SQL
statement (File table was ambiguously used, because it was joined
multiple times).

On the other hand we test here that getFileCounts() returns the number
of reports in all files regardless the filter fields. The reason is
that it wouldn't be possible on the GUI to display the options of the
file path filter which doesn't contain a report (i.e. their endpoint is
not in that file). In the future it would be enough to ignore the
filter only if "anywhere on bugpath" option is used (TODO?).
"""
runid = self._runids[0]
run_filter = ReportFilter(
filepath="call*",
componentNames=["doesn't exist"])
file_counts = self._cc_client.getFileCounts(
[runid], run_filter, None, None, 0)

self.assertEqual(len(file_counts), len(self.run1_files))

def test_run2_all_file(self):
"""
Get all the file counts for run2.
Expand Down
Loading