From 5a3ca564a0cc7dfe6e67d02e4914c636613eaa86 Mon Sep 17 00:00:00 2001
From: bruntib
Date: Mon, 26 Jan 2026 13:41:26 +0100
Subject: [PATCH] [fix] Add -j flag to "CodeChecker store"

When the .plist files are on an NFS drive, processing them in parallel
may cause the store process to hang indefinitely. The `-j` flag is
introduced to the "CodeChecker store" command, so the .plist files can
also be parsed in a single process. When `-j1` is given, no process
pool is used at all.
---
 docs/web/user_guide.md                     |  7 +++-
 web/client/codechecker_client/cli/store.py | 42 +++++++++++++++-------
 2 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/docs/web/user_guide.md b/docs/web/user_guide.md
index 80d915887c..3a221fcf16 100644
--- a/docs/web/user_guide.md
+++ b/docs/web/user_guide.md
@@ -474,6 +474,11 @@ optional arguments:
                         is given, the longest match will be removed. You may
                         also use Unix shell-like wildcards (e.g. '/*/jsmith/').
+  -j JOBS, --jobs JOBS  Number of parallel jobs that process the input
+                        directory to upload. If the directory is on an NFS
+                        drive, parallel processing may hang the store
+                        indefinitely. A value of 1 uses no sub-processes at
+                        all. (default: 8)
   --detach              Runs `store` in fire-and-forget mode: exit
                         immediately once the server accepted the analysis
                         reports for storing, without waiting for the
                         server-side data
@@ -505,7 +510,7 @@ optional arguments:
   --zip-loc             Specify a directory for CodeChecker to store
                         temporary files. This option is useful if the
                         results directory is readonly and `/tmp` is
                         small.
-                        Defaults to the results directory
+                        Defaults to the results directory (falls back to /tmp if read-only).
   --verbose {info,debug,debug_analyzer}
                         Set verbosity level.
diff --git a/web/client/codechecker_client/cli/store.py b/web/client/codechecker_client/cli/store.py
index 1f5eeca9e5..afe025b2c3 100644
--- a/web/client/codechecker_client/cli/store.py
+++ b/web/client/codechecker_client/cli/store.py
@@ -26,7 +26,7 @@
 import shutil
 
 from collections import defaultdict, namedtuple
-from contextlib import contextmanager
+from contextlib import contextmanager, nullcontext
 from datetime import timedelta
 from threading import Timer
 from typing import Dict, Iterable, List, Set, Tuple
@@ -55,7 +55,7 @@ def assemble_blame_info(_, __) -> int:
 from codechecker_client.task_client import await_task_termination
 from codechecker_common import arg, logger, cmd_config
 from codechecker_common.checker_labels import CheckerLabels
-from codechecker_common.compatibility.multiprocessing import Pool
+from codechecker_common.compatibility.multiprocessing import Pool, cpu_count
 from codechecker_common.source_code_comment_handler import \
     SourceCodeCommentHandler
 from codechecker_common.util import format_size, load_json, strtobool
@@ -257,6 +257,16 @@ def add_arguments_to_parser(parser):
                              "match will be removed. You may also use Unix "
                              "shell-like wildcards (e.g. '/*/jsmith/').")
 
+    parser.add_argument('-j', '--jobs',
+                        type=int,
+                        dest="jobs",
+                        default=cpu_count(),
+                        help="Number of parallel jobs that process the "
+                             "input directory to upload. If the directory "
+                             "is on an NFS drive, parallel processing may "
+                             "hang the store indefinitely. A value of 1 "
+                             "uses no sub-processes at all.")
+
     parser.add_argument('--zip-loc',
                         type=str,
                         metavar='PATH',
@@ -424,18 +434,23 @@ def get_reports(
 
 def parse_analyzer_result_files(
         analyzer_result_files: Iterable[str],
         checker_labels: CheckerLabels,
-        zip_iter=map
+        jobs: int = cpu_count()
 ) -> AnalyzerResultFileReports:
     """ Get reports from the given analyzer result files. """
     analyzer_result_file_reports: AnalyzerResultFileReports = defaultdict(list)
 
-    for idx, (file_path, reports) in enumerate(zip(
-            analyzer_result_files, zip_iter(
+    ctx = nullcontext() if jobs == 1 else Pool(max_workers=jobs)
+
+    with ctx as executor:
+        map_fn = map if jobs == 1 else executor.map
+
+        for idx, (file_path, reports) in enumerate(zip(
+                analyzer_result_files, map_fn(
                 functools.partial(get_reports, checker_labels=checker_labels),
                 analyzer_result_files))):
-        LOG.debug(f"[{idx}/{len(analyzer_result_files)}] "
-                  f"Parsed '{file_path}' ...")
-        analyzer_result_file_reports[file_path] = reports
+            LOG.debug(f"[{idx}/{len(analyzer_result_files)}] "
+                      f"Parsed '{file_path}' ...")
+            analyzer_result_file_reports[file_path] = reports
 
     return analyzer_result_file_reports
@@ -454,7 +469,8 @@ def assemble_zip(inputs,
                  client,
                  prod_client,
                  checker_labels: CheckerLabels,
-                 tmp_dir: str):
+                 tmp_dir: str,
+                 jobs: int):
     """Collect and compress report and source files, together with files
     contanining analysis related information into a zip file which will be
     sent to the server.
@@ -491,9 +507,8 @@ def assemble_zip(inputs,
 
     LOG.debug(f"Processing {len(analyzer_result_file_paths)} report files ...")
 
-    with Pool() as executor:
-        analyzer_result_file_reports = parse_analyzer_result_files(
-            analyzer_result_file_paths, checker_labels, executor.map)
+    analyzer_result_file_reports = parse_analyzer_result_files(
+        analyzer_result_file_paths, checker_labels, jobs)
 
     LOG.info("Processing report files done.")
 
@@ -959,7 +974,8 @@ def main(args):
             client,
             prod_client,
             context.checker_labels,
-            temp_dir_path)
+            temp_dir_path,
+            args.jobs)
     except ReportLimitExceedError:
         sys.exit(1)
     except Exception as ex: