From 0ca62609fc73367cb1b0cdc5da6bdd314f629278 Mon Sep 17 00:00:00 2001 From: Martin Medler Date: Thu, 20 Jun 2024 22:35:41 +0200 Subject: [PATCH] Fix standard headers list Due to an error in the regex parsing the webpage content, all headers using `_` were missing. --- scripts/BUILD | 7 ++++++ scripts/extract_std_headers.py | 23 ++++++++++++++++---- scripts/mypy.sh | 2 +- scripts/test/BUILD | 7 ++++++ scripts/test/extract_std_headers_test.py | 27 ++++++++++++++++++++++++ scripts/unit_tests.sh | 2 +- src/analyze_includes/std_header.py | 18 ++++++++++++++++ 7 files changed, 80 insertions(+), 6 deletions(-) create mode 100644 scripts/BUILD create mode 100644 scripts/test/BUILD create mode 100644 scripts/test/extract_std_headers_test.py diff --git a/scripts/BUILD b/scripts/BUILD new file mode 100644 index 00000000..1ced800b --- /dev/null +++ b/scripts/BUILD @@ -0,0 +1,7 @@ +load("@rules_python//python:defs.bzl", "py_binary") + +py_binary( + name = "extract_std_headers", + srcs = ["extract_std_headers.py"], + visibility = [":__subpackages__"], +) diff --git a/scripts/extract_std_headers.py b/scripts/extract_std_headers.py index 8ff1f3d3..a4cfb274 100755 --- a/scripts/extract_std_headers.py +++ b/scripts/extract_std_headers.py @@ -10,11 +10,26 @@ python file containing the standard header list for DWYU to lookup. 
""" +from __future__ import annotations + import re from pathlib import Path -with Path("input.txt").open(encoding="utf-8") as fin: +INPUT = Path("input.txt") + + +def extract_header(text: str) -> list[str]: headers = [] - for line in fin.readlines(): - headers.extend(re.findall(r"<([a-z/.]+)>", line)) - print("\n".join(f'"{h}",' for h in sorted(set(headers)))) # noqa: T201 + for line in text.split("\n"): + headers.extend(re.findall(r"<([a-z_/.]+)>", line)) + return headers + + +def main() -> None: + with INPUT.open(encoding="utf-8") as fin: + headers = extract_header(fin.read()) + print("\n".join(f'"{h}",' for h in sorted(set(headers)))) # noqa: T201 + + +if __name__ == "__main__": + main() diff --git a/scripts/mypy.sh b/scripts/mypy.sh index ecc78b7a..aa5d794d 100755 --- a/scripts/mypy.sh +++ b/scripts/mypy.sh @@ -2,4 +2,4 @@ set -o errexit -bazel build --config=mypy -- //src/... //examples:all //test/aspect:all //test/apply_fixes:all +bazel build --config=mypy -- //src/... //scripts/... 
//examples:all //test/aspect:all //test/apply_fixes:all diff --git a/scripts/test/BUILD b/scripts/test/BUILD new file mode 100644 index 00000000..8ce0da9e --- /dev/null +++ b/scripts/test/BUILD @@ -0,0 +1,7 @@ +load("@rules_python//python:defs.bzl", "py_test") + +py_test( + name = "extract_std_headers_test", + srcs = ["extract_std_headers_test.py"], + deps = ["//scripts:extract_std_headers"], +) diff --git a/scripts/test/extract_std_headers_test.py b/scripts/test/extract_std_headers_test.py new file mode 100644 index 00000000..f2c0100f --- /dev/null +++ b/scripts/test/extract_std_headers_test.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +import unittest + +from scripts.extract_std_headers import extract_header + + +class TestExtractHeader(unittest.TestCase): + def test_empty_input_yields_empty_list(self) -> None: + self.assertEqual(extract_header(""), []) + + def test_extracting_headers(self) -> None: + headers = extract_header( + """ +unrelated_stuff +<foo> +ignore this +<bar.h> +<multiple_header> <in_one_line> + """.strip() + ) + + self.assertEqual(headers, ["foo", "bar.h", "multiple_header", "in_one_line"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/scripts/unit_tests.sh b/scripts/unit_tests.sh index 54c307fb..fcab4ff9 100755 --- a/scripts/unit_tests.sh +++ b/scripts/unit_tests.sh @@ -2,4 +2,4 @@ set -o errexit -bazel test -- //src/... //test/aspect:all //third_party/... +bazel test -- //src/... //scripts/... //test/aspect:all //third_party/... 
diff --git a/src/analyze_includes/std_header.py b/src/analyze_includes/std_header.py index 04189a41..d91456b6 100644 --- a/src/analyze_includes/std_header.py +++ b/src/analyze_includes/std_header.py @@ -38,6 +38,7 @@ "complex", "complex.h", "concepts", + "condition_variable", "coroutine", "cpio.h", "csetjmp", @@ -68,10 +69,13 @@ "fcntl.h", "fenv.h", "filesystem", + "flat_map", + "flat_set", "float.h", "fmtmsg.h", "fnmatch.h", "format", + "forward_list", "fstream", "ftw.h", "functional", @@ -79,7 +83,9 @@ "generator", "glob.h", "grp.h", + "hazard_pointer", "iconv.h", + "initializer_list", "inttypes.h", "iomanip", "ios", @@ -100,6 +106,7 @@ "math.h", "mdspan", "memory", + "memory_resource", "monetary.h", "mqueue.h", "mutex", @@ -109,6 +116,7 @@ "netinet/in.h", "netinet/tcp.h", "new", + "nl_types.h", "numbers", "numeric", "optional", @@ -125,12 +133,15 @@ "regex", "regex.h", "sched.h", + "scoped_allocator", "search.h", "semaphore", "semaphore.h", "set", "setjmp.h", + "shared_mutex", "signal.h", + "source_location", "span", "spanstream", "spawn.h", @@ -150,9 +161,11 @@ "stdio.h", "stdlib.h", "stdnoreturn.h", + "stop_token", "streambuf", "string", "string.h", + "string_view", "strings.h", "stropts.h", "strstream", @@ -175,19 +188,24 @@ "sys/utsname.h", "sys/wait.h", "syslog.h", + "system_error", "tar.h", "termios.h", + "text_encoding", "tgmath.h", "thread", "threads.h", "time.h", "trace.h", "tuple", + "type_traits", "typeindex", "typeinfo", "uchar.h", "ulimit.h", "unistd.h", + "unordered_map", + "unordered_set", "utility", "utime.h", "utmpx.h",