Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: Add pre-commit workflow #3284

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
378 changes: 378 additions & 0 deletions .github/logToCs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,378 @@
#!/usr/bin/env python3
# pylint: disable=invalid-name
"""
Convert a log to CheckStyle format.

Url: https://github.com/mdeweerd/LogToCheckStyle

The log can then be used for generating annotations in a github action.

Note: this script is very young and "quick and dirty".
Patterns can be added to "PATTERNS" to match more messages.

# Examples

Assumes that logToCs.py is available as .github/logToCs.py.

## Example 1:


```yaml
- run: |
pre-commit run -all-files | tee pre-commit.log
.github/logToCs.py pre-commit.log pre-commit.xml
- uses: staabm/annotate-pull-request-from-checkstyle-action@v1
with:
files: pre-commit.xml
notices-as-warnings: true # optional
```

## Example 2:


```yaml
- run: |
pre-commit run --all-files | tee pre-commit.log
- name: Add results to PR
if: ${{ always() }}
run: |
.github/logToCs.py pre-commit.log | cs2pr
```

Author(s):
- https://github.com/mdeweerd

License: MIT License

"""

import argparse
import os
import re
import sys
import xml.etree.ElementTree as ET # nosec


def remove_prefix(string, prefix):
"""
Remove prefix from string

Provided for backward compatibility.
"""
if prefix and string.startswith(prefix):
return string[len(prefix) :]
return string


def convert_to_checkstyle(messages, root_path=None):
"""
Convert provided message to CheckStyle format.
"""
root = ET.Element("checkstyle")
for message in messages:
fields = parse_message(message)
if fields:
add_error_entry(root, **fields, root_path=root_path)
return ET.tostring(root, encoding="utf_8").decode("utf_8")


def convert_text_to_checkstyle(text, root_path=None):
"""
Convert provided message to CheckStyle format.
"""
root = ET.Element("checkstyle")
for fields in parse_file(text):
if fields:
add_error_entry(root, **fields, root_path=root_path)
return ET.tostring(root, encoding="utf_8").decode("utf_8")


ANY_REGEX = r".*?"
FILE_REGEX = r"\s*(?P<file_name>\S.*?)\s*?"
EOL_REGEX = r"[\r\n]"
LINE_REGEX = r"\s*(?P<line>\d+?)\s*?"
COLUMN_REGEX = r"\s*(?P<column>\d+?)\s*?"
SEVERITY_REGEX = r"\s*(?P<severity>error|warning|notice|style|info)\s*?"
MSG_REGEX = r"\s*(?P<message>.+?)\s*?"
MULTILINE_MSG_REGEX = r"\s*(?P<message>(?:.|.[\r\n])+)"
# cpplint confidence index
CONFIDENCE_REGEX = r"\s*\[(?P<confidence>\d+)\]\s*?"


# List of message patterns, add more specific patterns earlier in the list
# Creating patterns by using constants makes them easier to define and read.
PATTERNS = [
# beautysh
# File ftp.sh: error: "esac" before "case" in line 90.
re.compile(
f"^File {FILE_REGEX}:{SEVERITY_REGEX}:"
f" {MSG_REGEX} in line {LINE_REGEX}.$"
),
# beautysh
# File socks4echo.sh: error: indent/outdent mismatch: -2.
re.compile(f"^File {FILE_REGEX}:{SEVERITY_REGEX}: {MSG_REGEX}$"),
# ESLint (JavaScript Linter), RoboCop, shellcheck
# path/to/file.js:10:2: Some linting issue
# path/to/file.rb:10:5: Style/Indentation: Incorrect indentation detected
# path/to/script.sh:10:1: SC2034: Some shell script issue
re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{COLUMN_REGEX}: {MSG_REGEX}$"),
# Cpplint default output:
# '%s:%s: %s [%s] [%d]\n'
# % (filename, linenum, message, category, confidence)
re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{MSG_REGEX}{CONFIDENCE_REGEX}$"),
# MSVC
# file.cpp(10): error C1234: Some error message
re.compile(
f"^{FILE_REGEX}\\({LINE_REGEX}\\):{SEVERITY_REGEX}{MSG_REGEX}$"
),
# Java compiler
# File.java:10: error: Some error message
re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{SEVERITY_REGEX}:{MSG_REGEX}$"),
# Python
# File ".../logToCs.py", line 90 (note: code line follows)
re.compile(f'^File "{FILE_REGEX}", line {LINE_REGEX}$'),
# Pylint, others
# path/to/file.py:10: [C0111] Missing docstring
# others
re.compile(f"^{FILE_REGEX}:{LINE_REGEX}: {MSG_REGEX}$"),
# Shellcheck:
# In script.sh line 76:
re.compile(
f"^In {FILE_REGEX} line {LINE_REGEX}:{EOL_REGEX}?"
f"({MULTILINE_MSG_REGEX})?{EOL_REGEX}{EOL_REGEX}"
),
# eslint:
# /path/to/filename
# 14:5 error Unexpected trailing comma comma-dangle
re.compile(
f"^{FILE_REGEX}{EOL_REGEX}"
rf"\s+{LINE_REGEX}:{COLUMN_REGEX}\s+{SEVERITY_REGEX}\s+{MSG_REGEX}$"
),
]

# Severities available in CodeSniffer report format
SEVERITY_NOTICE = "notice"
SEVERITY_WARNING = "warning"
SEVERITY_ERROR = "error"


def strip_ansi(text: str):
"""
Strip ANSI escape sequences from string (colors, etc)
"""
return re.sub(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])", "", text)


def parse_file(text):
"""
Parse all messages in a file

Returns the fields in a dict.
"""
# regex required to allow same group names
try:
import regex # pylint: disable=import-outside-toplevel
except ImportError as exc:
raise ImportError(
"The 'parsefile' method requires 'python -m pip install regex'"
) from exc

patterns = [pattern.pattern for pattern in PATTERNS]
# patterns = [PATTERNS[0].pattern]

full_regex = "(?:(?:" + (")|(?:".join(patterns)) + "))"
results = []

for fields in regex.finditer(
full_regex, strip_ansi(text), regex.MULTILINE
):
if not fields:
continue
result = fields.groupdict()

if len(result) == 0:
continue
severity = result.get("severity", None)
confidence = result.pop("confidence", None)

if confidence is not None:
# Convert confidence level of cpplint
# to warning, etc.
confidence = int(confidence)

if confidence <= 1:
severity = SEVERITY_NOTICE
elif confidence >= 5:
severity = SEVERITY_ERROR
else:
severity = SEVERITY_WARNING

if severity is None:
severity = SEVERITY_ERROR
else:
severity = severity.lower()

if severity in ["info", "style"]:
severity = SEVERITY_NOTICE

result["severity"] = severity

results.append(result)

return results


def parse_message(message):
"""
Parse message until it matches a pattern.

Returns the fields in a dict.
"""
for pattern in PATTERNS:
fields = pattern.match(message)
if not fields:
continue
result = fields.groupdict()
if len(result) == 0:
continue

if "confidence" in result:
# Convert confidence level of cpplint
# to warning, etc.
confidence = int(result["confidence"])
del result["confidence"]

if confidence <= 1:
severity = SEVERITY_NOTICE
elif confidence >= 5:
severity = SEVERITY_ERROR
else:
severity = SEVERITY_WARNING
result["severity"] = severity

if "severity" not in result:
result["severity"] = SEVERITY_ERROR
else:
result["severity"] = result["severity"].lower()

if result["severity"] in ["info", "style"]:
result["severity"] = SEVERITY_NOTICE

return result

# Nothing matched
return None


def add_error_entry( # pylint: disable=too-many-arguments
root,
severity,
file_name,
line=None,
column=None,
message=None,
source=None,
root_path=None,
):
"""
Add error information to the CheckStyle output being created.
"""
file_element = find_or_create_file_element(
root, file_name, root_path=root_path
)
error_element = ET.SubElement(file_element, "error")
error_element.set("severity", severity)
if line:
error_element.set("line", line)
if column:
error_element.set("column", column)
if message:
error_element.set("message", message)
if source:
# To verify if this is a valid attribute
error_element.set("source", source)


def find_or_create_file_element(root, file_name: str, root_path=None):
"""
Find/create file element in XML document tree.
"""

if root_path is not None:
file_name = remove_prefix(file_name, root_path)
for file_element in root.findall("file"):
if file_element.get("name") == file_name:
return file_element
file_element = ET.SubElement(root, "file")
file_element.set("name", file_name)
return file_element


def main():
"""
Parse the script arguments and get the conversion done.
"""
parser = argparse.ArgumentParser(
description="Convert messages to Checkstyle XML format."
)
parser.add_argument(
"input", help="Input file. Use '-' for stdin.", nargs="?", default="-"
)
parser.add_argument(
"output",
help="Output file. Use '-' for stdout.",
nargs="?",
default="-",
)
parser.add_argument(
"-i",
"--in",
dest="input_named",
help="Input filename. Overrides positional input.",
)
parser.add_argument(
"-o",
"--out",
dest="output_named",
help="Output filename. Overrides positional output.",
)
parser.add_argument(
"--root",
metavar="ROOT_PATH",
help="Root directory to remove from file paths."
" Defaults to working directory.",
default=os.getcwd(),
)

args = parser.parse_args()

if args.input == "-" and args.input_named:
with open(args.input_named, encoding="utf_8") as input_file:
text = input_file.read()
elif args.input != "-":
with open(args.input, encoding="utf_8") as input_file:
text = input_file.read()
else:
text = sys.stdin.read()

root_path = os.path.join(args.root, "")

try:
checkstyle_xml = convert_text_to_checkstyle(text, root_path=root_path)
except ImportError:
checkstyle_xml = convert_to_checkstyle(
re.split(r"[\r\n]+", text), root_path=root_path
)

if args.output == "-" and args.output_named:
with open(args.output_named, "w", encoding="utf_8") as output_file:
output_file.write(checkstyle_xml)
elif args.output != "-":
with open(args.output, "w", encoding="utf_8") as output_file:
output_file.write(checkstyle_xml)
else:
print(checkstyle_xml)


if __name__ == "__main__":
main()
Loading
Loading