Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 188 additions & 0 deletions app/models/concept_usage/check_concept_usage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#!/usr/bin/env python3

import json
import subprocess
import sys
from dataclasses import dataclass
from typing import Dict, List, Set
import re
import traceback
import os.path

# ==================== Constants ======================
# Location of the JSON rules file. The path is relative, so it is resolved
# against the current working directory when the script runs.
RULES_PATH = "rules.json"

# Return codes (process exit statuses)
# SUCCESS: checking completed and nothing was reported.
SUCCESS = 0
# CODE_HAD_ISSUES: checking completed and at least one rule matched.
CODE_HAD_ISSUES = 32
# CODE_CHECKING_FAILED: the check itself could not be completed
# (wrong arguments, failed input checks, query/result mismatch, any exception).
CODE_CHECKING_FAILED = 64

# Commands sent to clang-query ahead of the match queries.
# Removes all output except "matched" messages
# No user code should be visible in the output
CLANG_QUERY_PREAMBLE = """\
set traversal IgnoreUnlessSpelledInSource
disable output diag
disable output print
disable output detailed-ast
disable output dump
"""

# ==================== Clang Query Utils ======================

def clang_query(source_file: str, query: str) -> str:
    """Run clang-query on ``source_file`` and return what it wrote to stdout.

    ``query`` is appended to ``CLANG_QUERY_PREAMBLE`` and fed to the process
    on standard input.

    Anything written to stderr is deliberately thrown away: we can't
    guarantee a student's code will parse perfectly on our system (they
    could have their own include files, etc.). Clang does a best-effort
    parse in those cases, which seems acceptable.

    Syntax errors in the ``match`` queries are not detected here. They are
    caught later, when the result is sanity checked by comparing the number
    of successful match results with the expected number.

    The clang-query return code is ignored as well; it doesn't appear to be
    useful either.
    """
    command = ["clang-query", source_file, "--"]
    script = f"{CLANG_QUERY_PREAMBLE}{query}\n"

    completed = subprocess.run(
        command,
        input=script,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    return completed.stdout

def clang_query_extract_matches(output: List[str]) -> List[int]:
    """Collect the match counts reported in clang-query output lines.

    Each line is searched for a summary such as ``0 matches`` or ``1 match``.
    The number from the first such summary on a line is kept, in line order;
    lines without a summary contribute nothing.
    """
    count_pattern = re.compile(r'(\d+)\s+match(?:es)?')
    hits = (count_pattern.search(line) for line in output)
    return [int(hit.group(1)) for hit in hits if hit]

# ==================== Rule Loading ======================

@dataclass
class MatchQuerySet:
    """One rule: a group of clang-query matchers reported under a single id.

    The field names mirror the keys of a rule entry in the rules file,
    because entries are unpacked straight into the constructor.
    """

    # Matcher expressions; each one is sent to clang-query as its own
    # ``match`` command.
    queries: List[str]
    # Identifier of the rule, used by the ID output style.
    id: str
    # Message for the rule, used by the description output style.
    description: str

def load_rules(path: str) -> Dict:
    """Load the rules file at ``path`` and return the parsed JSON document.

    The file is always decoded as UTF-8 rather than with the platform's
    default encoding, so a rules file containing non-ASCII text loads the
    same way on every system.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as rules_file:
        return json.load(rules_file)

def resolve_rules(
    rules: Dict,
    ruleset: str
) -> List[MatchQuerySet]:
    """Collect every MatchQuerySet that applies to ``ruleset`` (e.g. a chapter).

    A ruleset's ``includes`` are resolved first, depth-first and in listed
    order, followed by the ruleset's own ``rules``.

    Each ruleset contributes at most once. A ruleset reachable through
    several include paths is therefore not reported twice, and a cycle in
    the includes terminates instead of recursing without end.

    Raises:
        KeyError: if ``ruleset`` or any included ruleset is not in ``rules``.
    """
    visited: Set[str] = set()
    resolved: List[MatchQuerySet] = []

    def visit(name: str) -> None:
        # Already handled (diamond include) or currently being handled (cycle)
        if name in visited:
            return
        visited.add(name)

        chapter = rules[name]
        for include in chapter.get("includes", []):
            visit(include)
        resolved.extend(MatchQuerySet(**rule) for rule in chapter.get("rules", []))

    visit(ruleset)
    return resolved

# ==================== Main Logic ======================

def check_usage(source_file: str, match_query_sets: List[MatchQuerySet]) -> List[MatchQuerySet]:
    """Return the query sets that match at least once in ``source_file``.

    Every query of every set is sent to clang-query in one run. The reported
    match counts are then paired back up with the sets, in order. A set is
    considered matched when the counts of its queries sum to more than zero.

    When there are no queries at all, clang-query is not run and an empty
    list is returned.

    If the number of reported counts differs from the number of queries
    sent (most likely a syntax error in a query), diagnostics are written to
    stderr and the process exits with ``CODE_CHECKING_FAILED``.
    """
    # Flatten the queries of all sets, keeping their order
    all_queries = [query for query_set in match_query_sets for query in query_set.queries]
    if not all_queries:
        # Nothing to ask clang-query, so nothing can match
        return []

    # Run every match query in one go; the results are split up afterwards
    combined_query = "\n".join(f"match {query}" for query in all_queries)

    # Get the results from clang-query
    output = clang_query(source_file, combined_query)
    # Find the number of matches for each query
    match_counts = clang_query_extract_matches(output.split("\n"))
    # There should be the same number as there were match statements
    expected_match_length = len(all_queries)

    if len(match_counts) != expected_match_length:
        # If not, something is wrong, abort checking
        print("Error: Mismatch between expected result count and clang-query output.", file=sys.stderr)
        print(f"Expected {expected_match_length} match counts, got {len(match_counts)}.", file=sys.stderr)
        print("There is likely a syntax error in the queries - see output below.", file=sys.stderr)
        print(f"\n==== clang-query Input ====\n{combined_query}\n============================", file=sys.stderr)
        print(f"\n==== clang-query Output ====\n{output}\n============================", file=sys.stderr)
        sys.exit(CODE_CHECKING_FAILED)

    # Step through and pair up each run of match counts with its MatchQuerySet
    matched: List[MatchQuerySet] = []
    start = 0
    for query_set in match_query_sets:
        end = start + len(query_set.queries)
        if sum(match_counts[start:end]) > 0:
            matched.append(query_set)
        start = end

    return matched

# ==================== Output Formatting ======================

def format_output_ids(matched: List[MatchQuerySet]) -> str:
    """Return the ids of the matched rules on one line, separated by ", "."""
    rule_ids = (query_set.id for query_set in matched)
    return ", ".join(rule_ids)

def format_output_descriptions(matched: List[MatchQuerySet]) -> str:
    """Return the descriptions of the matched rules, one per line."""
    descriptions = (query_set.description for query_set in matched)
    return "\n".join(descriptions)

# =========== Main Real (extracted so it can be tested...) ===========

def main_inner(source_file: str, ruleset: str, output_style: str, rules_path: str) -> str:
    """Check ``source_file`` against ``ruleset`` and return the formatted result.

    The rules are loaded from ``rules_path`` and resolved for ``ruleset``
    before the source file is checked. An ``output_style`` of ``"id"``
    yields the matched ids; any other value yields the matched descriptions.
    """
    match_query_sets = resolve_rules(load_rules(rules_path), ruleset)
    matches = check_usage(source_file, match_query_sets)

    formatter = format_output_ids if output_style == "id" else format_output_descriptions
    return formatter(matches)

# ==================== Command Line Usage ======================

def main() -> None:
    """Command-line entry point: check one source file and exit with a status.

    Expects three arguments: the source file, the ruleset name and the
    output style (``id`` or ``desc``). The result is printed on stdout;
    usage and error messages go to stderr so stdout only ever carries the
    result.

    Exit status:
        SUCCESS: the check ran and produced no output.
        CODE_HAD_ISSUES: the check ran and produced output.
        CODE_CHECKING_FAILED: wrong arguments, failed input checks, or any
            exception raised while checking.
    """
    try:
        if len(sys.argv) != 4:
            print(
                f"Usage: {sys.argv[0]} <source-file> <rules> <output-style {{id=ID's only (comma delimited)|desc=Descriptions only (newline delimited)}}>",
                file=sys.stderr,
            )
            sys.exit(CODE_CHECKING_FAILED)

        source_file, ruleset, output_style = sys.argv[1:4]

        # Check inputs with explicit raises rather than assert statements:
        # asserts are removed when Python runs with -O, which would silently
        # disable the validation.
        if not os.path.isfile(RULES_PATH):
            raise FileNotFoundError(f"Rule set file doesn't exist {RULES_PATH}")
        if not os.path.isfile(source_file):
            raise FileNotFoundError(f"Input source file doesn't exist: {source_file}")
        if output_style not in ("id", "desc"):
            raise ValueError(f"Invalid output-style {output_style}")

        output = main_inner(source_file, ruleset, output_style, RULES_PATH)

        # Print the actual result on stdout
        print(output)

        if output:
            # sys.exit raises SystemExit, which is not an Exception subclass,
            # so it is not intercepted by the handler below.
            sys.exit(CODE_HAD_ISSUES)

    except Exception:
        print("Error:", traceback.format_exc(), file=sys.stderr)
        sys.exit(CODE_CHECKING_FAILED)

    sys.exit(SUCCESS)

# Run the checker only when this file is executed as a script, so that
# importing it (e.g. to test main_inner) has no side effects.
if __name__ == "__main__":
    main()
182 changes: 182 additions & 0 deletions app/models/concept_usage/rules.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
{
"formatting_checks": {
"includes": [],
"rules": [
{
"queries": ["varDecl(hasType(isConstQualified()), unless(matchesName(\".*::[A-Z0-9_]+$\")), isExpansionInMainFile())"],
"id": "FormattingConstant",
"description": "Double check how you've formatted your constants."
},
{
"queries": ["varDecl(unless(hasType(isConstQualified())), unless(matchesName(\".*::[a-z0-9_]+$\")), isExpansionInMainFile(), unless(parmVarDecl()))", "fieldDecl(unless(hasType(isConstQualified())), unless(matchesName(\".*::[a-z0-9_]+$\")), isExpansionInMainFile())"],
"id": "FormattingVariable",
"description": "Double check how you've formatted your variables."
}
]
},

"always_checks": {
"includes": [],
"rules": [
{
"queries": ["varDecl(hasGlobalStorage(), isExpansionInMainFile(), unless(hasType(isConstQualified())))"],
"id": "GlobalVariable",
"description": "Make sure you aren't using any global variables."
},
{
"queries": ["declRefExpr(to(functionDecl(hasName(\"printf\"))), isExpansionInMainFile())", "declRefExpr(to(functionDecl(hasName(\"scanf\"))), isExpansionInMainFile())"],
"id": "Printf",
"description": "Make sure to use the terminal read/write functions we're using."
},
{
"queries": ["declRefExpr(to(varDecl(hasName(\"cin\"))), isExpansionInMainFile())", "declRefExpr(to(varDecl(hasName(\"cout\"))), isExpansionInMainFile())"],
"id": "CppIO",
"description": "Make sure to use the terminal read/write functions we're using."
},
{
"queries": ["labelStmt(isExpansionInMainFile())", "gotoStmt(isExpansionInMainFile())"],
"id": "Goto",
"description": "Make sure to use structured control flow statements rather than jumping to labels."
}
]
},

"base_checks": {
"includes": ["formatting_checks", "always_checks"],
"rules": []
},

"chapter_11_memory_deep_dive": {
"includes": ["base_checks"],
"rules": []
},

"chapter_10_pointers_and_lists": {
"includes": ["chapter_11_memory_deep_dive"],
"rules": [
{
"queries": ["declRefExpr(to(functionDecl(hasName(\"malloc\"))))", "declRefExpr(to(functionDecl(hasName(\"free\"))))"],
"id": "Malloc",
"description": "Try using C++ style memory allocation instead."
},
{
"queries": ["cxxNewExpr(isExpansionInMainFile(), isArray())"],
"id": "CppNewArray",
"description": "Sorry, we'll cover dynamic arrays soon!"
}
]
},

"chapter_09_generics_and_operators": {
"includes": ["chapter_10_pointers_and_lists"],
"rules": [
{
"queries": ["varDecl(hasType(qualType(hasUnqualifiedDesugaredType(pointerType(unless(pointee(hasDeclaration(isExpansionInFileMatching(\"splashkit/.*\")))))))), isExpansionInMainFile())"],
"id": "Pointer",
"description": "Still a bit early for pointers."
},
{
"queries": ["cxxThisExpr(isExpansionInMainFile(), unless(anything()))"],
"id": "This - NOTE: unless(isImplicit()) doesn't work on my machine, so for now this is nullified with anything()",
"description": "No need to use `this` yet."
},
{
"queries": ["cxxNewExpr(isExpansionInMainFile())"],
"id": "CppNew",
"description": "For now make sure to keep things on the stack."
}
]
},

"chapter_08_member_functions": {
"includes": ["chapter_09_generics_and_operators"],
"rules": [
{
"queries": ["templateTypeParmDecl(isExpansionInMainFile())"],
"id": "Template",
"description": "For now just use concrete types - no need to generalize your code too much."
},
{
"queries": ["functionDecl(matchesName(\"operator\"), isExpansionInMainFile())", "cxxMethodDecl(matchesName(\"operator\"), isExpansionInMainFile())"],
"id": "OperatorOverload",
"description": "No need to overload operators yet."
}
]
},

"chapter_07_handling_multiples": {
"includes": ["chapter_08_member_functions"],
"rules": [
{
"queries": ["cxxMethodDecl(isExpansionInMainFile())", "cxxConstructorDecl(isExpansionInMainFile())"],
"id": "Method",
"description": "Make sure your structs are just data for now."
}
]
},

"chapter_06_structuring_data": {
"includes": ["chapter_07_handling_multiples"],
"rules": [
{
"queries": ["varDecl(hasType(arrayType()), isExpansionInMainFile())", "fieldDecl(hasType(arrayType()), isExpansionInMainFile())", "declRefExpr(to(functionDecl(hasName(\"operator[]\"))), isExpansionInMainFile())", "cxxNewExpr(isExpansionInMainFile(), isArray())"],
"id": "Array",
"description": "For now, keep your variables holding only one piece of data at a time."
}
]
},

"chapter_05_structuring_code": {
"includes": ["chapter_06_structuring_data"],
"rules": [
{
"queries": ["recordDecl(isExpansionInMainFile())"],
"id": "Struct",
"description": "Keep the code simple for now - just pass around variables separately, don't group them."
},
{
"queries": ["enumDecl(isExpansionInMainFile())"],
"id": "Enum",
"description": "Rather than an enum, feel free to just use a series of integer constants for now."
}
]
},

"chapter_04_control_flow": {
"includes": ["chapter_05_structuring_code"],
"rules": [
{
"queries": ["functionDecl(isExpansionInMainFile(), unless(hasName(\"main\")), unless(cxxMethodDecl()))"],
"id": "Function",
"description": "Check that all your code is in the one function for now."
},
{
"queries": ["cxxThrowExpr(isExpansionInMainFile())"],
"id": "Exception",
"description": "Simplify your error handling - you can achieve this in other ways."
}
]
},

"chapter_03_data": {
"includes": ["chapter_04_control_flow"],
"rules": [
{
"queries": ["ifStmt(isExpansionInMainFile())", "whileStmt(isExpansionInMainFile())", "doStmt(isExpansionInMainFile())", "forStmt(isExpansionInMainFile())", "breakStmt(isExpansionInMainFile())", "continueStmt(isExpansionInMainFile())"],
"id": "ControlFlow",
"description": "We'll get to flow control soon! For now, keep things simple and linear."
}
]
},

"chapter_02_sequence": {
"includes": ["chapter_03_data"],
"rules": [
{
"queries": ["varDecl(isExpansionInMainFile())"],
"id": "Variable",
"description": "No need for variables yet, we'll get to those next."
}
]
}
}
Loading