little python refactoring
MaksimShagov committed Mar 26, 2024
1 parent 45c598c commit 2626cb6
Showing 4 changed files with 121 additions and 189 deletions.
benchmarks/generators/lexems.py
@@ -1,5 +1,3 @@
-import random
-from argparse import ArgumentParser
 from enum import Enum

 class Keyword(Enum):
@@ -26,9 +24,7 @@ class Keyword(Enum):
     TRUE = "True"
     NONE = "None"

-
-
-keywords = {
+KEYWORDS = {
     "bool": Keyword.BOOL,
     "False": Keyword.FALSE,
     "int": Keyword.INT,
@@ -73,7 +69,7 @@ class Operator(Enum):
     RightBrace = ")"
     RectLeftBrace = "["

-operators = {
+OPERATORS = {
     "%": Operator.Mod,
     ".": Operator.Dot,
     # "]": Operator.RectRightBrace,
@@ -94,23 +90,37 @@ class Operator(Enum):
     # "[": Operator.RectLeftBrace
 }

-def parse_args():
-
-    parser = ArgumentParser()
-
-    parser.add_argument('-o', '--output', type=str, required=True)
-    parser.add_argument('--word-count', type=int, required=True)
-
-    return parser.parse_args()
+class TokenType(Enum):
+    Keyword = "keyword"
+    Identifier = "identifier"
+    Operator = "operator"
+    Special = "special"
+    IntegerLiteral = "integer_literal"
+    FloatingPointLiteral = "floating_point_literal"
+    StringLiteral = "string_literal"

-def main():
-    args = parse_args()
-
-    words = list(operators.keys()) + list(keywords.keys())
-
-    with open(args.output, "w") as py_file:
-        for i in range(args.word_count):
-            py_file.writelines(str(words[random.randint(0, len(words) - 1)]) + ' ')
+IDENTIFIERS = {
+    "Z": TokenType.Identifier,
+    "V": TokenType.Identifier,
+    "Za_Pobedy": TokenType.Identifier,
+    "x": TokenType.Identifier,
+    "y": TokenType.Identifier,
+    "i": TokenType.Identifier,
+    "foo": TokenType.Identifier,
+    "bar": TokenType.Identifier,
+}

-main()
+LITERALS = {
+    "0": TokenType.IntegerLiteral,
+    "1": TokenType.IntegerLiteral,
+    "23122": TokenType.IntegerLiteral,
+    "42.24": TokenType.FloatingPointLiteral,
+    "1.0": TokenType.FloatingPointLiteral,
+    "0.0": TokenType.FloatingPointLiteral,
+    "0.3232": TokenType.FloatingPointLiteral,
+    "232.0": TokenType.FloatingPointLiteral,
+    "\"Hello\"": TokenType.StringLiteral,
+    "'Quote1'": TokenType.StringLiteral,
+    "\"Quote2\"": TokenType.StringLiteral,
+}
26 changes: 26 additions & 0 deletions benchmarks/generators/lexer_generator.py
@@ -0,0 +1,26 @@
import os
import math
import random
from typing import List
from lexems import KEYWORDS, OPERATORS, IDENTIFIERS, LITERALS

def simple_lexems_generator(words_count: int, words_multiplier: int, output_folder: str) -> List[str]:
    lexems = {**OPERATORS, **KEYWORDS, **IDENTIFIERS, **LITERALS}
    lexems_keys = list(lexems.keys())

    # One test file per order of magnitude: 1, multiplier, multiplier^2, ...,
    # up to the number of digits in words_count.
    words_counts = [int(math.pow(words_multiplier, i)) for i in range(len(str(words_count)))]
    tests_paths = []
    for value in words_counts:
        generated_file_path = os.path.join(output_folder, f"{value}.py")
        validation_file_path = os.path.join(output_folder, f"validation_{value}.val")
        tests_paths.append(generated_file_path)
        with open(generated_file_path, "w") as generated_file, \
                open(validation_file_path, "w") as validation_file:
            for _ in range(value):
                random_lexem = random.choice(lexems_keys)
                # Write the lexeme itself into the generated source and the
                # expected enum entry into the matching validation file.
                generated_file.write(random_lexem + ' ')
                validation_file.write(str(lexems[random_lexem]) + '\n')

    return tests_paths
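
For context, a minimal sketch of how this generator might be driven by hand, assuming it is run from the benchmarks folder; the sys.path tweak, folder name, and counts are illustrative, not part of this commit:

import os
import sys

sys.path.append("generators")  # so lexer_generator's own "from lexems import ..." resolves
from generators.lexer_generator import simple_lexems_generator

os.makedirs("test_files", exist_ok=True)
# With words_count=1000 and words_multiplier=10 this writes 1.py, 10.py,
# 100.py, and 1000.py, plus a validation_<n>.val file next to each.
paths = simple_lexems_generator(1000, 10, "test_files")
print(paths)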
62 changes: 62 additions & 0 deletions benchmarks/lexer_runner.py
@@ -0,0 +1,62 @@
from argparse import ArgumentParser
import os
import subprocess
import time
import tokenize

import pandas

from generators.lexer_generator import simple_lexems_generator

CLI_PATH = "../compiler/build/bin/Release/compiler.exe"


def parse_args():
    parser = ArgumentParser()
    parser.add_argument('--max-word-count', type=int, help="Maximum number of words in the largest test file", default=1000000)
    parser.add_argument('--words-multiplier', type=int, help="Size multiplier between consecutive test files", default=10)
    parser.add_argument('-o', '--output', type=str, help="Output folder", default="results")
    # TODO: add a generate argument

    return parser.parse_args()


def main():
    args = parse_args()
    os.makedirs(args.output, exist_ok=True)
    test_files_folder = os.path.join(args.output, "test_files")
    os.makedirs(test_files_folder, exist_ok=True)
    compiler_logs = os.path.join(args.output, "compiler_logs")
    os.makedirs(compiler_logs, exist_ok=True)

    tests_paths = simple_lexems_generator(args.max_word_count, args.words_multiplier, test_files_folder)

    tokenize_times = {}
    for filename in tests_paths:
        with open(filename, 'rb') as test_file:
            start = time.perf_counter()
            # Materialize the token stream so the whole file is actually lexed.
            tokens = list(tokenize.tokenize(test_file.readline))
            end = time.perf_counter()
        # Test files are named "<word count>.py"; strip ".py" to key by word count.
        tokenize_times[int(os.path.basename(filename)[:-3])] = end - start

    print(tokenize_times)

    data_frame = pandas.DataFrame.from_dict(tokenize_times, orient='index')
    data_frame.to_csv(os.path.join(args.output, "tokenize_times.csv"))

    c_times = {}  # TODO: parse the C++ compiler logs into timings
    for filename in tests_paths:
        log_path = os.path.join(compiler_logs, os.path.basename(filename)[:-3]) + ".txt"
        subprocess.run([
            CLI_PATH,
            "-l", log_path,
            "-v", "-O", "--last-module", "lexer", "--times",
            filename,
        ])


if __name__ == "__main__":
    main()
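
With the defaults above, the runner would be launched from the benchmarks folder roughly as follows (the flags come from parse_args; the compiler path depends on where the binary was built):

python lexer_runner.py --max-word-count 1000000 --words-multiplier 10 -o results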
166 changes: 0 additions & 166 deletions benchmarks/run_bechmark.py

This file was deleted.
