Pylint cleanup
gherka committed Nov 24, 2023
1 parent 0eceb59 commit e637252
Showing 27 changed files with 208 additions and 195 deletions.
15 changes: 8 additions & 7 deletions .pylintrc
@@ -23,13 +23,14 @@
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=C0303, # trailing whitespace
-C0330, # hanging indent,
C0103, # invalid name - too many false positives
+C3001, # Lambda expression assigned to a variable
E1136, # value is unsubscriptable
E1101, # no-member; too many false positives
W0212, # access to protected member
E1130, # invalid unary operand (numpy)
-R0201, # no self use - false positives in test modules
+W0622, # redefine builtins - __package__
+W0640, # variable defined in loop


[BASIC]
@@ -104,7 +105,7 @@ indent-after-paren=4
indent-string=' '

# Maximum number of characters on a single line.
-max-line-length=88
+max-line-length=100

# Maximum number of lines in a module.
max-module-lines=1000
@@ -236,7 +237,7 @@ valid-metaclass-classmethod-first-arg=cls
[DESIGN]

# Maximum number of arguments for function / method.
-max-args=6
+max-args=8

# Maximum number of attributes for a class (see R0902).
max-attributes=15
@@ -248,7 +249,7 @@ max-bool-expr=5
max-branches=12

# Maximum number of locals for function / method body.
-max-locals=20
+max-locals=25

# Maximum number of parents for a class (see R0901).
max-parents=7
@@ -279,5 +280,5 @@ deprecated-modules=

# Exceptions that will emit a warning when being caught. Defaults to
# "BaseException, Exception".
-overgeneral-exceptions=BaseException,
-Exception
+overgeneral-exceptions=builtins.BaseException,
+builtins.Exception
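
A side note on the overgeneral-exceptions change: newer pylint releases expect fully qualified names here (hence the builtins. prefix), and this option feeds the broad-exception checks. A minimal sketch of the kind of handler those checks flag; load_config is a hypothetical name, not part of exhibit:

    def load_config(path):
        '''Toy example only: catching Exception is "overgeneral".'''
        try:
            with open(path, encoding="utf-8") as f:
                return f.read()
        except Exception:  # pylint would report broad-exception-caught (W0718)
            return None
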
4 changes: 3 additions & 1 deletion exhibit/command/tests/test_performance.py
@@ -3,6 +3,8 @@
regular unit / reference testing
'''

+#pylint: disable=W0201
+
# Standard library imports
import string
import unittest
@@ -25,7 +27,7 @@
from memory_profiler import memory_usage
except ImportError:
memory_usage = None
print(f"memory_profiler not found. Make sure exhibit is installed in [dev] mode")
print("memory_profiler not found. Make sure exhibit is installed in [dev] mode")

class performanceTests(unittest.TestCase):
'''
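
The try/except ImportError guard above is the usual pattern for optional dev dependencies: the module still imports when memory_profiler is absent, and callers can check for None. A compact sketch of the same idea; profile_run is a made-up consumer, not an exhibit function:

    try:
        from memory_profiler import memory_usage
    except ImportError:
        memory_usage = None

    def profile_run(func):
        '''Return peak memory of func() in MiB, or None if profiling is unavailable.'''
        if memory_usage is None:
            return None
        return max(memory_usage((func, (), {})))
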
2 changes: 1 addition & 1 deletion exhibit/core/constants.py
@@ -8,4 +8,4 @@
ORIGINAL_VALUES_PAIRED = "See paired column"
ORIGINAL_VALUES_REGEX = "regex"
MISSING_DATA_STR = "Missing data"
-EXHIBIT_DB_LOCAL = join("db", "exhibit.db")
\ No newline at end of file
+EXHIBIT_DB_LOCAL = join("db", "exhibit.db")
24 changes: 15 additions & 9 deletions exhibit/core/constraints.py
@@ -1,6 +1,9 @@
'''
Module for various derived and user-set constraints
'''
+
+# pylint: disable=C0302
+
# Standard library imports
from collections import namedtuple
from datetime import datetime
@@ -166,7 +169,7 @@ def process_custom_constraints(self, custom_constraints):

cc_filter = constraint.get("filter", None)
cc_partitions = constraint.get("partition", None)
-cc_targets = constraint.get("targets", dict())
+cc_targets = constraint.get("targets", {})

clean_cc_filter = clean_up_constraint_string(cc_filter)
cc_filter_mask = get_constraint_mask(output_df, clean_cc_filter)
@@ -447,7 +450,7 @@ def _within_group_outliers(series):
whether the value is divisible by 2 without remainder.
'''

-q25, q50, q75 = np.percentile(series, [25, 50, 75])
+q25, _, q75 = np.percentile(series, [25, 50, 75])
iqr = q75 - q25

if iqr == 0:
@@ -761,9 +764,11 @@ def _make_almost_same(group):
ulinked_df = generate_user_linked_anon_df(
self.spec_dict, user_linked_cols, new_df.shape[0], starting_col_matrix)

+non_user_linked_cols = [x for x in df.columns if x not in user_linked_cols]
+
new_df = pd.concat(
[ulinked_df.set_index(new_df.index)] +
-[df.loc[filter_idx, [x for x in df.columns if x not in user_linked_cols]]],
+[df.loc[filter_idx, non_user_linked_cols]],
axis=1
).reindex(columns=df.columns)

@@ -865,7 +870,7 @@ def _generate_ordered_values(target_sequence, ordered_list, ordered_probs):
else:
pointer = 0

-result = sorted(unordered_result, key=lambda x: ordered_list.index(x))
+result = sorted(unordered_result, key=ordered_list.index)

return result

@@ -1025,7 +1030,7 @@ def _make_skewed_series(group):

# add nulls based on the miss_probability of the skew column
miss_pct = self.spec_dict["columns"][skew_col]["miss_probability"]
-miss_val = pd.NA if group.dtype =='Int64' else np.nan
+miss_val = pd.NA if group.dtype =="Int64" else np.nan
skewed_result = np.where(
rng.random(size=nrows) < miss_pct,
miss_val, result.values)
@@ -1037,7 +1042,8 @@

target_cols = [x.strip() for x in target_str.split(",")]
if len(target_cols) != 2: # pragma: no cover
raise Exception(f"{self.current_action} requires exactly 2 target columns.")
raise RuntimeError(
f"{self.current_action} requires exactly 2 target columns.")

if partition_cols is not None:
partition_cols = [x.strip() for x in partition_cols.split(",") if x]
@@ -1281,7 +1287,7 @@ def find_basic_constraint_columns(df):

return output

-def clean_up_constraint_string(raw_string, type="cc_filter"):
+def clean_up_constraint_string(raw_string):
'''
The default way to handle column names with whitespace in eval strings
is to enclose them in backticks. However, the default tokeniser will
@@ -1379,8 +1385,8 @@ def get_constraint_mask(df, clean_string):
.rename(lambda x: x.replace(" ", "__"), axis="columns")
.eval(clean_string, engine="python"))

-except SyntaxError: #pragma: no cover
-raise SyntaxError("Invalid filter expression supplied to custom action.")
+except SyntaxError as e: #pragma: no cover
+raise SyntaxError("Invalid filter expression supplied to custom action.") from e

return mask
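
The "raise ... from e" form used above chains the new error to the original, so both tracebacks are reported and pylint's raise-missing-from (W0707) stays quiet. A minimal, self-contained sketch of the same pattern; parse_filter is a made-up name, not an exhibit function:

    def parse_filter(expression):
        '''Toy example of explicit exception chaining.'''
        try:
            return compile(expression, "<filter>", "eval")
        except SyntaxError as e:
            # "from e" stores the original error on __cause__
            raise SyntaxError("Invalid filter expression supplied.") from e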

31 changes: 17 additions & 14 deletions exhibit/core/exhibit.py
@@ -96,22 +96,22 @@ def __init__(
skip_columns=None, linked_columns=None,
uuid_columns=None, discrete_columns=None,
save_probabilities=None, derived_columns_first=False,
-verbose=False, **kwargs):
+verbose=False):
'''
Initialise either from the CLI or by instantiating directly
'''

# Basic error checking on the arguments
if linked_columns is not None and len(linked_columns) < 2:
raise Exception("Please provide at least two linked columns")
raise RuntimeError("Please provide at least two linked columns")

self.command = command
self.source = source
self.output = output
self.inline_limit = inline_limit
self.equal_weights = equal_weights
self.skip_columns = skip_columns or set()
-self.linked_columns= linked_columns or list()
+self.linked_columns= linked_columns or []
self.uuid_columns= uuid_columns or set()
self.discrete_columns = discrete_columns or set()
self.save_probabilities = save_probabilities or set()
@@ -179,7 +179,7 @@ def write_spec(self, spec_yaml=None):
else:
output_path = self.output

-with open(output_path, "w") as f:
+with open(output_path, "w", encoding="utf-8") as f:
f.write(spec_yaml)

print("Exhibit ready to view")
@@ -208,7 +208,7 @@ def read_spec(self):
self.source = path_checker(self.source)

if self.source.suffix == ".yml":
-with open(self.source) as f:
+with open(self.source, encoding="utf-8") as f:
self.spec_dict = yaml.safe_load(f)
else: #pragma: no cover
raise TypeError("Specification is not in .yml format")
@@ -219,13 +219,13 @@
# these NONE values early and change them into empty sequences.
for key, value in self.spec_dict["metadata"].items():
if "columns" in key and value is None:
self.spec_dict["metadata"][key] = list()
self.spec_dict["metadata"][key] = []

if self.spec_dict.get("linked_columns", None) is None:
self.spec_dict["linked_columns"] = list()
self.spec_dict["linked_columns"] = []

if self.spec_dict.get("derived_columns", None) is None:
self.spec_dict["derived_columns"] = dict()
self.spec_dict["derived_columns"] = {}

for col in self.spec_dict["metadata"]["categorical_columns"]:

@@ -253,6 +253,7 @@ def validate_spec(self):
self.spec_dict = validated_spec
return validated_spec is not None

+#pylint: disable=R0912, R0915
def execute_spec(self):
'''
Function only runs if validate_spec returned True
@@ -310,7 +311,7 @@ def execute_spec(self):
for num_col in self.spec_dict["metadata"]["numerical_columns"]:

# skip derived columns; they need main columns (inc. nulls) generated first
-if num_col in (self.spec_dict.get("derived_columns", dict()) or dict()):
+if num_col in (self.spec_dict.get("derived_columns", {}) or {}):
continue

anon_df[num_col] = generate_continuous_column(
@@ -432,7 +433,8 @@
col_name=num_col
)
# see comments above as to why we're re-generating derived columns
-for derived_col, derived_def in self.spec_dict["derived_columns"].items(): #pragma: no cover
+derived = self.spec_dict["derived_columns"].items()
+for derived_col, derived_def in derived: #pragma: no cover
for num_col in num_cols:
if num_col in derived_def:
anon_df[derived_col] = generate_derived_column(anon_df, derived_def)
@@ -508,7 +510,7 @@ def generate(self):
self.write_spec()
return None

-else:
+if self.command == "fromspec":
self.read_spec()
if self.validate_spec():
self.execute_spec()
@@ -520,8 +522,9 @@
self.write_data()
return None

-# technically unreachable code because validation failures will raise
-return None #pragma: no cover
+raise RuntimeError( #pragma: no cover
+"Generation command not recognized. Please initialise Exhibit "
+"with a valid command.")

class Specification(UserDict): #pragma: no cover
'''
@@ -541,5 +544,5 @@ def write_spec(self, path):

spec_yaml = generate_YAML_string(self.data)

with open(path, "w") as f:
with open(path, "w", encoding="utf-8") as f:
f.write(spec_yaml)
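
The encoding="utf-8" arguments added throughout this file address pylint's unspecified-encoding check (W1514): without an explicit codec, open() falls back to the platform's locale encoding, so a spec written on one machine may not read back cleanly on another. A small sketch of the round trip, assuming a YAML payload as in exhibit; round_trip is a made-up helper:

    import yaml

    def round_trip(path, data):
        '''Write then re-read a mapping with an explicit codec.'''
        with open(path, "w", encoding="utf-8") as f:
            yaml.safe_dump(data, f)
        with open(path, encoding="utf-8") as f:
            return yaml.safe_load(f)
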
14 changes: 7 additions & 7 deletions exhibit/core/formatters.py
@@ -19,7 +19,7 @@ class FormattedList(list):
separate processing for these formatted values from a basic list
of values passed to original_values during manual column creation
'''
-pass
+

def format_header(dataframe, series_name, prefix=None):
'''
@@ -162,7 +162,7 @@ def build_list_of_probability_vectors(dataframe, original_series_name, ew=False)

vectors = temp_vectors.values.tolist()

-string_vectors = ["{0:.3f}".format(x).ljust(len(HEADER)) for x in vectors]
+string_vectors = [f"{x:.3f}".ljust(len(HEADER)) for x in vectors]

return string_vectors

@@ -187,7 +187,7 @@ def build_list_of_column_weights(weights):

for key in sorted(weights):

-padded_key = ["{0:.3f}".format(x).ljust(len(key)) for x in weights[key]]
+padded_key = [f"{x:.3f}".ljust(len(key)) for x in weights[key]]
sorted_temp.append(padded_key)

sorted_final = [" | ".join(y for y in x).rstrip() for x in zip(*sorted_temp)]
@@ -333,15 +333,15 @@ def build_list_of_uuid_frequencies(df, target_col):
counts = Counter(df[target_col].value_counts())

freq_df = pd.DataFrame(
-[(frequency, count) for frequency, count in counts.items()],
+list(counts.items()),
columns=["frequency", "count"]
).sort_values("frequency")

freq_df["pct"] = freq_df["count"] / freq_df["count"].sum()
freq_df.loc[:, "pct"] = freq_df["count"] / freq_df["count"].sum()

freq_list = (
freq_df["frequency"].astype(str).str.ljust(9)
-.str.cat(freq_df["pct"].transform(lambda x: "{0:.3f}".format(x)), sep=' | ')
+.str.cat(freq_df["pct"].transform(lambda x: f"{x:.3f}"), sep=" | ")
.tolist()
)

@@ -359,6 +359,6 @@ def format_df_for_export(df):

for column in df.columns:
if df[column].dtype == "timedelta64[ns]":
-df[column] = df[column].astype(str).str.replace('0 days ', '')
+df[column] = df[column].astype(str).str.replace("0 days ", "")

return df
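
One note on the freq_df.loc[:, "pct"] change above: assigning through .loc targets the frame itself, whereas chained indexing can write to a temporary copy and trigger pandas' SettingWithCopyWarning. A minimal illustration with made-up data:

    import pandas as pd

    df = pd.DataFrame({"frequency": [1, 2, 3], "count": [10, 5, 1]})

    # df[df["count"] > 1]["pct"] = ... would modify a temporary slice;
    # .loc makes the assignment unambiguous and warning-free
    df.loc[:, "pct"] = df["count"] / df["count"].sum()
    print(df)
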