Skip to content

Commit

Permalink
Version 1.0.0 release
Browse files Browse the repository at this point in the history
  • Loading branch information
ZaydH committed Mar 15, 2019
1 parent da9101f commit 843393d
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 34 deletions.
42 changes: 23 additions & 19 deletions malgan/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import tensorboardX
import torch
import torch.nn as nn
# import torch.nn.utils
import torch.optim
import torch.utils.data
from torch.utils.data import Dataset, DataLoader, Subset
Expand Down Expand Up @@ -93,7 +92,7 @@ class MalGAN(nn.Module):

MALWARE_BATCH_SIZE = 32

EXPORT_DIR = Path("export")
SAVED_MODEL_DIR = Path("saved_models")

VALIDATION_SPLIT = 0.2

Expand Down Expand Up @@ -247,7 +246,7 @@ def fit_one_cycle(self, cyc_len: int, quiet_mode: bool = False) -> None:
valid_l_g = self._meas_loader_gen_loss(self._mal_data.valid)
MalGAN.tensorboard.add_scalar('Validation_Generator_Loss', valid_l_g, epoch_cnt)
if valid_l_g < best_loss:
self._export(self._build_export_name(epoch_cnt))
self._save(self._build_export_name(epoch_cnt))
if best_epoch is not None:
self._delete_old_backup(best_epoch)
best_epoch, best_loss = epoch_cnt, valid_l_g
Expand All @@ -258,7 +257,7 @@ def fit_one_cycle(self, cyc_len: int, quiet_mode: bool = False) -> None:
MalGAN.tensorboard.close()

self.load(self._build_export_name(best_epoch))
self._export(self._build_export_name())
self._save(self._build_export_name())
self._delete_old_backup(best_epoch)

def _build_export_name(self, epoch_num: int = None) -> str:
Expand All @@ -276,7 +275,7 @@ def _build_export_name(self, epoch_num: int = None) -> str:
"final" if epoch_num is None else "epoch_%05d" % epoch_num]

# The last name component is either the epoch number or "final"; join the parts into the file name
return MalGAN.EXPORT_DIR / "".join(["_".join(name).lower(), ".pth"])
return MalGAN.SAVED_MODEL_DIR / "".join(["_".join(name).lower(), ".pth"])

def _delete_old_backup(self, epoch_num: int) -> None:
"""
Expand Down Expand Up @@ -376,40 +375,45 @@ def measure_and_export_results(self) -> str:
logging.debug("Final Validation Loss: %.6f", valid_loss)
logging.debug("Final Test Loss: %.6f", test_loss)

num_bits_changed = num_mal_test = 0
m_prime_arr = []
num_mal_test = 0
y_mal_orig, m_prime_arr, bits_changed = [], [], []
for m, _ in self._mal_data.test:
y_mal_orig.append(self._bb.predict(m.cpu()))
if self._is_cuda:
m = m.cuda()
num_mal_test += m.shape[0]

m_prime, _ = self._gen.forward(m)
m_prime_arr.append(m_prime.cpu() if self._is_cuda else m_prime)

m_diff = m_prime - m
num_bits_changed += torch.sum(m_diff)
# Error check no bits flipped 1 -> 0
bits_changed.append(torch.sum(m_diff.cpu(), dim=1))

# Sanity check no bits flipped 1 -> 0
msg = "Malware signature changed to 0 which is not allowed"
assert torch.sum(m_diff < -0.1) == 0, msg
avg_changed_bits = num_bits_changed / num_mal_test
avg_changed_bits = torch.cat(bits_changed).mean()
logging.debug("Avg. Malware Bits Changed Changed: %2f", avg_changed_bits)

# BB prediction of the malware before the generator
y_mal_orig = torch.cat(y_mal_orig)

# Build an X tensor for prediction using the detector
ben_test_arr = []
for x_tmp, _ in self._ben_data.test:
ben_test_arr.append(x_tmp.cpu() if self._is_cuda else x_tmp)
ben_test_arr = [x.cpu() if self._is_cuda else x for x, _ in self._ben_data.test]
x = torch.cat(m_prime_arr + ben_test_arr)
y = torch.cat((torch.full((num_mal_test,), MalGAN.Label.Malware.value),
torch.full((len(x) - num_mal_test,), MalGAN.Label.Benign.value)))
y_actual = torch.cat((torch.full((num_mal_test,), MalGAN.Label.Malware.value),
torch.full((len(x) - num_mal_test,), MalGAN.Label.Benign.value)))

y_hat = self._bb.predict(x)
y_hat_post = self._bb.predict(x)
if self._is_cuda:
y_hat, y = y_hat.cpu(), y.cpu()
y_mal_orig, y_hat_post, y_actual = y_mal_orig.cpu(), y_hat_post.cpu(), y_actual.cpu()
# noinspection PyProtectedMember
y_prob = self._bb._model.predict_proba(x) # pylint: disable=protected-access
y_prob = y_prob[:, MalGAN.Label.Malware.value]
return _export_results(self, valid_loss, test_loss, avg_changed_bits, y, y_prob, y_hat)
return _export_results(self, valid_loss, test_loss, avg_changed_bits, y_actual,
y_mal_orig, y_prob, y_hat_post)

def _export(self, file_path: PathOrStr) -> None:
def _save(self, file_path: PathOrStr) -> None:
r"""
Export the specified model to disk. The function creates any files needed on the path.
All saved models will be relative to the \p SAVED_MODEL_DIR class object.
Expand Down
31 changes: 19 additions & 12 deletions malgan/_export_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,51 +8,58 @@
from sklearn.metrics import confusion_matrix, roc_auc_score

TensorOrFloat = Union[torch.Tensor, float]
TorchOrNumpy = Union[torch.Tensor, np.ndarray]


# noinspection PyProtectedMember,PyUnresolvedReferences
def _export_results(model: 'MalGAN', valid_loss: TensorOrFloat, test_loss: TensorOrFloat,
avg_num_bits_changed: TensorOrFloat, y: np.ndarray,
y_prob: Union[np.ndarray, torch.Tensor], y_hat: np.ndarray) -> str:
avg_num_bits_changed: TensorOrFloat, y_actual: np.ndarray,
y_mal_orig: TorchOrNumpy, y_prob: TorchOrNumpy, y_hat: np.ndarray) -> str:
r"""
Exports MalGAN results.
:param model: MalGAN model
:param valid_loss: Average loss on the malware validation set
:param test_loss: Average loss on the malware test set
:param avg_num_bits_changed: Average number of bits changed per malware sample in the test set
:param y: Actual labels
:param y_actual: Actual labels
:param y_mal_orig: Predicted value on the original (unmodified) malware
:param y_prob: Probability of malware
:param y_hat: Predicted labels
:return: Results string
"""
if isinstance(y_prob, torch.Tensor):
y_prob = y_prob.numpy()
if isinstance(y_mal_orig, torch.Tensor):
y_mal_orig = y_mal_orig.numpy()

results_file = Path("results.csv")
exists = results_file.exists()
with open(results_file, "a+") as f_out:
header = ",".join(["time_completed,M,Z,batch_size,test_set_size,detector_type,activation",
"gen_hidden_dim,discim_hidden_dim",
"avg_validation_loss,avg_test_loss,avg_num_bits_changed",
"auc,tpr,fpr,fnr,tnr"])
"auc,orig_mal_detect_rate,mod_mal_detect_rate,ben_mal_detect_rate"])
if not exists:
f_out.write(header)

results = ["\n%s" % datetime.datetime.now(),
"%d,%d,%d" % (model.M, model.Z, model.__class__.MALWARE_BATCH_SIZE),
"%d,%s,%s" % (len(y), model._bb.type.name, model._g.__class__.__name__),
"%s,%s" % (str(model.d_gen), str(model.d_discrim)),
"%d,%s,%s" % (len(y_actual), model._bb.type.name, model._g.__class__.__name__),
"\"%s\",\"%s\"" % (str(model.d_gen), str(model.d_discrim)),
"%.15f,%.15f,%.3f" % (valid_loss, test_loss, avg_num_bits_changed)]

auc = roc_auc_score(y, y_prob)
results.append("%.6f" % auc)
auc = roc_auc_score(y_actual, y_prob)
results.append("%.8f" % auc)

# Calculate the detection rate on unmodified malware
results.append("%.8f" % y_mal_orig.mean())

# Write the TxR and NxR information
tn, fp, fn, tp = confusion_matrix(y, y_hat).ravel()
tnr, tpr = tn / (tn + fp), tp / (tp + fn)
for rate in [tpr, 1 - tnr, 1 - tpr, tnr]:
results.append("%.6f" % rate)
tn, fp, fn, tp = confusion_matrix(y_actual, y_hat).ravel()
tpr, fpr = tp / (tp + fn), fp / (tn + fp)
for rate in [tpr, fpr]:
results.append("%.8f" % rate)
results = ",".join(results)
f_out.write(results)

Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
tqdm==4.30.0
numpy==1.16.1
numpy>=1.16.0
torch==1.0.0
typing==3.6.6
scikit_learn==0.20.2
typing>=3.6.6
scikit_learn>=0.20.3
tensorboardX==1.6

0 comments on commit 843393d

Please sign in to comment.