Skip to content

Commit

Permalink
Merge pull request #153 from george0st/change
Browse files Browse the repository at this point in the history
Improve unit tests & repair issue with float values
  • Loading branch information
george0st authored Apr 18, 2024
2 parents 68ebe39 + 812b949 commit a04188a
Show file tree
Hide file tree
Showing 33 changed files with 594 additions and 593 deletions.
Binary file modified 02-data/01-size-100/01-basic-party.csv.gz
Binary file not shown.
Binary file modified 02-data/01-size-100/01-basic-party.parquet
Binary file not shown.
Binary file modified 02-data/01-size-100/02-basic-contact.csv.gz
Binary file not shown.
Binary file modified 02-data/01-size-100/02-basic-contact.parquet
Binary file not shown.
Binary file modified 02-data/01-size-100/03-basic-relation.csv.gz
Binary file not shown.
Binary file modified 02-data/01-size-100/03-basic-relation.parquet
Binary file not shown.
Binary file modified 02-data/01-size-100/04-basic-account.csv.gz
Binary file not shown.
Binary file modified 02-data/01-size-100/04-basic-account.parquet
Binary file not shown.
Binary file modified 02-data/01-size-100/05-basic-transaction.csv.gz
Binary file not shown.
Binary file modified 02-data/01-size-100/05-basic-transaction.parquet
Binary file not shown.
Binary file modified 02-data/01-size-100/06-basic-event.csv.gz
Binary file not shown.
Binary file modified 02-data/01-size-100/06-basic-event.parquet
Binary file not shown.
Binary file modified 02-data/01-size-100/07-basic-communication.csv.gz
Binary file not shown.
Binary file modified 02-data/01-size-100/07-basic-communication.parquet
Binary file not shown.
Binary file modified 02-data/02-size-1K/01-basic-party.csv.gz
Binary file not shown.
Binary file modified 02-data/02-size-1K/01-basic-party.parquet
Binary file not shown.
Binary file modified 02-data/02-size-1K/02-basic-contact.csv.gz
Binary file not shown.
Binary file modified 02-data/02-size-1K/02-basic-contact.parquet
Binary file not shown.
Binary file modified 02-data/02-size-1K/03-basic-relation.csv.gz
Binary file not shown.
Binary file modified 02-data/02-size-1K/03-basic-relation.parquet
Binary file not shown.
Binary file modified 02-data/02-size-1K/04-basic-account.csv.gz
Binary file not shown.
Binary file modified 02-data/02-size-1K/04-basic-account.parquet
Binary file not shown.
Binary file modified 02-data/02-size-1K/05-basic-transaction.csv.gz
Binary file not shown.
Binary file modified 02-data/02-size-1K/05-basic-transaction.parquet
Binary file not shown.
Binary file modified 02-data/02-size-1K/06-basic-event.csv.gz
Binary file not shown.
Binary file modified 02-data/02-size-1K/06-basic-event.parquet
Binary file not shown.
Binary file modified 02-data/02-size-1K/07-basic-communication.csv.gz
Binary file not shown.
Binary file modified 02-data/02-size-1K/07-basic-communication.parquet
Binary file not shown.
560 changes: 272 additions & 288 deletions 03-test/01-size-100.json

Large diffs are not rendered by default.

576 changes: 288 additions & 288 deletions 03-test/02-size-1k.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion generator/basic_transaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def generate(self, count):

# "name": "transaction-fraudanomaly",
# "description": "Possible fraud anomaly detection (min. 0 - without anomaly detection, max. 1)",
model["transaction-fraudanomaly"] = fraud_anomaly
model["transaction-fraudanomaly"] = float(fraud_anomaly)

# "name": "transaction-fraud",
# "description": "Identification of fraud (True - fraud, False - without fraud)",
Expand Down
2 changes: 1 addition & 1 deletion generator/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Store the version here so:

__version__ = '0.2.1'
__version__ = '0.2.2'
47 changes: 32 additions & 15 deletions tests/test_generator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os
import unittest
import time
from os import path
import shutil
import pandas as pd
from generator.synthetic_data import SyntheticData
from generator.basic_communication import BasicCommunication
from generator import basic_party, basic_contact, basic_relation, basic_account, basic_transaction, basic_event, basic_communication


class TestGenerator(unittest.TestCase):
Expand Down Expand Up @@ -32,7 +32,7 @@ def test_generate_compress(self):

dir=path.join(TestGenerator.OUTPUT_ADR, lbl)
self.assertTrue(os.path.exists(dir))
self.assertTrue(os.path.exists(path.join(dir, f"{BasicCommunication.NAME}.csv.gz")))
self.assertTrue(os.path.exists(path.join(dir, f"{basic_communication.BasicCommunication.NAME}.csv.gz")))

def test_generate_compress_smallbulk(self):
lbl="0-size-20,6-compress"
Expand All @@ -42,7 +42,7 @@ def test_generate_compress_smallbulk(self):

dir=path.join(TestGenerator.OUTPUT_ADR, lbl)
self.assertTrue(os.path.exists(dir))
self.assertTrue(os.path.exists(path.join(dir, f"{BasicCommunication.NAME}.csv.gz")))
self.assertTrue(os.path.exists(path.join(dir, f"{basic_communication.BasicCommunication.NAME}.csv.gz")))

def test_generate_compress_super_smallbulk(self):
lbl="0-size-s-10,6-compress"
Expand All @@ -52,7 +52,7 @@ def test_generate_compress_super_smallbulk(self):

dir=path.join(TestGenerator.OUTPUT_ADR, lbl)
self.assertTrue(os.path.exists(dir))
self.assertTrue(os.path.exists(path.join(dir, f"{BasicCommunication.NAME}.csv.gz")))
self.assertTrue(os.path.exists(path.join(dir, f"{basic_communication.BasicCommunication.NAME}.csv.gz")))

def test_generate(self):
lbl = "0-size-200,20"
Expand All @@ -62,8 +62,7 @@ def test_generate(self):

dir=path.join(TestGenerator.OUTPUT_ADR, lbl)
self.assertTrue(os.path.exists(dir))
self.assertTrue(os.path.exists(path.join(dir, f"{BasicCommunication.NAME}.csv")))
# TODO: check if the first line contain header
self.assertTrue(os.path.exists(path.join(dir, f"{basic_party.BasicParty.NAME}.csv")))

def test_generate_smallbulk_repeat(self):
"""Repeat generation of small files"""
Expand All @@ -76,8 +75,7 @@ def test_generate_smallbulk_repeat(self):

dir = path.join(TestGenerator.OUTPUT_ADR, lbl)
self.assertTrue(os.path.exists(dir))
self.assertTrue(os.path.exists(path.join(dir, f"{BasicCommunication.NAME}.csv")))
# TODO: check if the first line contain header
self.assertTrue(os.path.exists(path.join(dir, f"{basic_party.BasicParty.NAME}.csv")))

def test_generate_smallbulk(self):
lbl = "0-size-20,6"
Expand All @@ -87,8 +85,7 @@ def test_generate_smallbulk(self):

dir = path.join(TestGenerator.OUTPUT_ADR, lbl)
self.assertTrue(os.path.exists(dir))
self.assertTrue(os.path.exists(path.join(dir, f"{BasicCommunication.NAME}.csv")))
# TODO: check if the first line contain header
self.assertTrue(os.path.exists(path.join(dir, f"{basic_party.BasicParty.NAME}.csv")))

def test_generate_super_smallbulk(self):
lbl = "0-size-s-10,6"
Expand All @@ -98,8 +95,7 @@ def test_generate_super_smallbulk(self):

dir = path.join(TestGenerator.OUTPUT_ADR, lbl)
self.assertTrue(os.path.exists(dir))
self.assertTrue(os.path.exists(path.join(dir, f"{BasicCommunication.NAME}.csv")))
# TODO: check if the first line contain header
self.assertTrue(os.path.exists(path.join(dir, f"{basic_party.BasicParty.NAME}.csv")))

def test_generate_bigbulk(self):
lbl = "0-size-2000,2000"
Expand All @@ -109,7 +105,28 @@ def test_generate_bigbulk(self):

dir = path.join(TestGenerator.OUTPUT_ADR, lbl)
self.assertTrue(os.path.exists(dir))
self.assertTrue(os.path.exists(path.join(dir, f"{BasicCommunication.NAME}.csv")))
# TODO: check if the first line contain header
self.assertTrue(os.path.exists(path.join(dir, f"{basic_party.BasicParty.NAME}.csv")))

def _check_csv_header(self, filename, key_text):
    """Assert that the header row of a CSV file mentions *key_text*.

    Only the header line is parsed, and the check passes when *key_text*
    occurs in at least one column label. Text that appears solely in data
    cells does not satisfy the check.

    Args:
        filename: Path of the CSV file to inspect.
        key_text: Text expected somewhere in the header (e.g. "party-id").

    A missing file is skipped without failing the test.
    NOTE(review): presumably not every entity file is produced for every
    run -- confirm, and assert existence here if all files are mandatory.
    """
    if not os.path.exists(filename):
        return

    # nrows=0 parses the header only, so the data rows are never loaded.
    header = pd.read_csv(filename, nrows=0)
    self.assertTrue(
        any(key_text in str(column) for column in header.columns),
        f"'{key_text}' not found in the header of '{filename}'",
    )

def test_csv_structure(self):
    """Generate a small uncompressed dataset and check every CSV header.

    Each entity file is expected to carry its key column name in the
    header row; the actual check is delegated to `_check_csv_header`.
    """
    lbl = "0-size-csvcheck-10,6"

    model_dir = os.path.join("..","01-model")
    synth = SyntheticData(model_dir, TestGenerator.OUTPUT_ADR, TestGenerator.OUTPUT_ADR)
    synth.generate(label=lbl, count=10, bulk_max=6, compress=False)

    out_dir = path.join(TestGenerator.OUTPUT_ADR, lbl)
    self.assertTrue(os.path.exists(out_dir))

    # (entity class, text expected in its CSV header), in generation order.
    expectations = (
        (basic_party.BasicParty, "party-id"),
        (basic_contact.BasicContact, "party-id"),
        (basic_relation.BasicRelation, "party-id"),
        (basic_account.BasicAccount, "party-id"),
        (basic_transaction.BasicTransaction, "account-id"),
        (basic_event.BasicEvent, "party-id"),
        (basic_communication.BasicCommunication, "party-id"),
    )
    for entity, key_text in expectations:
        self._check_csv_header(path.join(out_dir, f"{entity.NAME}.csv"), key_text)

# TODO: Add a test with a batch size under the limit; it will generate a wrong dataset

0 comments on commit a04188a

Please sign in to comment.