Merge pull request #1 from BNN-UPC/feature/continious_node_count

Feature/continious node count
BNN-UPC · May 31, 2022 · 3f6594e · 3f6594e
2 parents e36b48f + 1dfe30d
commit 3f6594e
Show file tree

Hide file tree

Showing 12 changed files with 1,728 additions and 1,970 deletions.
diff --git a/GraphlaxyDataGen.py b/GraphlaxyDataGen.py
@@ -13,10 +13,16 @@ def __init__(self):
 The available commands are:
     optimization         Create a baseline dataset and optimize the parameters.
     generate    Using the fitted parameters generate a synthetic graph dataset.
-    plots       Generate plots showing different characteristics of the baseline, sampled, and final datasets.
+    plots       Generate plots showing different characteristics of the baseline, sampled, and final datasets. 
+    statistics  Print some basic statistics of target dataset
 ''')
         parser.add_argument('command', help='Subcommand to run')
-        commands = {"optimization":self.optimization, "generate":self.generate, "plot": self.plot}
+        commands = {
+            "optimization":self.optimization, 
+            "generate":self.generate, 
+            "plots": self.plot,
+            "statistics": self.statistics
+        }
         args = parser.parse_args(sys.argv[1:2])
         if not args.command in commands:
             print('Unrecognized command')
@@ -32,8 +38,7 @@ def optimization(self):
 The available subcommands are:
     baseline    Only creates the baseline dataset
     metrics     Calculate the metrics of a dataset
-    optimize    Use sampling and the Powell method with cooperative bargaining to optimize the input RMat parameters 
-    plot        Some plots to show analyze the results
+    optimize    Use sampling and the Powell method with cooperative bargaining to optimize the input RMat parameters
 
 *************************************
 To run the full optimization in steps:
@@ -45,8 +50,7 @@ def optimization(self):
         commands = {
             "baseline":self.baseline, 
             "metrics":self.metrics, 
-            "optimize": self.optimize, 
-            "plot": self.plot
+            "optimize": self.optimize
             }
         args = parser.parse_args(sys.argv[2:3])
         if not args.subcommand in commands:
@@ -63,12 +67,12 @@ def generate(self):
         parser.add_argument('-s', "--dataset-size", metavar = "int", type = int,
                             help = "The size of the dataset to generate.", default= 5000)
         parser.add_argument('-e', "--edges-between", nargs = 2, metavar = "int", type = int,
-                            help = "The min and max vallue the edges argument can take.", default= (1000, 1000000))
+                            help = "The min and max vallue the edges argument can take.", default= (100000, 2000000))
         parser.add_argument('-m', '--multiprocess', action="store_true", help = "Add to take advantage of multiple cores.")
 
         parser.add_argument('-w', "--custom-weights", nargs = 8, metavar = "float", type = float,
                             help = "List of waights for the beta distributions.", 
-                            default= ( 2.490744994387892,2.6031189695165597,0.5401027713447459,0.32109300386782624,0.6878348939570403,0.4389166002041694,0.22515465777238508,0.8146717281526472))
+                            default= [1.3500523980958758,0.9756729865636893,1.4562248430720026,0.22767153268062393,1.055699069458428,0.9060404341929743,0.35052426603213255,1.157122011830607])
 
 
 
@@ -84,6 +88,19 @@ def generate(self):
 
         generate_result_dataset(args.from_file, args.custom_weights, args.parameters_file, args.name, args.folder, args.dataset_size, args.edges_between, args.multiprocess)
 
+
+    def statistics(self):
+        parser = argparse.ArgumentParser(description = "Calculate some statistics over a dataset.")
+
+        parser.add_argument('-f', "--folder", metavar = "str", type = str,
+                            help = "Folder where the dataset to analize was generated.", default= "data/validation_dataset")
+        parser.add_argument('-s', "--sample-size", metavar = "int", type = int,
+                            help = "The size of the sample.", default= 1000)
+
+        args = parser.parse_args(sys.argv[2:])
+        from processes.statistics import statistics
+        statistics(args.folder, args.sample_size)
+
     def plot(self):
         parser = argparse.ArgumentParser(description = "Some plots to analyze the results.")
 
@@ -105,13 +122,13 @@ def plot(self):
                             choices= choices)
         parser.add_argument('-w', "--custom-weights", nargs = 8, metavar = "float", type = float,
                             help = "List of waights for the beta distributions.", 
-                            default= (2.490744994387892,2.6031189695165597,0.5401027713447459,0.32109300386782624,0.6878348939570403,0.4389166002041694,0.22515465777238508,0.8146717281526472))
+                            default= ((1,1,1,1,1,1,1,1)))
         choices = ["custom", "initial"]
         parser.add_argument('-ws', "--weight-source", metavar = "str", type = str,
                             help = "Where to get the waights used for the plot from. Posible values: {}".format(choices), default= "custom",
                             choices= choices)
         parser.add_argument('-n', "--name", metavar = "str", type = str,
-                            help = "Name of the params to use for the fitness_evolution.", default= None)
+                            help = "Name of the params to use for the fitness_evolution.", default= "result")
 
 
         args = parser.parse_args(sys.argv[2:])
@@ -127,7 +144,7 @@ def baseline(self):
         parser.add_argument('-s', "--dataset-size", metavar = "int", type = int,
                             help = "The size of the baseline dataset.", default= 10000)
         parser.add_argument('-e', "--edges-between", nargs = 2, metavar = "int", type = int,
-                            help = "The min and max vallue the edges argument can take.", default= (1000, 1000000))
+                            help = "The min and max vallue the edges argument can take.", default= (100000, 2000000))
         parser.add_argument('-m', '--multiprocess', action="store_true", help = "Add to take advantage of multiple cores.")
 
         args = parser.parse_args(sys.argv[3:])
@@ -159,11 +176,14 @@ def optimize(self):
                             help = "Folder where the dataset is.", default= "../baseline_dataset")
         parser.add_argument('-g', "--grid-size", metavar = "int", type = int,
                             help = "The number of rows and columns the grid has.", default=15)
+        parser.add_argument('-w', "--custom-weights", nargs = 8, metavar = "float", type = float,
+                            help = "Initial weights for optimization.", 
+                            default= [1.3500523980958758,0.9756729865636893,1.4562248430720026,0.22767153268062393,1.055699069458428,0.9060404341929743,0.35052426603213255,1.157122011830607])
 
         args = parser.parse_args(sys.argv[3:])
 
         from processes.optimization import optimize
-        optimize(args.name, args.folder, args.grid_size)
+        optimize(args.name, args.folder, args.grid_size, args.custom_weights)
 
 
 if __name__ == "__main__":

diff --git a/README.md b/README.md
@@ -189,9 +189,15 @@ This work was done by the Barcelona Neural Network group (BNN) and is part of a
 
 ## Citing
 
-This repository is associated with a Paper. If you are using it for a study, please cite.
+This repository is associated with the paper ["Bias Reduction via Cooperative Bargaining in Synthetic Graph Dataset Generation"](https://arxiv.org/abs/2205.13901). If you are using it for a study, please cite it.
 
-The citation text and BibTex will be available once the paper is published.
+```
+@inproceedings{Wassington2022BiasRV,
+  title={Bias Reduction via Cooperative Bargaining in Synthetic Graph Dataset Generation},
+  author={Axel Wassington and S. Abadal},
+  year={2022}
+}
+```
 
 
 ## License

diff --git a/data/validation_dataset/dataset_metrics.csv b/data/validation_dataset/dataset_metrics.csv
@@ -1,31 +1,21 @@
 name,clustering,density,density_log,max_degree,nodes,edges
 sx-superuser,0.26,4.3660716413995854e-05,-4.359909142376479,14296,189191,781375
 bcsstm27,0.672,0.038311181654455184,-1.4166744526085413,55,1224,28675
-co2010,0.408,2.4107766948891065e-05,-4.617843015532475,120,201062,487287
 cavity05,0.77,0.026261836093476664,-1.5806749135325535,63,1182,18330
-struct4,0.364,0.012799771647861974,-1.8927977782394834,91,4350,121074
-cage10,0.322,0.0012476265437571044,-2.903915394026621,26,11397,81021
 ca-HepPh,0.593,0.0018746094397688842,-2.727089200374292,491,11204,117649
 dblp-2010,0.637,2.7952531408644447e-05,-4.55357885586257,238,226413,716460
 cage11,0.283,0.00039205070982041877,-3.406657755502905,32,39082,299402
-cond-mat-2003,0.64,0.00030684255571491705,-3.5130844086392994,202,27519,116181
-coAuthorsDBLP,0.636,2.186197601957441e-05,-4.660310586411358,336,299067,977676
-cavity16,0.766,0.007106166004766985,-2.1483646513200694,63,4562,73930
 fe_rotor,0.399,0.00013350824470442644,-3.8744919139317995,125,99617,662431
 patents_main,0.044,2.085655015760525e-05,-4.680757525795384,212,230686,554949
 crystm01,0.526,0.013921182266009852,-1.8563238804006732,28,1625,18369
 fpga_dcop_10,0.561,0.00529861886254522,-2.2758373188035748,37,1220,3940
-cit-HepTh,0.31,0.0009379078516105468,-3.0278398284684074,2468,27400,352059
-wa2010,0.374,2.4770115585980415e-05,-4.606071966840228,158,195574,473716
-HEP-th,0.294,0.0009465711478355964,-3.023846737272162,2411,26870,341698
 ca-CondMat,0.647,0.0004003099279382089,-3.397603638984791,281,21363,91342
 email-Enron,0.49,0.0003185011711252004,-3.4968889664298017,1383,33696,180811
 wiki-Vote,0.149,0.004035793145569756,-2.39407110161249,1065,7066,100736
 qc2534,0.988,0.07258473859342394,-1.1391546829004182,184,2534,232947
 psmigr_3,0.48,0.08398978943758713,-1.0757735075148394,2746,3140,413921
 com-Amazon,0.402,1.6513834036534368e-05,-4.782152084362822,549,334863,925872
 nemeth17,0.742,0.007073535770702365,-2.1503634459119887,75,9506,319563
-amazon0302,0.427,2.6194088195261075e-05,-4.581796714553647,420,262111,899792
 LeGresley_4908,0.762,0.0014934671607465277,-2.825804322482934,39,4908,17984
 soc-Epinions1,0.126,0.00014094905573394035,-3.8509378292702987,3044,75877,405739
 Linux_call_graph,0.086,2.3888194076898408e-05,-4.6218166812289105,15979,317926,1207269
@@ -34,30 +24,21 @@ internet,0.099,2.64909704107265e-05,-4.576902132442502,153,124651,205805
 coAuthorsCiteseer,0.693,3.151029750712278e-05,-4.501547496378784,1372,227320,814134
 usroads-48,0.024,2.035482734874879e-05,-4.691332576989012,7,126146,161950
 lhr07c,0.024,0.005765061357494149,-2.239196066152573,99,7337,155150
-delaunay_n18,0.44,2.2887223163681178e-05,-4.640406895741597,21,262144,786396
-or2010,0.455,2.5336823645872357e-05,-4.596247831423197,120,196621,489756
 cit-HepPh,0.29,0.0007112218884929657,-3.14799488602908,846,34401,420828
 Na5,0.393,0.009158925180889522,-2.0381554887117272,205,5832,155731
 p2p-Gnutella31,0.004,7.556716716096343e-05,-4.121666858176848,95,62561,147878
 language,0.556,1.4973538912500522e-05,-4.824675544444244,11611,399130,1192675
-soc-Slashdot0902,0.575,0.00017256407249715723,-3.7630496184724294,2554,82168,582533
 msc01440,0.514,0.023024283839085787,-1.6378138694729287,46,1440,23855
-astro-ph,0.655,0.0010859724564231906,-2.964181189641385,360,14845,119652
 loc-Gowalla,0.239,4.917880929251338e-05,-4.308221990521074,14730,196591,950327
-NotreDame_www,0.275,2.5423259717214715e-05,-4.594768765881246,10717,245529,766311
 TSOPF_RS_b162_c1,0.094,0.01402032838523068,-1.8532418141708273,2505,5374,202415
 598a,0.428,0.00012049809570614693,-3.919019816415646,26,110971,741934
 rajat17,0.728,8.445435249719433e-05,-4.073377963419152,28756,93342,367910
-sparsine,0.326,0.0006396079921598432,-3.1940861182071774,57,50000,799494
 EAT_SR,0.098,0.0011334632895532034,-2.9455925411772093,1092,23218,305498
-nemeth20,0.748,0.010861392339708922,-1.9641144982560987,121,9506,490688
 foldoc,0.325,0.0010256356780519167,-2.9890068800721705,728,13356,91471
 oh2010,0.383,1.3247637646887547e-05,-4.877861559341608,62,365344,884120
 dictionary28,0.236,0.00023035786353932204,-3.637596958000205,38,24831,71014
 soc-Slashdot0811,0.617,0.0001826343750092322,-3.73841747722999,2541,77360,546487
 TSC_OPF_300,0.359,0.008696970975067726,-2.060631979320414,4207,9773,415288
 Wordnet3,0.036,4.215114110861255e-05,-4.375190663732821,543,75606,120472
 ca-AstroPh,0.663,0.0012295245160221869,-2.9102628072530363,504,17903,197031
-piston,0.752,0.024896306055726347,-1.6038650857806964,64,2025,51020
-web-NotreDame,0.236,2.1066408037288526e-05,-4.676409508177843,10721,325729,1117563
-la2010,0.368,2.346105764521534e-05,-4.629652413400064,581,204447,490317
+web-NotreDame,0.236,2.1066408037288526e-05,-4.676409508177843,10721,325729,1117563