Skip to content

Commit

Permalink
Merge pull request #1 from BNN-UPC/feature/continious_node_count
Browse files Browse the repository at this point in the history
Feature/continious node count
  • Loading branch information
axelwass authored May 31, 2022
2 parents e36b48f + 1dfe30d commit 3f6594e
Show file tree
Hide file tree
Showing 12 changed files with 1,728 additions and 1,970 deletions.
44 changes: 32 additions & 12 deletions GraphlaxyDataGen.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,16 @@ def __init__(self):
The available commands are:
optimization Create a baseline dataset and optimize the parameters.
generate Using the fitted parameters generate a synthetic graph dataset.
plots Generate plots showing different characteristics of the baseline, sampled, and final datasets.
plots Generate plots showing different characteristics of the baseline, sampled, and final datasets.
statistics Print some basic statistics of target dataset
''')
parser.add_argument('command', help='Subcommand to run')
commands = {"optimization":self.optimization, "generate":self.generate, "plot": self.plot}
commands = {
"optimization":self.optimization,
"generate":self.generate,
"plots": self.plot,
"statistics": self.statistics
}
args = parser.parse_args(sys.argv[1:2])
if not args.command in commands:
print('Unrecognized command')
Expand All @@ -32,8 +38,7 @@ def optimization(self):
The available subcommands are:
baseline Only creates the baseline dataset
metrics Calculate the metrics of a dataset
optimize Use sampling and the Powell method with cooperative bargaining to optimize the input RMat parameters
plot Some plots to show analyze the results
optimize Use sampling and the Powell method with cooperative bargaining to optimize the input RMat parameters
*************************************
To run the full optimization in steps:
Expand All @@ -45,8 +50,7 @@ def optimization(self):
commands = {
"baseline":self.baseline,
"metrics":self.metrics,
"optimize": self.optimize,
"plot": self.plot
"optimize": self.optimize
}
args = parser.parse_args(sys.argv[2:3])
if not args.subcommand in commands:
Expand All @@ -63,12 +67,12 @@ def generate(self):
parser.add_argument('-s', "--dataset-size", metavar = "int", type = int,
help = "The size of the dataset to generate.", default= 5000)
parser.add_argument('-e', "--edges-between", nargs = 2, metavar = "int", type = int,
help = "The min and max vallue the edges argument can take.", default= (1000, 1000000))
help = "The min and max vallue the edges argument can take.", default= (100000, 2000000))
parser.add_argument('-m', '--multiprocess', action="store_true", help = "Add to take advantage of multiple cores.")

parser.add_argument('-w', "--custom-weights", nargs = 8, metavar = "float", type = float,
help = "List of waights for the beta distributions.",
default= ( 2.490744994387892,2.6031189695165597,0.5401027713447459,0.32109300386782624,0.6878348939570403,0.4389166002041694,0.22515465777238508,0.8146717281526472))
default= [1.3500523980958758,0.9756729865636893,1.4562248430720026,0.22767153268062393,1.055699069458428,0.9060404341929743,0.35052426603213255,1.157122011830607])



Expand All @@ -84,6 +88,19 @@ def generate(self):

generate_result_dataset(args.from_file, args.custom_weights, args.parameters_file, args.name, args.folder, args.dataset_size, args.edges_between, args.multiprocess)


def statistics(self):
    """Handle the ``statistics`` command.

    Reads ``--folder`` and ``--sample-size`` from the command-line arguments
    that follow the command name (``sys.argv[2:]``) and forwards both values
    to ``processes.statistics.statistics``.
    """
    cli = argparse.ArgumentParser(
        description="Calculate some statistics over a dataset.",
    )
    cli.add_argument(
        "-f", "--folder",
        metavar="str",
        type=str,
        default="data/validation_dataset",
        help="Folder where the dataset to analize was generated.",
    )
    cli.add_argument(
        "-s", "--sample-size",
        metavar="int",
        type=int,
        default=1000,
        help="The size of the sample.",
    )
    options = cli.parse_args(sys.argv[2:])

    # Imported here, after argument parsing, rather than at module level.
    from processes.statistics import statistics as compute_statistics

    compute_statistics(options.folder, options.sample_size)

def plot(self):
parser = argparse.ArgumentParser(description = "Some plots to analyze the results.")

Expand All @@ -105,13 +122,13 @@ def plot(self):
choices= choices)
parser.add_argument('-w', "--custom-weights", nargs = 8, metavar = "float", type = float,
help = "List of waights for the beta distributions.",
default= (2.490744994387892,2.6031189695165597,0.5401027713447459,0.32109300386782624,0.6878348939570403,0.4389166002041694,0.22515465777238508,0.8146717281526472))
default= ((1,1,1,1,1,1,1,1)))
choices = ["custom", "initial"]
parser.add_argument('-ws', "--weight-source", metavar = "str", type = str,
help = "Where to get the waights used for the plot from. Posible values: {}".format(choices), default= "custom",
choices= choices)
parser.add_argument('-n', "--name", metavar = "str", type = str,
help = "Name of the params to use for the fitness_evolution.", default= None)
help = "Name of the params to use for the fitness_evolution.", default= "result")


args = parser.parse_args(sys.argv[2:])
Expand All @@ -127,7 +144,7 @@ def baseline(self):
parser.add_argument('-s', "--dataset-size", metavar = "int", type = int,
help = "The size of the baseline dataset.", default= 10000)
parser.add_argument('-e', "--edges-between", nargs = 2, metavar = "int", type = int,
help = "The min and max vallue the edges argument can take.", default= (1000, 1000000))
help = "The min and max vallue the edges argument can take.", default= (100000, 2000000))
parser.add_argument('-m', '--multiprocess', action="store_true", help = "Add to take advantage of multiple cores.")

args = parser.parse_args(sys.argv[3:])
Expand Down Expand Up @@ -159,11 +176,14 @@ def optimize(self):
help = "Folder where the dataset is.", default= "../baseline_dataset")
parser.add_argument('-g', "--grid-size", metavar = "int", type = int,
help = "The number of rows and columns the grid has.", default=15)
parser.add_argument('-w', "--custom-weights", nargs = 8, metavar = "float", type = float,
help = "Initial weights for optimization.",
default= [1.3500523980958758,0.9756729865636893,1.4562248430720026,0.22767153268062393,1.055699069458428,0.9060404341929743,0.35052426603213255,1.157122011830607])

args = parser.parse_args(sys.argv[3:])

from processes.optimization import optimize
optimize(args.name, args.folder, args.grid_size)
optimize(args.name, args.folder, args.grid_size, args.custom_weights)


if __name__ == "__main__":
Expand Down
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,15 @@ This work was done by the Barcelona Neural Network group (BNN) and is part of a

## Citing

This repository is associated with a Paper. If you are using it for a study, please cite.
This repository is associated with the paper ["Bias Reduction via Cooperative Bargaining in Synthetic Graph Dataset Generation"](https://arxiv.org/abs/2205.13901). If you use it in a study, please cite the paper.

The citation text and BibTeX entry will be available once the paper is published.
```
@inproceedings{Wassington2022BiasRV,
title={Bias Reduction via Cooperative Bargaining in Synthetic Graph Dataset Generation},
author={Axel Wassington and S. Abadal},
year={2022}
}
```


## License
Expand Down
21 changes: 1 addition & 20 deletions data/validation_dataset/dataset_metrics.csv
Original file line number Diff line number Diff line change
@@ -1,31 +1,21 @@
name,clustering,density,density_log,max_degree,nodes,edges
sx-superuser,0.26,4.3660716413995854e-05,-4.359909142376479,14296,189191,781375
bcsstm27,0.672,0.038311181654455184,-1.4166744526085413,55,1224,28675
co2010,0.408,2.4107766948891065e-05,-4.617843015532475,120,201062,487287
cavity05,0.77,0.026261836093476664,-1.5806749135325535,63,1182,18330
struct4,0.364,0.012799771647861974,-1.8927977782394834,91,4350,121074
cage10,0.322,0.0012476265437571044,-2.903915394026621,26,11397,81021
ca-HepPh,0.593,0.0018746094397688842,-2.727089200374292,491,11204,117649
dblp-2010,0.637,2.7952531408644447e-05,-4.55357885586257,238,226413,716460
cage11,0.283,0.00039205070982041877,-3.406657755502905,32,39082,299402
cond-mat-2003,0.64,0.00030684255571491705,-3.5130844086392994,202,27519,116181
coAuthorsDBLP,0.636,2.186197601957441e-05,-4.660310586411358,336,299067,977676
cavity16,0.766,0.007106166004766985,-2.1483646513200694,63,4562,73930
fe_rotor,0.399,0.00013350824470442644,-3.8744919139317995,125,99617,662431
patents_main,0.044,2.085655015760525e-05,-4.680757525795384,212,230686,554949
crystm01,0.526,0.013921182266009852,-1.8563238804006732,28,1625,18369
fpga_dcop_10,0.561,0.00529861886254522,-2.2758373188035748,37,1220,3940
cit-HepTh,0.31,0.0009379078516105468,-3.0278398284684074,2468,27400,352059
wa2010,0.374,2.4770115585980415e-05,-4.606071966840228,158,195574,473716
HEP-th,0.294,0.0009465711478355964,-3.023846737272162,2411,26870,341698
ca-CondMat,0.647,0.0004003099279382089,-3.397603638984791,281,21363,91342
email-Enron,0.49,0.0003185011711252004,-3.4968889664298017,1383,33696,180811
wiki-Vote,0.149,0.004035793145569756,-2.39407110161249,1065,7066,100736
qc2534,0.988,0.07258473859342394,-1.1391546829004182,184,2534,232947
psmigr_3,0.48,0.08398978943758713,-1.0757735075148394,2746,3140,413921
com-Amazon,0.402,1.6513834036534368e-05,-4.782152084362822,549,334863,925872
nemeth17,0.742,0.007073535770702365,-2.1503634459119887,75,9506,319563
amazon0302,0.427,2.6194088195261075e-05,-4.581796714553647,420,262111,899792
LeGresley_4908,0.762,0.0014934671607465277,-2.825804322482934,39,4908,17984
soc-Epinions1,0.126,0.00014094905573394035,-3.8509378292702987,3044,75877,405739
Linux_call_graph,0.086,2.3888194076898408e-05,-4.6218166812289105,15979,317926,1207269
Expand All @@ -34,30 +24,21 @@ internet,0.099,2.64909704107265e-05,-4.576902132442502,153,124651,205805
coAuthorsCiteseer,0.693,3.151029750712278e-05,-4.501547496378784,1372,227320,814134
usroads-48,0.024,2.035482734874879e-05,-4.691332576989012,7,126146,161950
lhr07c,0.024,0.005765061357494149,-2.239196066152573,99,7337,155150
delaunay_n18,0.44,2.2887223163681178e-05,-4.640406895741597,21,262144,786396
or2010,0.455,2.5336823645872357e-05,-4.596247831423197,120,196621,489756
cit-HepPh,0.29,0.0007112218884929657,-3.14799488602908,846,34401,420828
Na5,0.393,0.009158925180889522,-2.0381554887117272,205,5832,155731
p2p-Gnutella31,0.004,7.556716716096343e-05,-4.121666858176848,95,62561,147878
language,0.556,1.4973538912500522e-05,-4.824675544444244,11611,399130,1192675
soc-Slashdot0902,0.575,0.00017256407249715723,-3.7630496184724294,2554,82168,582533
msc01440,0.514,0.023024283839085787,-1.6378138694729287,46,1440,23855
astro-ph,0.655,0.0010859724564231906,-2.964181189641385,360,14845,119652
loc-Gowalla,0.239,4.917880929251338e-05,-4.308221990521074,14730,196591,950327
NotreDame_www,0.275,2.5423259717214715e-05,-4.594768765881246,10717,245529,766311
TSOPF_RS_b162_c1,0.094,0.01402032838523068,-1.8532418141708273,2505,5374,202415
598a,0.428,0.00012049809570614693,-3.919019816415646,26,110971,741934
rajat17,0.728,8.445435249719433e-05,-4.073377963419152,28756,93342,367910
sparsine,0.326,0.0006396079921598432,-3.1940861182071774,57,50000,799494
EAT_SR,0.098,0.0011334632895532034,-2.9455925411772093,1092,23218,305498
nemeth20,0.748,0.010861392339708922,-1.9641144982560987,121,9506,490688
foldoc,0.325,0.0010256356780519167,-2.9890068800721705,728,13356,91471
oh2010,0.383,1.3247637646887547e-05,-4.877861559341608,62,365344,884120
dictionary28,0.236,0.00023035786353932204,-3.637596958000205,38,24831,71014
soc-Slashdot0811,0.617,0.0001826343750092322,-3.73841747722999,2541,77360,546487
TSC_OPF_300,0.359,0.008696970975067726,-2.060631979320414,4207,9773,415288
Wordnet3,0.036,4.215114110861255e-05,-4.375190663732821,543,75606,120472
ca-AstroPh,0.663,0.0012295245160221869,-2.9102628072530363,504,17903,197031
piston,0.752,0.024896306055726347,-1.6038650857806964,64,2025,51020
web-NotreDame,0.236,2.1066408037288526e-05,-4.676409508177843,10721,325729,1117563
la2010,0.368,2.346105764521534e-05,-4.629652413400064,581,204447,490317
web-NotreDame,0.236,2.1066408037288526e-05,-4.676409508177843,10721,325729,1117563
Loading

0 comments on commit 3f6594e

Please sign in to comment.