|
20 | 20 |
|
21 | 21 | # options for dataset generation
|
22 | 22 | vnum_per_partition = 20
|
23 |
| -vnum_total = vnum_per_partition * 2 |
| 23 | +num_partition = 2 |
| 24 | +vnum_total = vnum_per_partition * num_partition # 40 |
24 | 25 | degree = 2
|
25 |
| -enum_total = vnum_total * degree |
| 26 | +enum_per_partition = vnum_per_partition * degree # 40 |
| 27 | +enum_total = enum_per_partition * num_partition # 80 |
26 | 28 |
|
27 | 29 | # for hetero dataset
|
28 | 30 | user_ntype = 'user'
|
|
36 | 38 | device_num = 2
|
37 | 39 |
|
38 | 40 |
|
39 |
| -def _prepare_dataset(rank: int, weighted: bool = False): |
40 |
| - # partition |
41 |
| - node_pb = torch.tensor( |
42 |
| - [v % 2 for v in range(0, vnum_total)], |
43 |
| - dtype=torch.long |
44 |
| - ) |
45 |
| - edge_pb = torch.tensor( |
46 |
| - [((e // degree) % 2) for e in range(0, enum_total)], |
47 |
| - dtype=torch.long |
48 |
| - ) |
| 41 | +def _prepare_dataset(rank: int, |
| 42 | + weighted: bool = False, |
| 43 | + is_range_partition: bool = False): |
| 44 | + """ |
| 45 | + Prepare a synthetic graph dataset with 40 nodes and 80 edges for unit tests. |
| 46 | +
|
| 47 | + Graph topology: |
| 48 | + - rows: [0, 0, 1, 1, 2, 2, ... 37, 37, 38, 38, 39, 39] |
| 49 | + - cols: [1, 2, 2, 3, 3, 4, ... 38, 39, 39, 0, 0, 1] |
| 50 | + - eids: [0, 1, 2, 3, 4, 5, ... 74, 75, 76, 77, 78, 79] |
| 51 | +
|
| 52 | + Node features: |
| 53 | + [[0., 0., ..., 0., 0.], |
| 54 | + [1., 1., ..., 1., 1.], |
| 55 | + ... |
| 56 | + [39., 39., ..., 39., 39.]] |
| 57 | +
|
| 58 | + Edge features: |
| 59 | + [[0., 0., ..., 0., 0.], |
| 60 | + [1., 1., ..., 1., 1.], |
| 61 | + ... |
| 62 | + [79., 79., ..., 79., 79.]] |
| 63 | +
|
| 64 | + Two partition strategies are available: |
| 65 | + 1. Range partition: |
| 66 | + - Nodes with IDs [0, 19] and edges with IDs [0, 39] are on partition 0 |
| 67 | + - Nodes with IDs [20, 39] and edges with IDs [40, 79] are on partition 1 |
| 68 | + 2. Hash partition: |
| 69 | + - Even-numbered nodes are on partition 0, odd-numbered nodes on partition 1 |
| 70 | + - Each edge is co-located with its source node (with degree=2: edge IDs |
| 71 | + 0-1 with node 0 on partition 0, IDs 2-3 with node 1 on partition 1, ...) |
| 71 | +
|
| 72 | + The graph topology and features are identical under both partition strategies. |
| 73 | + """ |
| 74 | + if is_range_partition: |
| 75 | + node_ranges = [(0, vnum_per_partition), (vnum_per_partition, vnum_total)] |
| 76 | + edge_ranges = [(0, enum_total // 2), (enum_total // 2, enum_total)] |
| 77 | + node_pb = glt.partition.RangePartitionBook( |
| 78 | + node_ranges, rank) |
| 79 | + edge_pb = glt.partition.RangePartitionBook( |
| 80 | + edge_ranges, rank) |
| 81 | + start, end, step = rank * vnum_per_partition, (rank + 1) * vnum_per_partition, 1 |
| 82 | + else: |
| 83 | + node_pb = torch.tensor( |
| 84 | + [v % 2 for v in range(0, vnum_total)], |
| 85 | + dtype=torch.long |
| 86 | + ) |
| 87 | + edge_pb = torch.tensor( |
| 88 | + [((e // degree) % 2) for e in range(0, enum_total)], |
| 89 | + dtype=torch.long |
| 90 | + ) |
| 91 | + start, end, step = rank, vnum_total, 2 |
| 92 | + |
49 | 93 |
|
50 | 94 | # graph
|
51 | 95 | nodes, rows, cols, eids = [], [], [], []
|
52 |
| - for v in range(rank, vnum_total, 2): |
| 96 | + for v in range(start, end, step): |
53 | 97 | nodes.append(v)
|
54 | 98 | rows.extend([v for _ in range(degree)])
|
55 | 99 | cols.extend([((v + i + 1) % vnum_total) for i in range(degree)])
|
56 | 100 | eids.extend([(v * degree + i) for i in range(degree)])
|
| 101 | + |
57 | 102 | edge_index = torch.tensor([rows, cols], dtype=torch.int64)
|
58 | 103 | edge_ids = torch.tensor(eids, dtype=torch.int64)
|
59 | 104 | edge_weights = (edge_ids % 2).to(torch.float)
|
60 | 105 | csr_topo = glt.data.Topology(edge_index=edge_index, edge_ids=edge_ids)
|
| 106 | + graph = glt.data.Graph(csr_topo, 'ZERO_COPY', device=0) |
| 107 | + |
61 | 108 | weighted_csr_topo = glt.data.Topology(
|
62 | 109 | edge_index=edge_index, edge_ids=edge_ids, edge_weights=edge_weights)
|
63 |
| - graph = glt.data.Graph(csr_topo, 'ZERO_COPY', device=0) |
64 | 110 | weighted_graph = glt.data.Graph(weighted_csr_topo, 'CPU')
|
65 | 111 |
|
66 | 112 | # feature
|
67 | 113 | device_group_list = [glt.data.DeviceGroup(0, [0]),
|
68 | 114 | glt.data.DeviceGroup(1, [1])]
|
69 | 115 | split_ratio = 0.2
|
70 | 116 |
|
71 |
| - nfeat = rank + torch.zeros(len(nodes), 512, dtype=torch.float32) |
72 |
| - nfeat_id2idx = glt.utils.id2idx(nodes) |
| 117 | + nfeat = torch.tensor(nodes, dtype=torch.float32).unsqueeze(1).repeat(1, 512) |
| 118 | + nfeat_id2idx = node_pb.id2index if is_range_partition else glt.utils.id2idx(nodes) |
73 | 119 | node_feature = glt.data.Feature(nfeat, nfeat_id2idx, split_ratio,
|
74 | 120 | device_group_list, device=0)
|
75 | 121 |
|
76 |
| - efeat = rank + torch.ones(len(eids), 10, dtype=torch.float32) |
77 |
| - efeat_id2idx = glt.utils.id2idx(eids) |
| 122 | + efeat = torch.tensor(eids, dtype=torch.float32).unsqueeze(1).repeat(1, 10) |
| 123 | + efeat_id2idx = edge_pb.id2index if is_range_partition else glt.utils.id2idx(eids) |
78 | 124 | edge_feature = glt.data.Feature(efeat, efeat_id2idx, split_ratio,
|
79 | 125 | device_group_list, device=0)
|
80 | 126 |
|
81 | 127 | # whole node label
|
82 | 128 | node_label = torch.arange(vnum_total)
|
83 | 129 |
|
84 | 130 | # dist dataset
|
85 |
| - if weighted: |
86 |
| - return glt.distributed.DistDataset( |
87 |
| - 2, rank, |
88 |
| - weighted_graph, node_feature, edge_feature, node_label, |
89 |
| - node_pb, edge_pb |
90 |
| - ) |
91 |
| - else: |
92 |
| - return glt.distributed.DistDataset( |
93 |
| - 2, rank, |
94 |
| - graph, node_feature, edge_feature, node_label, |
95 |
| - node_pb, edge_pb |
96 |
| - ) |
| 131 | + ds = glt.distributed.DistDataset( |
| 132 | + 2, rank, |
| 133 | + weighted_graph if weighted else graph, |
| 134 | + node_feature, edge_feature, node_label, |
| 135 | + node_pb, edge_pb |
| 136 | + ) |
| 137 | + |
| 138 | + if is_range_partition: |
| 139 | + ds.id_filter = node_pb.id_filter |
| 140 | + return ds |
97 | 141 |
|
98 | 142 |
|
99 | 143 | def _prepare_hetero_dataset(
|
|
0 commit comments