5
5
PWC_INPUT = "/Users/ioannisdasoulas/Desktop/ML-Discovery/ML-KG/Data/PwC-Data/"
6
6
OUTPUT_PATH = "/Users/ioannisdasoulas/Desktop/ML-Discovery/ML-KG/RDF_Dumps/"
7
7
ORIGINAL_DATA_FOLDER = "Original-Data/"
8
- # UPDATE_MONTH_FOLDER = "December2023 /"
8
+ UPDATE_MONTH_FOLDER = "10-01-2024 /"
9
9
10
10
# OpenML API Checkpoints
11
- OPENML_RUN_CHECKPOINT = 4037070
11
+ OPENML_RUN_CHECKPOINT = 4037082
12
12
OPENML_RUN_CURRENT_OFFSET = 6000000
13
- OPENML_DATASET_CHECKPOINT = 5399
14
- OPENML_FLOW_CHECKPOINT = 47250
15
- OPENML_TASK_CHECKPOINT = 16736
13
+ OPENML_DATASET_CHECKPOINT = 5402
14
+ OPENML_FLOW_CHECKPOINT = 16751
15
+ OPENML_TASK_CHECKPOINT = 47250
16
+
17
+ # Current part (file) number of each dump
18
+ OPENML_TASK_DUMP_PART = 1
19
+ OPENML_FLOW_DUMP_PART = 1
20
+ OPENML_DATASET_DUMP_PART = 1
21
+ OPENML_RUN_DUMP_PART = 29
22
+ KAGGLE_DUMP_PART = 1
23
+ PWC_DUMP_PART = 1
24
+
25
+ # Triples limit per dump
26
+ OPENML_DUMP_LIMIT = 50000000
27
+ KAGGLE_DUMP_LIMIT = 30000000
28
+ PWC_DUMP_LIMIT = 20000000
16
29
17
30
def update_openml_checkpoints (run_cp , dataset_cp , task_cp , flow_cp ):
18
31
@@ -21,10 +34,10 @@ def update_openml_checkpoints(run_cp, dataset_cp, task_cp, flow_cp):
21
34
content = file .read ()
22
35
23
36
# Update the values in memory
24
- content = content .replace ('OPENML_RUN_CHECKPOINT = 4037070 ' , 'OPENML_RUN_CHECKPOINT = ' + str (run_cp ))
25
- content = content .replace ('OPENML_DATASET_CHECKPOINT = 5399 ' , 'OPENML_DATASET_CHECKPOINT = ' + str (dataset_cp ))
26
- content = content .replace ('OPENML_FLOW_CHECKPOINT = 47250 ' , 'OPENML_FLOW_CHECKPOINT = ' + str (task_cp ))
27
- content = content .replace ('OPENML_TASK_CHECKPOINT = 16736 ' , 'OPENML_TASK_CHECKPOINT = ' + str (flow_cp ))
37
+ content = content .replace ('OPENML_RUN_CHECKPOINT = 4037082 ' , 'OPENML_RUN_CHECKPOINT = ' + str (run_cp ))
38
+ content = content .replace ('OPENML_DATASET_CHECKPOINT = 5402 ' , 'OPENML_DATASET_CHECKPOINT = ' + str (dataset_cp ))
39
+ content = content .replace ('OPENML_FLOW_CHECKPOINT = 16751 ' , 'OPENML_FLOW_CHECKPOINT = ' + str (flow_cp ))
40
+ content = content .replace ('OPENML_TASK_CHECKPOINT = 47250 ' , 'OPENML_TASK_CHECKPOINT = ' + str (task_cp ))
28
41
29
42
# Write the changes back to the config.py file
30
43
with open ('config.py' , 'w' ) as file :
0 commit comments