2222from deep_code .utils .dataset_stac_generator import OscDatasetStacGenerator
2323from deep_code .utils .github_automation import GitHubAutomation
2424from deep_code .utils .ogc_api_record import WorkflowAsOgcRecord , \
25- ExperimentAsOgcRecord
25+ ExperimentAsOgcRecord , LinksBuilder
2626from deep_code .utils .ogc_record_generator import OSCWorkflowOGCApiRecordGenerator
2727
2828logger = logging .getLogger (__name__ )
@@ -93,67 +93,84 @@ def publish_files(
9393 self .github_automation .clean_up ()
9494
9595
class Publisher:
    """Publishes products (datasets) and workflow/experiment records to the
    OSC GitHub repository.

    GitHub publishing logic is delegated by composition to a
    ``GitHubPublisher`` instance (this class does not inherit from it).
    """

    def __init__(self, dataset_config_path: str, workflow_config_path: str):
        """Initialize the publisher from two YAML configuration files.

        :param dataset_config_path: Path to the dataset (product) config YAML.
        :param workflow_config_path: Path to the workflow config YAML.
        :raises ValueError: If ``collection_id`` is missing from the dataset
            config.
        """
        # Composition: delegate all GitHub operations.
        self.gh_publisher = GitHubPublisher()

        # Paths to configuration files
        self.dataset_config_path = dataset_config_path
        self.workflow_config_path = workflow_config_path

        # Load configuration files into self.dataset_config /
        # self.workflow_config.
        self._read_config_files()

        # collection_id is used by both dataset and experiment publishing,
        # so it is required up front.
        self.collection_id = self.dataset_config.get("collection_id")
        if not self.collection_id:
            raise ValueError("collection_id is missing in dataset config.")
118+ # @staticmethod
119+ # def clean_title(title: str) -> str:
120+ # """Clean up titles by replacing Unicode escape sequences with standard characters."""
121+ # title = title.replace('\u00a0',
122+ # ' ') # Replace non-breaking space with normal space
123+ # title = title.replace('\u00b0',
124+ # '°') # Replace unicode degree symbol with actual degree symbol
125+ # return title
126+
127+ # def clean_catalog_titles(self, catalog: Catalog):
128+ # """Recursively clean all titles in the catalog."""
129+ # # Clean title for the catalog itself
130+ # if isinstance(catalog.title, str):
131+ # catalog.title = self.clean_title(catalog.title)
132+ #
133+ # # Clean titles in all links of the catalog
134+ # for link in catalog.links:
135+ # if isinstance(link.title, str):
136+ # link.title = self.clean_title(link.title)
137+ #
138+ # for link in catalog.links:
139+ # if link.rel == 'child':
140+ # try:
141+ # # If the link points to another catalog or collection, clean it recursively
142+ # child_catalog = Catalog.from_file(link.href)
143+ # self.clean_catalog_titles(child_catalog)
144+ # except Exception as e:
145+ # # If the link doesn't point to a valid catalog file, skip it
146+ # pass
147+
148+ def _read_config_files (self ) -> None :
149+ with fsspec .open (self .dataset_config_path , "r" ) as file :
150+ self .dataset_config = yaml .safe_load (file ) or {}
151+ with fsspec .open (self .workflow_config_path , "r" ) as file :
152+ self .workflow_config = yaml .safe_load (file ) or {}
153+
154+ def publish_dataset (self ):
136155 """Publish a product collection to the specified GitHub repository."""
137- with fsspec .open (dataset_config_path , "r" ) as file :
138- dataset_config = yaml .safe_load (file ) or {}
139-
140- dataset_id = dataset_config .get ("dataset_id" )
141- collection_id = dataset_config .get ("collection_id" )
142- documentation_link = dataset_config .get ("documentation_link" )
143- access_link = dataset_config .get ("access_link" )
144- dataset_status = dataset_config .get ("dataset_status" )
145- osc_region = dataset_config .get ("osc_region" )
146- osc_themes = dataset_config .get ("osc_themes" )
147- cf_params = dataset_config .get ("cf_parameter" )
148-
149- if not dataset_id or not collection_id :
156+
157+ dataset_id = self .dataset_config .get ("dataset_id" )
158+ self .collection_id = self .dataset_config .get ("collection_id" )
159+ documentation_link = self .dataset_config .get ("documentation_link" )
160+ access_link = self .dataset_config .get ("access_link" )
161+ dataset_status = self .dataset_config .get ("dataset_status" )
162+ osc_region = self .dataset_config .get ("osc_region" )
163+ osc_themes = self .dataset_config .get ("osc_themes" )
164+ cf_params = self .dataset_config .get ("cf_parameter" )
165+
166+ if not dataset_id or not self .collection_id :
150167 raise ValueError ("Dataset ID or Collection ID missing in the config." )
151168
152169 logger .info ("Generating STAC collection..." )
153170
154171 generator = OscDatasetStacGenerator (
155172 dataset_id = dataset_id ,
156- collection_id = collection_id ,
173+ collection_id = self . collection_id ,
157174 documentation_link = documentation_link ,
158175 access_link = access_link ,
159176 osc_status = dataset_status ,
@@ -167,7 +184,7 @@ def publish_dataset(self, dataset_config_path: str):
167184
168185 # Prepare a dictionary of file paths and content
169186 file_dict = {}
170- product_path = f"products/{ collection_id } /collection.json"
187+ product_path = f"products/{ self . collection_id } /collection.json"
171188 file_dict [product_path ] = ds_collection .to_dict ()
172189
173190 variable_base_catalog_path = f"variables/catalog.json"
@@ -177,9 +194,6 @@ def publish_dataset(self, dataset_config_path: str):
177194 )
178195 # Add or update variable files
179196 for var_id in variable_ids :
180- # if var_id in ["crs", "spatial_ref"]:
181- # logger.info(f"Skipping CRS variable: {var_id}")
182- # continue
183197 var_file_path = f"variables/{ var_id } /catalog.json"
184198 if not self .gh_publisher .github_automation .file_exists (var_file_path ):
185199 logger .info (
@@ -219,7 +233,7 @@ def publish_dataset(self, dataset_config_path: str):
219233 )
220234 updated_product_base_catalog = generator .update_product_base_catalog (full_path )
221235 # clean special characters
222- self .clean_catalog_titles (updated_product_base_catalog )
236+ # self.clean_catalog_titles(updated_product_base_catalog)
223237 file_dict [product_catalog_path ] = updated_product_base_catalog .to_dict ()
224238
225239 #Link product to project catalog
@@ -233,10 +247,12 @@ def publish_dataset(self, dataset_config_path: str):
233247 file_dict [deepesdl_collection_path ] = updated_deepesdl_collection .to_dict ()
234248
235249 # Create branch name, commit message, PR info
236- branch_name = f"{ OSC_BRANCH_NAME } -{ collection_id } -{ datetime .now ().strftime ('%Y%m%d%H%M%S' )} "
237- commit_message = f"Add new dataset collection: { collection_id } "
238- pr_title = f"Add new dataset collection: { collection_id } "
239- pr_body = (f"This PR adds a new dataset collection: { collection_id } and it's "
250+ branch_name = (f"{ OSC_BRANCH_NAME } -{ self .collection_id } "
251+ f"-{ datetime .now ().strftime ('%Y%m%d%H%M%S' )} " )
252+ commit_message = f"Add new dataset collection: { self .collection_id } "
253+ pr_title = f"Add new dataset collection: { self .collection_id } "
254+ pr_body = (f"This PR adds a new dataset collection: { self .collection_id } and "
255+ f"it's "
240256 f"corresponding variable catalogs to the repository." )
241257
242258 # Publish all files in one go
@@ -251,12 +267,6 @@ def publish_dataset(self, dataset_config_path: str):
251267 logger .info (f"Pull request created: { pr_url } " )
252268
253269
254- class WorkflowPublisher :
255- """Publishes workflows to the OSC GitHub repository."""
256-
257- def __init__ (self ):
258- self .gh_publisher = GitHubPublisher ()
259-
260270 @staticmethod
261271 def _normalize_name (name : str | None ) -> str | None :
262272 return name .replace (" " , "-" ).lower () if name else None
@@ -277,60 +287,65 @@ def _write_to_file(file_path: str, data: dict):
277287 json .dump (data , file , indent = 4 )
278288 logger .info (f"File written to { file_path } " )
279289
280- def publish_workflow_experiment (self , workflow_config_path : str , write_to_file : bool = False ):
281- with fsspec .open (workflow_config_path , "r" ) as file :
282- workflow_config = yaml .safe_load (file ) or {}
283-
284- workflow_id = self ._normalize_name (workflow_config .get ("workflow_id" ))
290+ def publish_workflow_experiment (self , write_to_file : bool = False ):
291+ workflow_id = self ._normalize_name (self .workflow_config .get ("workflow_id" ))
285292 if not workflow_id :
286293 raise ValueError ("workflow_id is missing in workflow config." )
287294
288- properties_list = workflow_config .get ("properties" , [])
289- contacts = workflow_config .get ("contact" , [])
290- links = workflow_config .get ("links" , [])
291- jupyter_notebook_url = workflow_config .get ("jupyter_notebook_url" )
295+ properties_list = self .workflow_config .get ("properties" , {})
296+ osc_themes = properties_list .get ("themes" )
297+ contacts = self .workflow_config .get ("contact" , [])
298+ links = self .workflow_config .get ("links" , [])
299+ jupyter_notebook_url = self .workflow_config .get ("jupyter_notebook_url" )
292300
293301 logger .info ("Generating OGC API Record for the workflow..." )
294302 rg = OSCWorkflowOGCApiRecordGenerator ()
295- wf_record_properties = rg .build_record_properties (properties_list , contacts ,
296- caller = "WorkflowAsOgcRecord" )
303+ wf_record_properties = rg .build_record_properties (properties_list , contacts )
304+
305+ link_builder = LinksBuilder (osc_themes )
306+ theme_links = link_builder .build_them_links_for_records ()
307+
297308 workflow_record = WorkflowAsOgcRecord (
298309 id = workflow_id ,
299310 type = "Feature" ,
300311 properties = wf_record_properties ,
301- links = links ,
302- jupyter_notebook_url = jupyter_notebook_url
312+ links = links + theme_links ,
313+ jupyter_notebook_url = jupyter_notebook_url ,
314+ themes = osc_themes
303315 )
304316 # Convert to dictionary and remove jupyter_notebook_url
305317 workflow_dict = workflow_record .to_dict ()
306318 if "jupyter_notebook_url" in workflow_dict :
307319 del workflow_dict ["jupyter_notebook_url" ]
308-
309320 wf_file_path = f"workflow/{ workflow_id } /record.json"
310321 file_dict = {wf_file_path : workflow_dict }
311322
312323 # Build properties for the experiment record
313324 exp_record_properties = copy .deepcopy (wf_record_properties )
314325 exp_record_properties .type = "experiment"
326+ exp_record_properties .osc_workflow = workflow_id
315327
316328 experiment_record = ExperimentAsOgcRecord (
317329 id = workflow_id ,
318330 type = "Feature" ,
331+ jupyter_notebook_url = jupyter_notebook_url ,
332+ collection_id = self .collection_id ,
319333 properties = exp_record_properties ,
320- links = links ,
321- jupyter_notebook_url = jupyter_notebook_url
334+ links = links + theme_links
322335 )
323336 # Convert to dictionary and remove jupyter_notebook_url
324337 experiment_dict = experiment_record .to_dict ()
325338 if "jupyter_notebook_url" in experiment_dict :
326339 del experiment_dict ["jupyter_notebook_url" ]
340+ if "collection_id" in experiment_dict :
341+ del experiment_dict ["collection_id" ]
327342 exp_file_path = f"experiments/{ workflow_id } /record.json"
328343 file_dict [exp_file_path ] = experiment_dict
329344
330345 # Write to files if testing
331346 if write_to_file :
332- self ._write_to_file (wf_file_path , workflow_record . to_dict () )
333- self ._write_to_file (exp_file_path , experiment_record . to_dict () )
347+ self ._write_to_file (wf_file_path , workflow_dict )
348+ self ._write_to_file (exp_file_path , experiment_dict )
334349
335350 # Publish to GitHub if not testing
336351 if not write_to_file :
@@ -351,9 +366,8 @@ def publish_workflow_experiment(self, workflow_config_path: str, write_to_file:
351366
if __name__ == '__main__':
    # Example usage for testing: load both configs and write the generated
    # workflow/experiment records to local files instead of opening a PR.
    publisher = Publisher(
        dataset_config_path=(
            "/home/tejas/bc/projects/deepesdl/deep-code/dataset-config.yaml"
        ),
        workflow_config_path=(
            "/home/tejas/bc/projects/deepesdl/deep-code/workflow-config.yaml"
        ),
    )
    publisher.publish_workflow_experiment(write_to_file=True)