From 6b5b38c8e7076daca7bde454b8855bd6f44ba420 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Mon, 9 Mar 2020 10:42:23 -0700 Subject: [PATCH 01/22] Fixed misspelled module name --- nifty/pipeline/nifsLowMemoryPipeline.py | 2 +- nifty/pipeline/nifsPipeline.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nifty/pipeline/nifsLowMemoryPipeline.py b/nifty/pipeline/nifsLowMemoryPipeline.py index 247c94f..90c2c5a 100644 --- a/nifty/pipeline/nifsLowMemoryPipeline.py +++ b/nifty/pipeline/nifsLowMemoryPipeline.py @@ -42,7 +42,7 @@ # Import config parsing. # Import config parsing. from configobj.configobj import ConfigObj -from objectoriented.getConfig import GetConfig +from objectoriented.GetConfig import GetConfig # Import custom Nifty functions. from nifsUtils import datefmt, printDirectoryLists, writeList, getParam, interactiveNIFSInput diff --git a/nifty/pipeline/nifsPipeline.py b/nifty/pipeline/nifsPipeline.py index 4fdd110..dec2ae2 100644 --- a/nifty/pipeline/nifsPipeline.py +++ b/nifty/pipeline/nifsPipeline.py @@ -46,7 +46,7 @@ # Import configuration file parsing. from configobj.configobj import ConfigObj # Import custom pipeline setup Class. -from objectoriented.getConfig import GetConfig +from objectoriented.GetConfig import GetConfig # Conveniently import some utility functions so we don't have to type the full name. from nifsUtils import datefmt, printDirectoryLists, writeList, getParam, interactiveNIFSInput From b70612ef24a8b8807f525be785fad4bbd979dfe9 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Wed, 11 Mar 2020 20:35:22 -0700 Subject: [PATCH 02/22] Experimental fix for sorting problem. --- nifty/pipeline/steps/nifsSort.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/nifty/pipeline/steps/nifsSort.py b/nifty/pipeline/steps/nifsSort.py index e6f6ccc..97daa65 100644 --- a/nifty/pipeline/steps/nifsSort.py +++ b/nifty/pipeline/steps/nifsSort.py @@ -53,7 +53,7 @@ # Paths to Nifty data. RECIPES_PATH = pkg_resources.resource_filename('nifty', 'recipes/') RUNTIME_DATA_PATH = pkg_resources.resource_filename('nifty', 'runtimeData/') - + def start(): """ @@ -423,12 +423,15 @@ def makePythonLists(rawPath, skyThreshold): date = header[0].header['DATE'].replace('-','') # Make sure no duplicate dates are being entered. if flatlist.index(flat)==0 or not oldobsid==obsid: - if date in sciDateList: - list1 = [date, obsid] - else: - list1 = [sciDateList[n], obsid] + #if date in sciDateList: + list1 = [date, obsid] obsidDateList.append(list1) - n+=1 + #else: + # Ugly fix, we have to check there aren't more flats than science dates. + # if n < len(sciDateList): + # list1 = [sciDateList[n], obsid] + # obsidDateList.append(list1) + #n+=1 oldobsid = obsid os.chdir(path) @@ -848,11 +851,11 @@ def sortCalibrations(arcdarklist, arclist, flatlist, flatdarklist, ronchilist, o if path1+'/'+entry[0]+'/'+entry[1]+'/Calibrations_'+grating not in calDirList: calDirList.append(path1+'/'+entry[0]+'/'+entry[1]+'/Calibrations_'+grating) # Copy lamps on flats to appropriate directory. - shutil.copy('./'+flatlist[i][0], objDir+'/Calibrations_'+grating+'/') + shutil.copy('./'+flatlist[i][0], path1+'/'+entry[0]+'/'+entry[1]+'/Calibrations_'+grating) flatlist[i][1] = 0 logging.info(flatlist[i][0]) count += 1 - path = objDir+'/Calibrations_'+grating+'/' + path = path1+'/'+entry[0]+'/'+entry[1]+'/Calibrations_'+grating+'/' # Create a flatlist in the relevent directory.
# Create a text file called flatlist to store the names of the # lamps on flats for later use by the pipeline. From 3f67ce352508f85f029a0859e589eedae0c9ca95 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Wed, 11 Mar 2020 22:16:55 -0700 Subject: [PATCH 03/22] Fixed some non-standard wavelength crashes. --- .../pipeline/steps/nifsBaselineCalibration.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/nifty/pipeline/steps/nifsBaselineCalibration.py b/nifty/pipeline/steps/nifsBaselineCalibration.py index a95b518..bcb2430 100755 --- a/nifty/pipeline/steps/nifsBaselineCalibration.py +++ b/nifty/pipeline/steps/nifsBaselineCalibration.py @@ -28,7 +28,7 @@ # STDLIB -import logging, os, pkg_resources, glob, shutil +import logging, os, pkg_resources, glob, shutil, sys import astropy.io.fits from pyraf import iraf, iraffunctions @@ -648,17 +648,18 @@ def makeWaveCal(arclist, arc, arcdarklist, arcdark, grating, log, over, path): # Set interactive mode. Default False for standard configurations (and True for non-standard wavelength configurations ). pauseFlag = False + interative = 'no' if band == "K" and central_wavelength == 2.20: clist=RUNTIME_DATA_PATH+"k_ar.dat" my_thresh = 50.0 - elif band == "J": + elif band == "J" and central_wavelength == 1.25: clist=RUNTIME_DATA_PATH+"j_ar.dat" my_thresh=100.0 - elif band == "H": + elif band == "H" and central_wavelength == 1.65: clist=RUNTIME_DATA_PATH+"h_ar.dat" my_thresh=100.0 - elif band == "Z": + elif band == "Z" and central_wavelength == 1.05: clist="nifs$data/ArXe_Z.dat" my_thresh=100.0 else: @@ -676,6 +677,7 @@ def makeWaveCal(arclist, arc, arcdarklist, arcdark, grating, log, over, path): clist="gnirs$data/argon.dat" my_thresh=100.0 interactive = 'yes' + pauseFlag = True # TODO(nat): I don't like this nesting at all if not pauseFlag: @@ -686,19 +688,18 @@ def makeWaveCal(arclist, arc, arcdarklist, arcdark, grating, log, over, path): if over: iraf.delete("wrgn"+arc+".fits") iraf.nswavelength("rgn"+arc, coordli=clist, nsum=10, thresho=my_thresh, \ - trace='yes', fwidth=2.0, match=-6,cradius=8.0,fl_inter='no',nfound=10,nlost=10, \ + trace='yes', fwidth=2.0, match=-6,cradius=8.0,fl_inter=interactive,nfound=10,nlost=10, \ logfile=log) else: print "\nOutput file exists and -over not set - ",\ "not determining wavelength solution and recreating the wavelength reference arc.\n" else: iraf.nswavelength("rgn"+arc, coordli=clist, nsum=10, thresho=my_thresh, \ - trace='yes', fwidth=2.0, match=-6,cradius=8.0,fl_inter='no',nfound=10,nlost=10, \ + trace='yes', fwidth=2.0, match=-6,cradius=8.0,fl_inter=interactive,nfound=10,nlost=10, \ logfile=log) else: - a = raw_input("For now, interactive Z or non-standard wavelength calibrations are unsupported. " + \ - "Bugs running IRAF tasks interactively from python mean iraf.nswavelength cannot be activated automatically. " + \ - "Therefore please run iraf.nswavelength() interactively from Pyraf to do a wavelength calibration by hand.") + print "ERROR: For now, only some wavelength configurations are supported. The grating/central wavelength(microns) possibilities are Z/1.05, J/1.25, H/1.65, K/2.20." + sys.exit(1) # Copy to relevant science observation/calibrations/ directories for item in glob.glob('database/idwrgn*'): From 40f63733572f08fe77ca5cad4307b9f86c1cdd99 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Sat, 21 Mar 2020 21:51:27 -0700 Subject: [PATCH 04/22] Added exception handlers in nifsMerge. 
Bad information in the config.cfg (a science directory that didn't exist) was causing nifsMerge to fail. I added better exception handling to hint to users why it is failing. --- nifty/pipeline/steps/nifsMerge.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nifty/pipeline/steps/nifsMerge.py b/nifty/pipeline/steps/nifsMerge.py index bdd3900..56e9475 100644 --- a/nifty/pipeline/steps/nifsMerge.py +++ b/nifty/pipeline/steps/nifsMerge.py @@ -226,6 +226,9 @@ def mergeCubes(obsDirList, cubeType, mergeType, use_pq_offsets, im3dtran, over=" # temp3 == ('/Users/ncomeau/research/newer-nifty/hd165459', '20160705') # temp4 == ('/Users/ncomeau/research/newer-nifty', 'hd165459') + if not obsDir: + raise ValueError("nifsMerge: There was a problem with the science directory list.") + # TODO: make this clearer. temp1 = os.path.split(obsDir) temp2 = os.path.split(temp1[0]) @@ -236,7 +239,10 @@ def mergeCubes(obsDirList, cubeType, mergeType, use_pq_offsets, im3dtran, over=" obsid = temp1[1] obsPath = temp3[0] targetDirectory = temp4[0] - os.chdir(obsDir + '/'+unmergedDirectory) + try: + os.chdir(obsDir + '/'+unmergedDirectory) + except OSError: + raise OSError("nifsMerge: a science directory didn't exist.") obsidlist.append(obsPath+'/Merged'+suffix+'/'+date+'_'+obsid) From 8f12c2f47cf90609740edfbbc07f47dd4f32ebec Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Wed, 6 May 2020 10:52:14 -0700 Subject: [PATCH 05/22] Fixes misspelled variable --- nifty/pipeline/steps/nifsBaselineCalibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nifty/pipeline/steps/nifsBaselineCalibration.py b/nifty/pipeline/steps/nifsBaselineCalibration.py index bcb2430..8b42b22 100755 --- a/nifty/pipeline/steps/nifsBaselineCalibration.py +++ b/nifty/pipeline/steps/nifsBaselineCalibration.py @@ -648,7 +648,7 @@ def makeWaveCal(arclist, arc, arcdarklist, arcdark, grating, log, over, path): # Set interactive mode. Default False for standard configurations (and True for non-standard wavelength configurations ). pauseFlag = False - interative = 'no' + interactive = 'no' if band == "K" and central_wavelength == 2.20: clist=RUNTIME_DATA_PATH+"k_ar.dat" From 8c59195985eab17af54bad5615b1924310ebac1e Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 7 May 2020 08:52:57 -0700 Subject: [PATCH 06/22] Adds CADC download option, plus fixes bugs. Users can now specify -c (e.g., runNifty nifsPipeline -c ...) to download raw data from the Canadian Astronomy Data Centre. This has been tested to work from the interactive config session (runNifty nifsPipeline -i) and the fully automatic mode (runNifty nifsPipeline -c -f ). --- nifty/pipeline/nifsPipeline.py | 7 +- nifty/pipeline/nifsUtils.py | 76 ++++++++++++++++++++-- nifty/pipeline/objectoriented/GetConfig.py | 14 ++++ nifty/pipeline/steps/nifsSort.py | 21 +++--- nifty/recipes/defaultConfig.cfg | 1 + setup.py | 8 +-- 6 files changed, 105 insertions(+), 22 deletions(-) diff --git a/nifty/pipeline/nifsPipeline.py b/nifty/pipeline/nifsPipeline.py index dec2ae2..5e8dd29 100644 --- a/nifty/pipeline/nifsPipeline.py +++ b/nifty/pipeline/nifsPipeline.py @@ -77,7 +77,7 @@ # The current version: # TODO(nat): fix this to import the version from setup.py. -__version__ = "1.0.0" +__version__ = "2.0.0" # The time when Nifty was started is: startTime = str(datetime.now()) @@ -101,7 +101,6 @@ def start(args): """ # Save starting path for later use and change one directory up. path = os.getcwd() - print "IT WORKED!" # Get paths to built-in Nifty data.
Special code in setup.py makes sure recipes/ and # runtimeData/ will be installed when someone installs Nifty, and accessible in this way. RECIPES_PATH = pkg_resources.resource_filename('nifty', 'recipes/') @@ -127,9 +126,7 @@ def start(args): logging.info("# NIFTY #") logging.info("# NIFS Data Reduction Pipeline #") logging.info("# Version "+ __version__+ " #") - logging.info("# July 25th, 2017 #") - logging.info("# Marie Lemoine-Busserolle #") - logging.info("# Gemini Observatory, Hilo, Hawaii #") + logging.info("# 2020 #") logging.info("# #") logging.info("####################################\n") diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 6965cfd..4c195b3 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -26,10 +26,11 @@ # STDLIB -import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re +import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections import numpy as np from xml.dom.minidom import parseString from pyraf import iraf +from astroquery.cadc import Cadc # LOCAL @@ -74,7 +75,7 @@ def interactiveNIFSInput(): """ - logging.info("\nWelcome to Nifty! The current mode is NIFS data reduction.\n\nPress enter to accept default data reduction options.") + logging.info("\nWelcome to Nifty! The current mode is NIFS data reduction.\n\nPress enter to accept default data reduction options. Type 'yes' or 'no' when prompted.") fullReduction = getParam( "Do a full data reduction with default parameters loaded from recipes/defaultConfig.cfg? [no]: ", @@ -196,7 +197,7 @@ def interactiveNIFSInput(): rawPath = getParam( "Path to raw files directory? []: ", "", - "An example of a valid raw files path string: \"/Users/nat/data/spaceMonster\"" + "An example of a valid raw files path string: \"/Users/nat/data/\"" ) program = getParam( "Gemini Program ID? []: ", @@ -207,9 +208,14 @@ def interactiveNIFSInput(): proprietaryCookie = getParam( "Cookie for proprietary downloads? []: ", '', - "You can provide a cookie from you Gemini public archive login session to automatically " + "You can provide a cookie from your Gemini public archive login session to automatically " + \ "download proprietary data." ) + cadc = getParam( + "Download from CADC? If no download will be from Gemini. [no]: ", + 'no', + "Automatic downloads can happen from either the Gemini Science Archive or the Canadian Astronomy Data Centre." + ) skyThreshold = getParam( "Sky threshold? [2.0]: ", 2.0, @@ -291,7 +297,7 @@ def interactiveNIFSInput(): # Some of these are disabled (for now!) because of bugs in interactive Pyraf tasks. # TODO(nat): when interactive is fixed re-enable this. # Temp fix: - hlineinter = getParam( + hLineInter = getParam( "Interative H-line removal? [no]: ", False, "WARNING: This is currently broken due to bugs in interactive PyRAF tasks. Use with caution." 
@@ -413,6 +419,7 @@ def interactiveNIFSInput(): config['sortConfig']['rawPath'] = rawPath config['sortConfig']['program'] = program config['sortConfig']['proprietaryCookie'] = proprietaryCookie + config['sortConfig']['cadc'] = cadc config['sortConfig']['skyThreshold'] = skyThreshold config['sortConfig']['sortTellurics'] = sortTellurics config['sortConfig']['telluricTimeThreshold'] = telluricTimeThreshold @@ -456,6 +463,21 @@ def interactiveNIFSInput(): config['mergeConfig']['use_pq_offsets'] = use_pq_offsets config['mergeConfig']['im3dtran'] = im3dtran + # Convert yes/no responses to True/False + def update(u): + for k, v in u.iteritems(): + if isinstance(v, collections.Mapping): + u[k] = update(u.get(k)) + else: + if u[k] == 'yes': + u[k] = True + elif u[k] == 'no': + u[k] = False + return u + + update(config) + + with open('./config.cfg', 'w') as outfile: config.write(outfile) @@ -1163,3 +1185,47 @@ def MEFarith(MEF, image, op, result): iraf.imarith(operand1=result+'['+str(i)+']', op=op, operand2 = image, result = result+'['+str(i)+', overwrite]', divzero = 0.0) #-----------------------------------------------------------------------------# + +def download_query_cadc(program, directory='./rawData'): + """ + Finds and downloads all CADC files for a particular gemini program ID to + the current working directory. + """ + + cadc = Cadc() + job = cadc.create_async("SELECT observationID, publisherID, productID FROM caom2.Observation \ + AS o JOIN caom2.Plane AS p ON o.obsID=p.obsID \ + WHERE instrument_name='NIFS' AND proposal_id={}".format("'"+program+"'")) + job.run().wait() + job.raise_if_error() + result = job.fetch_result().to_table() + + # Store product id's for later + pids = list(result['productID']) + + urls = cadc.get_data_urls(result) + for url, pid in zip(urls, pids): + try: + urllib.urlretrieve(url, directory+'/'+pid+'.fits') + logging.debug("Downloaded {}".format(directory+'/'+pid+'.fits')) + except Exception as e: + logging.error("A frame failed to download.") + raise e + + + + + + + + + + + + + + + + +#-----------------------------------------------------------------------------# + diff --git a/nifty/pipeline/objectoriented/GetConfig.py b/nifty/pipeline/objectoriented/GetConfig.py index 6fe1d0c..82e8515 100644 --- a/nifty/pipeline/objectoriented/GetConfig.py +++ b/nifty/pipeline/objectoriented/GetConfig.py @@ -72,6 +72,8 @@ def makeConfig(self): self.parser.add_argument('-i', '--interactive', dest = 'interactive', default = False, action = 'store_true', help = 'Create a config.cfg file interactively.') # Ability to repeat the last data reduction self.parser.add_argument('-r', '--repeat', dest = 'repeat', default = False, action = 'store_true', help = 'Repeat the last data reduction, loading saved reduction parameters from runtimeData/config.cfg.') + # Specify where downloads come from; either Gemini or CADC. + self.parser.add_argument('-c', '--cadc', dest = 'cadc', default = False, action = 'store_true', help = 'Download raw data from Canadian Astronomy Data Centre rather than the Gemini Science Archive.') # Ability to load a built-in configuration file (recipe) self.parser.add_argument('-l', '--recipe', dest = 'recipe', action = 'store', help = 'Load data reduction parameters from the a provided recipe. 
Default is default_input.cfg.') # Ability to load your own configuration file @@ -85,6 +87,7 @@ def makeConfig(self): self.repeat = self.args.repeat self.fullReduction = self.args.fullReduction self.inputfile = self.args.inputfile + self.cadc = self.args.cadc if self.inputfile: # Load input from a .cfg file user specified at command line. @@ -121,3 +124,14 @@ def makeConfig(self): with open('./' + self.configFile, 'w') as self.outfile: self.config.write(self.outfile) logging.info("\nData reduction parameters for this reduction were copied from recipes/defaultConfig.cfg to ./config.cfg.") + + if self.cadc: + try: + with open('./' + self.configFile, 'r') as self.config_file: + self.config = ConfigObj(self.config_file, unrepr=True) + self.config['sortConfig']['cadc'] = self.cadc + with open('./' + self.configFile, 'w') as self.outfile: + self.config.write(self.outfile) + logging.debug("Set CADC flag in config file.") + except: + raise ValueError("Failed to set CADC download option.") diff --git a/nifty/pipeline/steps/nifsSort.py b/nifty/pipeline/steps/nifsSort.py index 97daa65..568a793 100644 --- a/nifty/pipeline/steps/nifsSort.py +++ b/nifty/pipeline/steps/nifsSort.py @@ -44,7 +44,7 @@ # TODO(nat): goodness, this is a lot of functions. It would be nice to split this up somehow. from ..nifsUtils import getUrlFiles, getFitsHeader, FitsKeyEntry, stripString, stripNumber, \ datefmt, checkOverCopy, checkQAPIreq, checkDate, writeList, checkEntry, timeCalc, checkSameLengthFlatLists, \ -rewriteSciImageList, datefmt +rewriteSciImageList, datefmt, download_query_cadc # Import NDMapper gemini data download, by James E.H. Turner. from ..downloadFromGeminiPublicArchive import download_query_gemini @@ -127,6 +127,7 @@ def start(): sortConfig = options['sortConfig'] rawPath = sortConfig['rawPath'] program = sortConfig['program'] + cadc = sortConfig['cadc'] proprietaryCookie = sortConfig['proprietaryCookie'] skyThreshold = sortConfig['skyThreshold'] sortTellurics = sortConfig['sortTellurics'] @@ -141,18 +142,22 @@ def start(): # Download data from gemini public archive to ./rawData/. if program: - url = 'https://archive.gemini.edu/download/'+ str(program) + '/notengineering/NotFail/present/canonical' if not os.path.exists('./rawData'): os.mkdir('./rawData') - logging.info('\nDownloading data from Gemini public archive to ./rawData. This will take a few minutes.') - logging.info('\nURL used for the download: \n' + str(url)) - if proprietaryCookie: - download_query_gemini(url, './rawData', proprietaryCookie) + if cadc: + logging.info('\nDownloading data from the CADC archive to ./rawData. This will take a few minutes.') + download_query_cadc(program, os.getcwd()+'/rawData') else: - download_query_gemini(url, './rawData') + url = 'https://archive.gemini.edu/download/'+ str(program) + '/notengineering/NotFail/present/canonical' + logging.info('\nDownloading data from Gemini public archive to ./rawData. 
This will take a few minutes.') + logging.info('\nURL used for the download: \n' + str(url)) + if proprietaryCookie: + download_query_gemini(url, './rawData', proprietaryCookie) + else: + download_query_gemini(url, './rawData') + rawPath = os.getcwd()+'/rawData' - ############################################################################ ############################################################################ # # diff --git a/nifty/recipes/defaultConfig.cfg b/nifty/recipes/defaultConfig.cfg index 6b4a931..5b6afe3 100644 --- a/nifty/recipes/defaultConfig.cfg +++ b/nifty/recipes/defaultConfig.cfg @@ -28,6 +28,7 @@ mergeMethod = '' [sortConfig] rawPath = '' program = '' +cadc=False proprietaryCookie = '' skyThreshold = 2.0 sortTellurics = True diff --git a/setup.py b/setup.py index 30fd4e5..679bb61 100644 --- a/setup.py +++ b/setup.py @@ -22,12 +22,12 @@ setup( name=NAME, - version="1.0.1", - author='mbusserolle', - author_email='mbussero@gemini.edu', + version="2.0.0", + author='ncomeau', + author_email='ncomeau@uvic.ca', description='Gemini Instruments Data Reduction Framework.', long_description = README_TEXT, - url='http://www.gemini.edu', + url='https://github.com/Nat1405/Nifty4Gemini', license='MIT', classifiers=[ 'Development Status :: 5 - Production/Stable', From 3e7ffeae118ab8cc800b535e8b4c8ebacd4b121f Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 7 May 2020 11:23:30 -0700 Subject: [PATCH 07/22] Adds support for downloads using requests. --- nifty/pipeline/nifsUtils.py | 14 ++++++++++++-- setup.py | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 4c195b3..615250e 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -26,7 +26,7 @@ # STDLIB -import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections +import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections, requests import numpy as np from xml.dom.minidom import parseString from pyraf import iraf @@ -1206,7 +1206,17 @@ def download_query_cadc(program, directory='./rawData'): urls = cadc.get_data_urls(result) for url, pid in zip(urls, pids): try: - urllib.urlretrieve(url, directory+'/'+pid+'.fits') + r = requests.get(url, stream=True) + # Parse out filename from header + filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] + # Check that filename makes sense (ie starts with N and ends with .fits) + pattern = re.compile(r"N.*\.fits") + if not pattern.match(filename): + raise ValueError("Bad download filename.") + # Write the fits file + with open(directory+'/'+filename, 'wb') as f: + for chunk in r.iter_content(chunk_size=128): + f.write(chunk) logging.debug("Downloaded {}".format(directory+'/'+pid+'.fits')) except Exception as e: logging.error("A frame failed to download.") diff --git a/setup.py b/setup.py index 679bb61..9c994c1 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ 'Topic :: Scientific/Engineering :: Physics', ], keywords='Gemini NIFS nifs pipeline reduction data IRAF iraf PYRAF pyraf astronomy integral field spectroscopy ifs ifu', - python_requires='~=2.7', + python_requires='<=2.7.17', scripts=SCRIPTS, # TODO(nat): Update this to use entry_points instead of scripts for better cross-platform performance packages=find_packages(), package_data=PACKAGE_DATA From 443662ea98c9b9bc1bfc67de2b9bed3023c05f66 Mon Sep 17 00:00:00 2001 
From: Nat1405 Date: Thu, 7 May 2020 13:12:31 -0700 Subject: [PATCH 08/22] Splits out file retrieval into a new method. --- nifty/pipeline/nifsUtils.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 615250e..6fa3b60 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1206,24 +1206,27 @@ def download_query_cadc(program, directory='./rawData'): urls = cadc.get_data_urls(result) for url, pid in zip(urls, pids): try: - r = requests.get(url, stream=True) - # Parse out filename from header - filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] - # Check that filename makes sense (ie starts with N and ends with .fits) - pattern = re.compile(r"N.*\.fits") - if not pattern.match(filename): - raise ValueError("Bad download filename.") - # Write the fits file - with open(directory+'/'+filename, 'wb') as f: - for chunk in r.iter_content(chunk_size=128): - f.write(chunk) - logging.debug("Downloaded {}".format(directory+'/'+pid+'.fits')) + filename = get_file(url) + shutil.move(filename, directory+'/'+filename) + logging.debug("Downloaded {}".format(filename)) except Exception as e: logging.error("A frame failed to download.") raise e - +def get_file(url): + """ + Gets a file from the specified url and returns the filename. + """ + r = requests.get(url, stream=True) + # Parse out filename from header + filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] + # Write the fits file + with open(filename, 'wb') as f: + for chunk in r.iter_content(chunk_size=128): + f.write(chunk) + + return filename From 0aae71e6f266c041caa3ae02f3034c76f3bbe54b Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 7 May 2020 13:15:23 -0700 Subject: [PATCH 09/22] Fixes function naming conventions to match original project. --- nifty/pipeline/nifsUtils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 6fa3b60..acd3d21 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1186,7 +1186,7 @@ def MEFarith(MEF, image, op, result): #-----------------------------------------------------------------------------# -def download_query_cadc(program, directory='./rawData'): +def downloadQueryCadc(program, directory='./rawData'): """ Finds and downloads all CADC files for a particular gemini program ID to the current working directory. @@ -1214,7 +1214,7 @@ def download_query_cadc(program, directory='./rawData'): raise e -def get_file(url): +def getFile(url): """ Gets a file from the specified url and returns the filename. """ From 22c3f62f73f2f4db3147e7fae1d6e20c439f1e4f Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 7 May 2020 13:41:22 -0700 Subject: [PATCH 10/22] Changes '-c' CADC flag to '-d/--data-source' option. Rather than just choosing between Gemini and CADC downloads, --data-source makes it possible to support more than two data sources. To add support for a new archive, at minimum nifsSort.py needs to be changed, roughly as in the sketch below.
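As a rough sketch of that extension point (illustrative only; the 'MAST' label and the downloadQueryMast() helper are hypothetical placeholders, while 'GSA' and 'CADC' are the two sources this series actually wires up), a third archive would slot into the dataSource dispatch used in nifsSort.start():

    if dataSource == 'CADC':
        downloadQueryCadc(program, os.getcwd()+'/rawData')
    elif dataSource == 'GSA':
        download_query_gemini(program, './rawData')
    elif dataSource == 'MAST':
        # Hypothetical additional archive and download helper.
        downloadQueryMast(program, os.getcwd()+'/rawData')
    else:
        raise ValueError("Invalid dataSource in config file.")

The new value would then be passed with -d/--data-source or set as dataSource under [sortConfig] in config.cfg.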
--- nifty/pipeline/nifsUtils.py | 10 +++++----- nifty/pipeline/objectoriented/GetConfig.py | 21 ++++++++++++++------- nifty/pipeline/steps/nifsSort.py | 12 +++++++----- nifty/recipes/defaultConfig.cfg | 2 +- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index acd3d21..1620e74 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -211,9 +211,9 @@ def interactiveNIFSInput(): "You can provide a cookie from your Gemini public archive login session to automatically " + \ "download proprietary data." ) - cadc = getParam( - "Download from CADC? If no download will be from Gemini. [no]: ", - 'no', + dataSource = getParam( + "Select a raw data source; 'GSA' for Gemini Science Archive, 'CADC' for Canadian Astronomy Data Centre. [GSA]: ", + 'GSA', "Automatic downloads can happen from either the Gemini Science Archive or the Canadian Astronomy Data Centre." ) skyThreshold = getParam( @@ -419,7 +419,7 @@ def interactiveNIFSInput(): config['sortConfig']['rawPath'] = rawPath config['sortConfig']['program'] = program config['sortConfig']['proprietaryCookie'] = proprietaryCookie - config['sortConfig']['cadc'] = cadc + config['sortConfig']['dataSource'] = dataSource config['sortConfig']['skyThreshold'] = skyThreshold config['sortConfig']['sortTellurics'] = sortTellurics config['sortConfig']['telluricTimeThreshold'] = telluricTimeThreshold @@ -1206,7 +1206,7 @@ def downloadQueryCadc(program, directory='./rawData'): urls = cadc.get_data_urls(result) for url, pid in zip(urls, pids): try: - filename = get_file(url) + filename = getFile(url) shutil.move(filename, directory+'/'+filename) logging.debug("Downloaded {}".format(filename)) except Exception as e: diff --git a/nifty/pipeline/objectoriented/GetConfig.py b/nifty/pipeline/objectoriented/GetConfig.py index 82e8515..919710d 100644 --- a/nifty/pipeline/objectoriented/GetConfig.py +++ b/nifty/pipeline/objectoriented/GetConfig.py @@ -60,6 +60,14 @@ def __init__(self, args, script): self.makeConfig() + def checkConfigExists(self, configFile): + """ + Checks that a config file exists and if not, sets Nifty to use default configuration. + """ + if os.path.exists(configFile): + os.remove(configFile) + shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', configFile) + def makeConfig(self): """ Make a configuration file. @@ -73,7 +81,7 @@ def makeConfig(self): # Ability to repeat the last data reduction self.parser.add_argument('-r', '--repeat', dest = 'repeat', default = False, action = 'store_true', help = 'Repeat the last data reduction, loading saved reduction parameters from runtimeData/config.cfg.') # Specify where downloads come from; either Gemini or CADC. - self.parser.add_argument('-c', '--cadc', dest = 'cadc', default = False, action = 'store_true', help = 'Download raw data from Canadian Astronomy Data Centre rather than the Gemini Science Archive.') + self.parser.add_argument('-d', '--data-source', dest = 'dataSource', default = 'GSA', action = 'store', help = 'Download raw data from the Canadian Astronomy Data Centre or the Gemini Science Archive. Valid options are "GSA" or "CADC".') # Ability to load a built-in configuration file (recipe) self.parser.add_argument('-l', '--recipe', dest = 'recipe', action = 'store', help = 'Load data reduction parameters from the a provided recipe. 
Default is default_input.cfg.') # Ability to load your own configuration file @@ -87,7 +95,7 @@ def makeConfig(self): self.repeat = self.args.repeat self.fullReduction = self.args.fullReduction self.inputfile = self.args.inputfile - self.cadc = self.args.cadc + self.dataSource = self.args.dataSource if self.inputfile: # Load input from a .cfg file user specified at command line. @@ -105,10 +113,7 @@ def makeConfig(self): self.fullReduction = interactiveNIFSInput() if self.fullReduction: - # Copy default input and use it - if os.path.exists('./' + self.configFile): - os.remove('./' + self.configFile) - shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', './'+ self.configFile) + self.checkConfigExists(self.configFile) # Update default config file with path to raw data or program ID. with open('./' + self.configFile, 'r') as self.config_file: self.config = ConfigObj(self.config_file, unrepr=True) @@ -125,8 +130,10 @@ def makeConfig(self): self.config.write(self.outfile) logging.info("\nData reduction parameters for this reduction were copied from recipes/defaultConfig.cfg to ./config.cfg.") - if self.cadc: + # If user selects a non-default data source, change it in the config file. + if self.dataSource != 'GSA': try: + self.checkConfigExists(self.configFile) with open('./' + self.configFile, 'r') as self.config_file: self.config = ConfigObj(self.config_file, unrepr=True) self.config['sortConfig']['cadc'] = self.cadc diff --git a/nifty/pipeline/steps/nifsSort.py b/nifty/pipeline/steps/nifsSort.py index 568a793..711b8eb 100644 --- a/nifty/pipeline/steps/nifsSort.py +++ b/nifty/pipeline/steps/nifsSort.py @@ -44,7 +44,7 @@ # TODO(nat): goodness, this is a lot of functions. It would be nice to split this up somehow. from ..nifsUtils import getUrlFiles, getFitsHeader, FitsKeyEntry, stripString, stripNumber, \ datefmt, checkOverCopy, checkQAPIreq, checkDate, writeList, checkEntry, timeCalc, checkSameLengthFlatLists, \ -rewriteSciImageList, datefmt, download_query_cadc +rewriteSciImageList, datefmt, downloadQueryCadc # Import NDMapper gemini data download, by James E.H. Turner. from ..downloadFromGeminiPublicArchive import download_query_gemini @@ -127,7 +127,7 @@ def start(): sortConfig = options['sortConfig'] rawPath = sortConfig['rawPath'] program = sortConfig['program'] - cadc = sortConfig['cadc'] + dataSource = sortConfig['dataSource'] proprietaryCookie = sortConfig['proprietaryCookie'] skyThreshold = sortConfig['skyThreshold'] sortTellurics = sortConfig['sortTellurics'] @@ -144,10 +144,10 @@ def start(): if program: if not os.path.exists('./rawData'): os.mkdir('./rawData') - if cadc: + if dataSource == 'CADC': logging.info('\nDownloading data from the CADC archive to ./rawData. This will take a few minutes.') - download_query_cadc(program, os.getcwd()+'/rawData') - else: + downloadQueryCadc(program, os.getcwd()+'/rawData') + elif dataSource == 'GSA': url = 'https://archive.gemini.edu/download/'+ str(program) + '/notengineering/NotFail/present/canonical' logging.info('\nDownloading data from Gemini public archive to ./rawData. 
This will take a few minutes.') logging.info('\nURL used for the download: \n' + str(url)) @@ -155,6 +155,8 @@ def start(): download_query_gemini(url, './rawData', proprietaryCookie) else: download_query_gemini(url, './rawData') + else: + raise ValueError("Invalid dataSource in config file.") rawPath = os.getcwd()+'/rawData' diff --git a/nifty/recipes/defaultConfig.cfg b/nifty/recipes/defaultConfig.cfg index 5b6afe3..94be885 100644 --- a/nifty/recipes/defaultConfig.cfg +++ b/nifty/recipes/defaultConfig.cfg @@ -28,7 +28,7 @@ mergeMethod = '' [sortConfig] rawPath = '' program = '' -cadc=False +dataSource='GSA' proprietaryCookie = '' skyThreshold = 2.0 sortTellurics = True From 6dd7cdd6bda77d2441f90fd06339295584e9bc1d Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 7 May 2020 14:18:22 -0700 Subject: [PATCH 11/22] Cleans up url construction code. --- .../ndmapperDownloader.py | 16 +++++++++++++--- nifty/pipeline/nifsUtils.py | 1 - nifty/pipeline/steps/nifsSort.py | 7 ++----- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/nifty/pipeline/downloadFromGeminiPublicArchive/ndmapperDownloader.py b/nifty/pipeline/downloadFromGeminiPublicArchive/ndmapperDownloader.py index 7a8bcbd..49342b6 100644 --- a/nifty/pipeline/downloadFromGeminiPublicArchive/ndmapperDownloader.py +++ b/nifty/pipeline/downloadFromGeminiPublicArchive/ndmapperDownloader.py @@ -31,6 +31,7 @@ import os, os.path import sys +import logging from contextlib import closing from StringIO import StringIO import urllib2 @@ -38,7 +39,7 @@ import tarfile import hashlib -def download_query_gemini(query, dirname='', cookieName=''): +def download_query_gemini(program, dirname='', cookieName=''): """ Perform a user-specified Gemini science archive query and save the files returned to a specified directory. @@ -54,16 +55,25 @@ def download_query_gemini(query, dirname='', cookieName=''): which should be optimal as long as the archive isn't unreasonably large (to do: consider adding an option to write it to a temporary file). + # Modified 2020 by Nat Comeau + Parameters ---------- - query : str - The query URL (or just the path component) to request from the server. + program : str + The Gemini program ID to request from the server. dirname : str, optional The (absolute or relative) directory path in which to place the files. """ + + # Modified 2020 by Nat Comeau + query = 'https://archive.gemini.edu/download/'+ str(program) + '/notengineering/NotFail/present/canonical' + logging.info('\nDownloading data from Gemini public archive to ./rawData. This will take a few minutes.') + logging.info('\nURL used for the download: \n' + str(query)) + + checksum_fn = 'md5sums.txt' aux_fn = [checksum_fn, 'README.txt'] diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 1620e74..d7ee548 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1206,7 +1206,6 @@ def downloadQueryCadc(program, directory='./rawData'): urls = cadc.get_data_urls(result) for url, pid in zip(urls, pids): try: - filename = getFile(url) shutil.move(filename, directory+'/'+filename) logging.debug("Downloaded {}".format(filename)) except Exception as e: diff --git a/nifty/pipeline/steps/nifsSort.py b/nifty/pipeline/steps/nifsSort.py index 711b8eb..e878a57 100644 --- a/nifty/pipeline/steps/nifsSort.py +++ b/nifty/pipeline/steps/nifsSort.py @@ -148,13 +148,10 @@ def start(): logging.info('\nDownloading data from the CADC archive to ./rawData. 
This will take a few minutes.') downloadQueryCadc(program, os.getcwd()+'/rawData') elif dataSource == 'GSA': - url = 'https://archive.gemini.edu/download/'+ str(program) + '/notengineering/NotFail/present/canonical' - logging.info('\nDownloading data from Gemini public archive to ./rawData. This will take a few minutes.') - logging.info('\nURL used for the download: \n' + str(url)) if proprietaryCookie: - download_query_gemini(url, './rawData', proprietaryCookie) + download_query_gemini(program, './rawData', proprietaryCookie) else: - download_query_gemini(url, './rawData') + download_query_gemini(program, './rawData') else: raise ValueError("Invalid dataSource in config file.") From ec027de263581daf9b5734539c1e6a59129980b3 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Fri, 8 May 2020 08:09:27 -0700 Subject: [PATCH 12/22] Fixes silly bugs introduced by 74a11e9. --- nifty/pipeline/nifsUtils.py | 1 + nifty/pipeline/objectoriented/GetConfig.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index d7ee548..1620e74 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1206,6 +1206,7 @@ def downloadQueryCadc(program, directory='./rawData'): urls = cadc.get_data_urls(result) for url, pid in zip(urls, pids): try: + filename = getFile(url) shutil.move(filename, directory+'/'+filename) logging.debug("Downloaded {}".format(filename)) except Exception as e: diff --git a/nifty/pipeline/objectoriented/GetConfig.py b/nifty/pipeline/objectoriented/GetConfig.py index 919710d..c761d43 100644 --- a/nifty/pipeline/objectoriented/GetConfig.py +++ b/nifty/pipeline/objectoriented/GetConfig.py @@ -64,10 +64,18 @@ def checkConfigExists(self, configFile): """ Checks that a config file exists and if not, sets Nifty to use default configuration. """ + if not os.path.exists(configFile): + shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', configFile) + + def overwriteWithDefault(self, configFile): + """ + Overwrites with default configuration. + """ if os.path.exists(configFile): os.remove(configFile) shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', configFile) + def makeConfig(self): """ Make a configuration file. @@ -113,7 +121,7 @@ def makeConfig(self): self.fullReduction = interactiveNIFSInput() if self.fullReduction: - self.checkConfigExists(self.configFile) + self.overwriteWithDefault(self.configFile) # Update default config file with path to raw data or program ID. with open('./' + self.configFile, 'r') as self.config_file: self.config = ConfigObj(self.config_file, unrepr=True) @@ -136,9 +144,9 @@ def makeConfig(self): self.checkConfigExists(self.configFile) with open('./' + self.configFile, 'r') as self.config_file: self.config = ConfigObj(self.config_file, unrepr=True) - self.config['sortConfig']['cadc'] = self.cadc + self.config['sortConfig']['dataSource'] = self.dataSource with open('./' + self.configFile, 'w') as self.outfile: self.config.write(self.outfile) - logging.debug("Set CADC flag in config file.") + logging.debug("Set dataSource option in config file.") except: - raise ValueError("Failed to set CADC download option.") + raise ValueError("Failed to set dataSource option.") From 42f70dfa4ad3de71e486e8ca764e7c301e691b08 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Fri, 8 May 2020 09:04:15 -0700 Subject: [PATCH 13/22] Adds better error handling to get_file(). 
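A minimal, self-contained illustration of the fallback this adds (the URL is the sample quoted in the code comment below): when the Content-Disposition header is absent, the filename is recovered from the URL path itself.

    # Illustration only: fallback filename parse when 'Content-Disposition' is missing.
    url = "https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/data/pub/GEM/N20140505S0114.fits?RUNID=mf731ukqsipqpdgk"
    filename = (url.split('/')[-1]).split('?')[0]
    assert filename == "N20140505S0114.fits"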
--- nifty/pipeline/nifsUtils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 1620e74..4c3e474 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1220,7 +1220,13 @@ def getFile(url): """ r = requests.get(url, stream=True) # Parse out filename from header - filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] + try: + filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] + except KeyError: + # 'Content-Disposition' header wasn't found, so parse filename from URL + # Typical URL looks like: + # https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/data/pub/GEM/N20140505S0114.fits?RUNID=mf731ukqsipqpdgk + filename = (url.split('/')[-1]).split('?')[0] # Write the fits file with open(filename, 'wb') as f: for chunk in r.iter_content(chunk_size=128): From 453c787490cf8b00495cd7e3eeccf25587700e39 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Mon, 11 May 2020 16:13:32 -0700 Subject: [PATCH 14/22] Fixes silly bugs in d5ead22e7. There were problems with finding the config file. --- nifty/pipeline/objectoriented/GetConfig.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nifty/pipeline/objectoriented/GetConfig.py b/nifty/pipeline/objectoriented/GetConfig.py index c761d43..14a3788 100644 --- a/nifty/pipeline/objectoriented/GetConfig.py +++ b/nifty/pipeline/objectoriented/GetConfig.py @@ -73,7 +73,7 @@ def overwriteWithDefault(self, configFile): """ if os.path.exists(configFile): os.remove(configFile) - shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', configFile) + shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', configFile) def makeConfig(self): From e006513898aa377eea6e810df73ac6a3f965352d Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 14 May 2020 08:27:10 -0700 Subject: [PATCH 15/22] Fixes path seperators (wasn't using os.path.join) in GetConfig.py. --- nifty/pipeline/objectoriented/GetConfig.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/nifty/pipeline/objectoriented/GetConfig.py b/nifty/pipeline/objectoriented/GetConfig.py index 14a3788..ad06b76 100644 --- a/nifty/pipeline/objectoriented/GetConfig.py +++ b/nifty/pipeline/objectoriented/GetConfig.py @@ -65,7 +65,7 @@ def checkConfigExists(self, configFile): Checks that a config file exists and if not, sets Nifty to use default configuration. """ if not os.path.exists(configFile): - shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', configFile) + shutil.copy(os.path.join(self.RECIPES_PATH,'defaultConfig.cfg'), configFile) def overwriteWithDefault(self, configFile): """ @@ -73,7 +73,7 @@ def overwriteWithDefault(self, configFile): """ if os.path.exists(configFile): os.remove(configFile) - shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', configFile) + shutil.copy(os.path.join(self.RECIPES_PATH,'defaultConfig.cfg'), configFile) def makeConfig(self): @@ -107,9 +107,9 @@ def makeConfig(self): if self.inputfile: # Load input from a .cfg file user specified at command line. 
- if self.inputfile != self.configFile and os.path.exists('./'+ self.configFile): - os.remove('./'+ self.configFile) - shutil.copy(self.inputfile, './'+ self.configFile) + if self.inputfile != self.configFile and os.path.exists(self.configFile): + os.remove(self.configFile) + shutil.copy(self.inputfile, self.configFile) logging.info("\nPipeline configuration for this data reduction was read from " + str(self.inputfile) + \ ", and if not named config.cfg, copied to ./config.cfg.") @@ -123,7 +123,7 @@ def makeConfig(self): if self.fullReduction: self.overwriteWithDefault(self.configFile) # Update default config file with path to raw data or program ID. - with open('./' + self.configFile, 'r') as self.config_file: + with open(self.configFile, 'r') as self.config_file: self.config = ConfigObj(self.config_file, unrepr=True) self.sortConfig = self.config['sortConfig'] if self.fullReduction[0] == "G": @@ -134,7 +134,7 @@ def makeConfig(self): # Else treat it as a path. self.sortConfig['program'] = "" self.sortConfig['rawPath'] = self.fullReduction - with open('./' + self.configFile, 'w') as self.outfile: + with open(self.configFile, 'w') as self.outfile: self.config.write(self.outfile) logging.info("\nData reduction parameters for this reduction were copied from recipes/defaultConfig.cfg to ./config.cfg.") @@ -142,10 +142,10 @@ def makeConfig(self): if self.dataSource != 'GSA': try: self.checkConfigExists(self.configFile) - with open('./' + self.configFile, 'r') as self.config_file: + with open(self.configFile, 'r') as self.config_file: self.config = ConfigObj(self.config_file, unrepr=True) self.config['sortConfig']['dataSource'] = self.dataSource - with open('./' + self.configFile, 'w') as self.outfile: + with open(self.configFile, 'w') as self.outfile: self.config.write(self.outfile) logging.debug("Set dataSource option in config file.") except: From 061fd560f710d559a8511eda4d472ecba716c1dc Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 14 May 2020 08:52:19 -0700 Subject: [PATCH 16/22] Sends downloaded CADC files to a temp location first. 
--- nifty/pipeline/nifsUtils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 4c3e474..bb036b5 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1207,8 +1207,8 @@ def downloadQueryCadc(program, directory='./rawData'): for url, pid in zip(urls, pids): try: filename = getFile(url) - shutil.move(filename, directory+'/'+filename) - logging.debug("Downloaded {}".format(filename)) + shutil.move(filename, os.path.join(directory, filename.lstrip('.temp-'))) + logging.debug("Downloaded {}".format(filename.lstrip('.temp-'))) except Exception as e: logging.error("A frame failed to download.") raise e @@ -1221,12 +1221,12 @@ def getFile(url): r = requests.get(url, stream=True) # Parse out filename from header try: - filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] + filename = '.temp-' + re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] except KeyError: # 'Content-Disposition' header wasn't found, so parse filename from URL # Typical URL looks like: # https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/data/pub/GEM/N20140505S0114.fits?RUNID=mf731ukqsipqpdgk - filename = (url.split('/')[-1]).split('?')[0] + filename = '.temp-' + (url.split('/')[-1]).split('?')[0] # Write the fits file with open(filename, 'wb') as f: for chunk in r.iter_content(chunk_size=128): From fa13680b2e7606b983177bc9bff1836798f0a431 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 14 May 2020 10:13:10 -0700 Subject: [PATCH 17/22] Adds CADC downloads MD5 verification. --- nifty/pipeline/nifsUtils.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index bb036b5..f693270 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -26,7 +26,7 @@ # STDLIB -import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections, requests +import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections, requests, hashlib import numpy as np from xml.dom.minidom import parseString from pyraf import iraf @@ -1232,6 +1232,19 @@ def getFile(url): for chunk in r.iter_content(chunk_size=128): f.write(chunk) + # Do MD5 Verification of the file; raise IO error if a problem happened. + try: + server_checksum = r.headers['Content-MD5'] + with open(filename, 'rb') as f: + download_checksum = hashlib.md5(f.read()).hexdigest() + if server_checksum != download_checksum: + logging.error("Problem downloading {} from {}.".format(filename, url)) + raise IOError + + except KeyError: + # Catch case that header didn't contain a 'content-md5' header + logging.warning("'Content-MD5 header not found for file {}. 
Skipping checksum validation.") + return filename From ba4b95cfc933617f968f6df8f7e7a0349d3d8a6c Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 14 May 2020 10:28:52 -0700 Subject: [PATCH 18/22] Changes -d/--data-source flag to -s/--data-source flag --- nifty/pipeline/objectoriented/GetConfig.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nifty/pipeline/objectoriented/GetConfig.py b/nifty/pipeline/objectoriented/GetConfig.py index ad06b76..c6f1166 100644 --- a/nifty/pipeline/objectoriented/GetConfig.py +++ b/nifty/pipeline/objectoriented/GetConfig.py @@ -89,7 +89,7 @@ def makeConfig(self): # Ability to repeat the last data reduction self.parser.add_argument('-r', '--repeat', dest = 'repeat', default = False, action = 'store_true', help = 'Repeat the last data reduction, loading saved reduction parameters from runtimeData/config.cfg.') # Specify where downloads come from; either Gemini or CADC. - self.parser.add_argument('-d', '--data-source', dest = 'dataSource', default = 'GSA', action = 'store', help = 'Download raw data from the Canadian Astronomy Data Centre or the Gemini Science Archive. Valid options are "GSA" or "CADC".') + self.parser.add_argument('-s', '--data-source', dest = 'dataSource', default = 'GSA', action = 'store', help = 'Download raw data from the Canadian Astronomy Data Centre or the Gemini Science Archive. Valid options are "GSA" or "CADC".') # Ability to load a built-in configuration file (recipe) self.parser.add_argument('-l', '--recipe', dest = 'recipe', action = 'store', help = 'Load data reduction parameters from the a provided recipe. Default is default_input.cfg.') # Ability to load your own configuration file From 7e57b99df5278818d62f48a906785375e2da4236 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 14 May 2020 11:00:54 -0700 Subject: [PATCH 19/22] Moves temp files to download directory. --- nifty/pipeline/nifsUtils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index f693270..9598a33 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1204,14 +1204,18 @@ def downloadQueryCadc(program, directory='./rawData'): pids = list(result['productID']) urls = cadc.get_data_urls(result) + cwd = os.getcwd() + os.chdir(directory) for url, pid in zip(urls, pids): try: filename = getFile(url) - shutil.move(filename, os.path.join(directory, filename.lstrip('.temp-'))) + shutil.move(filename, filename.lstrip('.temp-')) logging.debug("Downloaded {}".format(filename.lstrip('.temp-'))) except Exception as e: logging.error("A frame failed to download.") + os.chdir(cwd) raise e + os.chdir(cwd) def getFile(url): From ad6d231bbe381c9eebad0a5a07df00a0e5810851 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 14 May 2020 14:44:45 -0700 Subject: [PATCH 20/22] Changes md5 verification to happen at the same time as downloads. 
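A small stand-alone example of the idea (illustration only, not project code): updating hashlib.md5 chunk by chunk as the chunks are written yields the same digest as hashing the whole payload afterwards, which is what lets the separate re-read of the downloaded file be dropped.

    import hashlib

    data = b"0123456789" * 1000           # stand-in for a downloaded FITS payload
    streamed = hashlib.md5()
    for i in range(0, len(data), 128):    # same 128-byte chunking used with iter_content
        streamed.update(data[i:i + 128])
    assert streamed.hexdigest() == hashlib.md5(data).hexdigest()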
--- nifty/pipeline/nifsUtils.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 9598a33..aa77c90 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1231,17 +1231,17 @@ def getFile(url): # Typical URL looks like: # https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/data/pub/GEM/N20140505S0114.fits?RUNID=mf731ukqsipqpdgk filename = '.temp-' + (url.split('/')[-1]).split('?')[0] - # Write the fits file - with open(filename, 'wb') as f: - for chunk in r.iter_content(chunk_size=128): - f.write(chunk) - - # Do MD5 Verification of the file; raise IO error if a problem happened. + + # Write the fits file, verifying the md5 hash as we go try: server_checksum = r.headers['Content-MD5'] - with open(filename, 'rb') as f: - download_checksum = hashlib.md5(f.read()).hexdigest() - if server_checksum != download_checksum: + download_checksum = hashlib.md5() + with open(filename, 'wb') as f: + for chunk in r.iter_content(chunk_size=128): + f.write(chunk) + download_checksum.update(chunk) + + if server_checksum != download_checksum.hexdigest(): logging.error("Problem downloading {} from {}.".format(filename, url)) raise IOError From 79a8f5972e45caebb3ff40574ad94d7b961816d4 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 14 May 2020 15:36:45 -0700 Subject: [PATCH 21/22] Makes CADC downloads use temp files (via tempfile.TemporaryFile). --- nifty/pipeline/nifsUtils.py | 56 ++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index aa77c90..bd49ceb 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -26,7 +26,7 @@ # STDLIB -import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections, requests, hashlib +import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections, requests, hashlib, tempfile import numpy as np from xml.dom.minidom import parseString from pyraf import iraf @@ -1209,8 +1209,7 @@ def downloadQueryCadc(program, directory='./rawData'): for url, pid in zip(urls, pids): try: filename = getFile(url) - shutil.move(filename, filename.lstrip('.temp-')) - logging.debug("Downloaded {}".format(filename.lstrip('.temp-'))) + logging.debug("Downloaded {}".format(filename)) except Exception as e: logging.error("A frame failed to download.") os.chdir(cwd) @@ -1225,38 +1224,45 @@ def getFile(url): r = requests.get(url, stream=True) # Parse out filename from header try: - filename = '.temp-' + re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] + filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] except KeyError: # 'Content-Disposition' header wasn't found, so parse filename from URL # Typical URL looks like: # https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/data/pub/GEM/N20140505S0114.fits?RUNID=mf731ukqsipqpdgk - filename = '.temp-' + (url.split('/')[-1]).split('?')[0] + filename = (url.split('/')[-1]).split('?')[0] - # Write the fits file, verifying the md5 hash as we go - try: - server_checksum = r.headers['Content-MD5'] - download_checksum = hashlib.md5() - with open(filename, 'wb') as f: - for chunk in r.iter_content(chunk_size=128): - f.write(chunk) - download_checksum.update(chunk) - - if server_checksum != download_checksum.hexdigest(): - logging.error("Problem 
downloading {} from {}.".format(filename, url)) - raise IOError - - except KeyError: - # Catch case that header didn't contain a 'content-md5' header - logging.warning("'Content-MD5 header not found for file {}. Skipping checksum validation.") + # Write the fits file to the current directory, verifying the md5 hash as we go. Store partial results in a temporary file. + writeWithTempFile(r, filename) return filename +def writeWithTempFile(request, filename): + """ Write the fits file, verifying the md5 hash as we go. Store partial results in a temporary file. """ + temp_downloads_path = '.temp-downloads' + if not os.path.exists(temp_downloads_path): + os.mkdir(temp_downloads_path) + try: + server_checksum = request.headers['Content-MD5'] + except KeyError: + # Catch case that header didn't contain a 'content-md5' header + logging.warning("Content-MD5 header not found for file {}. Skipping checksum validation.".format(filename)) + server_checksum = None + + # Write out content (first to a temp file) optionally doing an md5 verification. + download_checksum = hashlib.md5() + with tempfile.TemporaryFile(mode='w+b', prefix=filename, dir=temp_downloads_path) as f: + for chunk in request.iter_content(chunk_size=128): + f.write(chunk) + download_checksum.update(chunk) + if server_checksum and (server_checksum != download_checksum.hexdigest()): + logging.error("Problem downloading {} from {}.".format(filename, url)) + raise IOError + f.seek(0) + with open(filename, 'w') as out_fp: + out_fp.write(f.read()) - - - - + return filename From f788b67b7d41b078bdb98d32f7d8e9b90d2877ff Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Mon, 8 Jun 2020 12:56:45 -0700 Subject: [PATCH 22/22] Adds backwards compatibility for old config files for dataSource option. --- nifty/pipeline/steps/nifsSort.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nifty/pipeline/steps/nifsSort.py b/nifty/pipeline/steps/nifsSort.py index e878a57..73c9327 100644 --- a/nifty/pipeline/steps/nifsSort.py +++ b/nifty/pipeline/steps/nifsSort.py @@ -127,7 +127,11 @@ def start(): sortConfig = options['sortConfig'] rawPath = sortConfig['rawPath'] program = sortConfig['program'] - dataSource = sortConfig['dataSource'] + # Backwards compatability with old config files + try: + dataSource = sortConfig['dataSource'] + except KeyError: + dataSource = 'GSA' proprietaryCookie = sortConfig['proprietaryCookie'] skyThreshold = sortConfig['skyThreshold'] sortTellurics = sortConfig['sortTellurics']
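With the series applied, the CADC helper can also be driven directly from Python. A usage sketch only (the program ID is a placeholder, and this assumes the nifty package and its astroquery/requests dependencies are installed):

    import os
    from nifty.pipeline.nifsUtils import downloadQueryCadc

    rawDir = os.path.join(os.getcwd(), 'rawData')
    if not os.path.exists(rawDir):
        os.mkdir(rawDir)
    downloadQueryCadc('GN-2014A-Q-85', directory=rawDir)  # placeholder program ID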