From 6b5b38c8e7076daca7bde454b8855bd6f44ba420 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Mon, 9 Mar 2020 10:42:23 -0700 Subject: [PATCH 01/22] Fixed misspelled module name --- nifty/pipeline/nifsLowMemoryPipeline.py | 2 +- nifty/pipeline/nifsPipeline.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nifty/pipeline/nifsLowMemoryPipeline.py b/nifty/pipeline/nifsLowMemoryPipeline.py index 247c94f..90c2c5a 100644 --- a/nifty/pipeline/nifsLowMemoryPipeline.py +++ b/nifty/pipeline/nifsLowMemoryPipeline.py @@ -42,7 +42,7 @@ # Import config parsing. # Import config parsing. from configobj.configobj import ConfigObj -from objectoriented.getConfig import GetConfig +from objectoriented.GetConfig import GetConfig # Import custom Nifty functions. from nifsUtils import datefmt, printDirectoryLists, writeList, getParam, interactiveNIFSInput diff --git a/nifty/pipeline/nifsPipeline.py b/nifty/pipeline/nifsPipeline.py index 4fdd110..dec2ae2 100644 --- a/nifty/pipeline/nifsPipeline.py +++ b/nifty/pipeline/nifsPipeline.py @@ -46,7 +46,7 @@ # Import configuration file parsing. from configobj.configobj import ConfigObj # Import custom pipeline setup Class. -from objectoriented.getConfig import GetConfig +from objectoriented.GetConfig import GetConfig # Conveniently import some utility functions so we don't have to type the full name. from nifsUtils import datefmt, printDirectoryLists, writeList, getParam, interactiveNIFSInput From b70612ef24a8b8807f525be785fad4bbd979dfe9 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Wed, 11 Mar 2020 20:35:22 -0700 Subject: [PATCH 02/22] Experimental fix for sorting problem. --- nifty/pipeline/steps/nifsSort.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/nifty/pipeline/steps/nifsSort.py b/nifty/pipeline/steps/nifsSort.py index e6f6ccc..97daa65 100644 --- a/nifty/pipeline/steps/nifsSort.py +++ b/nifty/pipeline/steps/nifsSort.py @@ -53,7 +53,7 @@ # Paths to Nifty data. RECIPES_PATH = pkg_resources.resource_filename('nifty', 'recipes/') RUNTIME_DATA_PATH = pkg_resources.resource_filename('nifty', 'runtimeData/') - + def start(): """ @@ -423,12 +423,15 @@ def makePythonLists(rawPath, skyThreshold): date = header[0].header['DATE'].replace('-','') # Make sure no duplicate dates are being entered. if flatlist.index(flat)==0 or not oldobsid==obsid: - if date in sciDateList: - list1 = [date, obsid] - else: - list1 = [sciDateList[n], obsid] + #if date in sciDateList: + list1 = [date, obsid] obsidDateList.append(list1) - n+=1 + #else: + # Ugly fix, we have to check there aren't more flats than science dates. + # if n < len(sciDateList): + # list1 = [sciDateList[n], obsid] + # obsidDateList.append(list1) + #n+=1 oldobsid = obsid os.chdir(path) @@ -848,11 +851,11 @@ def sortCalibrations(arcdarklist, arclist, flatlist, flatdarklist, ronchilist, o if path1+'/'+entry[0]+'/'+entry[1]+'/Calibrations_'+grating not in calDirList: calDirList.append(path1+'/'+entry[0]+'/'+entry[1]+'/Calibrations_'+grating) # Copy lamps on flats to appropriate directory. - shutil.copy('./'+flatlist[i][0], objDir+'/Calibrations_'+grating+'/') + shutil.copy('./'+flatlist[i][0], path1+'/'+entry[0]+'/'+entry[1]+'/Calibrations_'+grating) flatlist[i][1] = 0 logging.info(flatlist[i][0]) count += 1 - path = objDir+'/Calibrations_'+grating+'/' + path = path1+'/'+entry[0]+'/'+entry[1]+'/Calibrations_'+grating+'/' # Create a flatlist in the relevent directory.
# Create a text file called flatlist to store the names of the # lamps on flats for later use by the pipeline. From 3f67ce352508f85f029a0859e589eedae0c9ca95 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Wed, 11 Mar 2020 22:16:55 -0700 Subject: [PATCH 03/22] Fixed some non-standard wavelength crashes. --- .../pipeline/steps/nifsBaselineCalibration.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/nifty/pipeline/steps/nifsBaselineCalibration.py b/nifty/pipeline/steps/nifsBaselineCalibration.py index a95b518..bcb2430 100755 --- a/nifty/pipeline/steps/nifsBaselineCalibration.py +++ b/nifty/pipeline/steps/nifsBaselineCalibration.py @@ -28,7 +28,7 @@ # STDLIB -import logging, os, pkg_resources, glob, shutil +import logging, os, pkg_resources, glob, shutil, sys import astropy.io.fits from pyraf import iraf, iraffunctions @@ -648,17 +648,18 @@ def makeWaveCal(arclist, arc, arcdarklist, arcdark, grating, log, over, path): # Set interactive mode. Default False for standard configurations (and True for non-standard wavelength configurations ). pauseFlag = False + interative = 'no' if band == "K" and central_wavelength == 2.20: clist=RUNTIME_DATA_PATH+"k_ar.dat" my_thresh = 50.0 - elif band == "J": + elif band == "J" and central_wavelength == 1.25: clist=RUNTIME_DATA_PATH+"j_ar.dat" my_thresh=100.0 - elif band == "H": + elif band == "H" and central_wavelength == 1.65: clist=RUNTIME_DATA_PATH+"h_ar.dat" my_thresh=100.0 - elif band == "Z": + elif band == "Z" and central_wavelength == 1.05: clist="nifs$data/ArXe_Z.dat" my_thresh=100.0 else: @@ -676,6 +677,7 @@ def makeWaveCal(arclist, arc, arcdarklist, arcdark, grating, log, over, path): clist="gnirs$data/argon.dat" my_thresh=100.0 interactive = 'yes' + pauseFlag = True # TODO(nat): I don't like this nesting at all if not pauseFlag: @@ -686,19 +688,18 @@ def makeWaveCal(arclist, arc, arcdarklist, arcdark, grating, log, over, path): if over: iraf.delete("wrgn"+arc+".fits") iraf.nswavelength("rgn"+arc, coordli=clist, nsum=10, thresho=my_thresh, \ - trace='yes', fwidth=2.0, match=-6,cradius=8.0,fl_inter='no',nfound=10,nlost=10, \ + trace='yes', fwidth=2.0, match=-6,cradius=8.0,fl_inter=interactive,nfound=10,nlost=10, \ logfile=log) else: print "\nOutput file exists and -over not set - ",\ "not determining wavelength solution and recreating the wavelength reference arc.\n" else: iraf.nswavelength("rgn"+arc, coordli=clist, nsum=10, thresho=my_thresh, \ - trace='yes', fwidth=2.0, match=-6,cradius=8.0,fl_inter='no',nfound=10,nlost=10, \ + trace='yes', fwidth=2.0, match=-6,cradius=8.0,fl_inter=interactive,nfound=10,nlost=10, \ logfile=log) else: - a = raw_input("For now, interactive Z or non-standard wavelength calibrations are unsupported. " + \ - "Bugs running IRAF tasks interactively from python mean iraf.nswavelength cannot be activated automatically. " + \ - "Therefore please run iraf.nswavelength() interactively from Pyraf to do a wavelength calibration by hand.") + print "ERROR: For now, only some wavelength configurations are supported. The grating/central wavelength(microns) possibilities are Z/1.05, J/1.25, H/1.65, K/2.20." + sys.exit(1) # Copy to relevant science observation/calibrations/ directories for item in glob.glob('database/idwrgn*'): From 40f63733572f08fe77ca5cad4307b9f86c1cdd99 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Sat, 21 Mar 2020 21:51:27 -0700 Subject: [PATCH 04/22] Added exception handlers in nifsMerge. 
Bad information in the config.cfg (a science directory that didn't exist) was causing nifsMerge to fail. I added better exception handling to hint to users why it is failing. --- nifty/pipeline/steps/nifsMerge.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nifty/pipeline/steps/nifsMerge.py b/nifty/pipeline/steps/nifsMerge.py index bdd3900..56e9475 100644 --- a/nifty/pipeline/steps/nifsMerge.py +++ b/nifty/pipeline/steps/nifsMerge.py @@ -226,6 +226,9 @@ def mergeCubes(obsDirList, cubeType, mergeType, use_pq_offsets, im3dtran, over=" # temp3 == ('/Users/ncomeau/research/newer-nifty/hd165459', '20160705') # temp4 == ('/Users/ncomeau/research/newer-nifty', 'hd165459') + if not obsDir: + raise ValueError("nifsMerge: There was a problem with the science directory list.") + # TODO: make this clearer. temp1 = os.path.split(obsDir) temp2 = os.path.split(temp1[0]) @@ -236,7 +239,10 @@ def mergeCubes(obsDirList, cubeType, mergeType, use_pq_offsets, im3dtran, over=" obsid = temp1[1] obsPath = temp3[0] targetDirectory = temp4[0] - os.chdir(obsDir + '/'+unmergedDirectory) + try: + os.chdir(obsDir + '/'+unmergedDirectory) + except OSError: + raise OSError("nifsMerge: a science directory didn't exist.") obsidlist.append(obsPath+'/Merged'+suffix+'/'+date+'_'+obsid) From 8f12c2f47cf90609740edfbbc07f47dd4f32ebec Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Wed, 6 May 2020 10:52:14 -0700 Subject: [PATCH 05/22] Fixes misspelled variable --- nifty/pipeline/steps/nifsBaselineCalibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nifty/pipeline/steps/nifsBaselineCalibration.py b/nifty/pipeline/steps/nifsBaselineCalibration.py index bcb2430..8b42b22 100755 --- a/nifty/pipeline/steps/nifsBaselineCalibration.py +++ b/nifty/pipeline/steps/nifsBaselineCalibration.py @@ -648,7 +648,7 @@ def makeWaveCal(arclist, arc, arcdarklist, arcdark, grating, log, over, path): # Set interactive mode. Default False for standard configurations (and True for non-standard wavelength configurations ). pauseFlag = False - interative = 'no' + interactive = 'no' if band == "K" and central_wavelength == 2.20: clist=RUNTIME_DATA_PATH+"k_ar.dat" From 8c59195985eab17af54bad5615b1924310ebac1e Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 7 May 2020 08:52:57 -0700 Subject: [PATCH 06/22] Adds CADC download option, plus fixes bugs. Users can now specify -c (e.g., runNifty nifsPipeline -c ...) to download raw data from the Canadian Astronomy Data Centre. This has been tested to work from the interactive config session (runNifty nifsPipeline -i) and the fully automatic mode (runNifty nifsPipeline -c -f ). --- nifty/pipeline/nifsPipeline.py | 7 +- nifty/pipeline/nifsUtils.py | 76 ++++++++++++++++++++-- nifty/pipeline/objectoriented/GetConfig.py | 14 ++++ nifty/pipeline/steps/nifsSort.py | 21 +++--- nifty/recipes/defaultConfig.cfg | 1 + setup.py | 8 +-- 6 files changed, 105 insertions(+), 22 deletions(-) diff --git a/nifty/pipeline/nifsPipeline.py b/nifty/pipeline/nifsPipeline.py index dec2ae2..5e8dd29 100644 --- a/nifty/pipeline/nifsPipeline.py +++ b/nifty/pipeline/nifsPipeline.py @@ -77,7 +77,7 @@ # The current version: # TODO(nat): fix this to import the version from setup.py. -__version__ = "1.0.0" +__version__ = "2.0.0" # The time when Nifty was started is: startTime = str(datetime.now()) @@ -101,7 +101,6 @@ def start(args): """ # Save starting path for later use and change one directory up. path = os.getcwd() - print "IT WORKED!" # Get paths to built-in Nifty data.
Special code in setup.py makes sure recipes/ and # runtimeData/ will be installed when someone installs Nifty, and accessible in this way. RECIPES_PATH = pkg_resources.resource_filename('nifty', 'recipes/') @@ -127,9 +126,7 @@ def start(args): logging.info("# NIFTY #") logging.info("# NIFS Data Reduction Pipeline #") logging.info("# Version "+ __version__+ " #") - logging.info("# July 25th, 2017 #") - logging.info("# Marie Lemoine-Busserolle #") - logging.info("# Gemini Observatory, Hilo, Hawaii #") + logging.info("# 2020 #") logging.info("# #") logging.info("####################################\n") diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 6965cfd..4c195b3 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -26,10 +26,11 @@ # STDLIB -import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re +import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections import numpy as np from xml.dom.minidom import parseString from pyraf import iraf +from astroquery.cadc import Cadc # LOCAL @@ -74,7 +75,7 @@ def interactiveNIFSInput(): """ - logging.info("\nWelcome to Nifty! The current mode is NIFS data reduction.\n\nPress enter to accept default data reduction options.") + logging.info("\nWelcome to Nifty! The current mode is NIFS data reduction.\n\nPress enter to accept default data reduction options. Type 'yes' or 'no' when prompted.") fullReduction = getParam( "Do a full data reduction with default parameters loaded from recipes/defaultConfig.cfg? [no]: ", @@ -196,7 +197,7 @@ def interactiveNIFSInput(): rawPath = getParam( "Path to raw files directory? []: ", "", - "An example of a valid raw files path string: \"/Users/nat/data/spaceMonster\"" + "An example of a valid raw files path string: \"/Users/nat/data/\"" ) program = getParam( "Gemini Program ID? []: ", @@ -207,9 +208,14 @@ def interactiveNIFSInput(): proprietaryCookie = getParam( "Cookie for proprietary downloads? []: ", '', - "You can provide a cookie from you Gemini public archive login session to automatically " + "You can provide a cookie from your Gemini public archive login session to automatically " + \ "download proprietary data." ) + cadc = getParam( + "Download from CADC? If no download will be from Gemini. [no]: ", + 'no', + "Automatic downloads can happen from either the Gemini Science Archive or the Canadian Astronomy Data Centre." + ) skyThreshold = getParam( "Sky threshold? [2.0]: ", 2.0, @@ -291,7 +297,7 @@ def interactiveNIFSInput(): # Some of these are disabled (for now!) because of bugs in interactive Pyraf tasks. # TODO(nat): when interactive is fixed re-enable this. # Temp fix: - hlineinter = getParam( + hLineInter = getParam( "Interative H-line removal? [no]: ", False, "WARNING: This is currently broken due to bugs in interactive PyRAF tasks. Use with caution." 
@@ -413,6 +419,7 @@ def interactiveNIFSInput(): config['sortConfig']['rawPath'] = rawPath config['sortConfig']['program'] = program config['sortConfig']['proprietaryCookie'] = proprietaryCookie + config['sortConfig']['cadc'] = cadc config['sortConfig']['skyThreshold'] = skyThreshold config['sortConfig']['sortTellurics'] = sortTellurics config['sortConfig']['telluricTimeThreshold'] = telluricTimeThreshold @@ -456,6 +463,21 @@ def interactiveNIFSInput(): config['mergeConfig']['use_pq_offsets'] = use_pq_offsets config['mergeConfig']['im3dtran'] = im3dtran + # Convert yes/no responses to True/False + def update(u): + for k, v in u.iteritems(): + if isinstance(v, collections.Mapping): + u[k] = update(u.get(k)) + else: + if u[k] == 'yes': + u[k] = True + elif u[k] == 'no': + u[k] = False + return u + + update(config) + + with open('./config.cfg', 'w') as outfile: config.write(outfile) @@ -1163,3 +1185,47 @@ def MEFarith(MEF, image, op, result): iraf.imarith(operand1=result+'['+str(i)+']', op=op, operand2 = image, result = result+'['+str(i)+', overwrite]', divzero = 0.0) #-----------------------------------------------------------------------------# + +def download_query_cadc(program, directory='./rawData'): + """ + Finds and downloads all CADC files for a particular gemini program ID to + the current working directory. + """ + + cadc = Cadc() + job = cadc.create_async("SELECT observationID, publisherID, productID FROM caom2.Observation \ + AS o JOIN caom2.Plane AS p ON o.obsID=p.obsID \ + WHERE instrument_name='NIFS' AND proposal_id={}".format("'"+program+"'")) + job.run().wait() + job.raise_if_error() + result = job.fetch_result().to_table() + + # Store product id's for later + pids = list(result['productID']) + + urls = cadc.get_data_urls(result) + for url, pid in zip(urls, pids): + try: + urllib.urlretrieve(url, directory+'/'+pid+'.fits') + logging.debug("Downloaded {}".format(directory+'/'+pid+'.fits')) + except Exception as e: + logging.error("A frame failed to download.") + raise e + + + + + + + + + + + + + + + + +#-----------------------------------------------------------------------------# + diff --git a/nifty/pipeline/objectoriented/GetConfig.py b/nifty/pipeline/objectoriented/GetConfig.py index 6fe1d0c..82e8515 100644 --- a/nifty/pipeline/objectoriented/GetConfig.py +++ b/nifty/pipeline/objectoriented/GetConfig.py @@ -72,6 +72,8 @@ def makeConfig(self): self.parser.add_argument('-i', '--interactive', dest = 'interactive', default = False, action = 'store_true', help = 'Create a config.cfg file interactively.') # Ability to repeat the last data reduction self.parser.add_argument('-r', '--repeat', dest = 'repeat', default = False, action = 'store_true', help = 'Repeat the last data reduction, loading saved reduction parameters from runtimeData/config.cfg.') + # Specify where downloads come from; either Gemini or CADC. + self.parser.add_argument('-c', '--cadc', dest = 'cadc', default = False, action = 'store_true', help = 'Download raw data from Canadian Astronomy Data Centre rather than the Gemini Science Archive.') # Ability to load a built-in configuration file (recipe) self.parser.add_argument('-l', '--recipe', dest = 'recipe', action = 'store', help = 'Load data reduction parameters from the a provided recipe. 
Default is default_input.cfg.') # Ability to load your own configuration file @@ -85,6 +87,7 @@ def makeConfig(self): self.repeat = self.args.repeat self.fullReduction = self.args.fullReduction self.inputfile = self.args.inputfile + self.cadc = self.args.cadc if self.inputfile: # Load input from a .cfg file user specified at command line. @@ -121,3 +124,14 @@ def makeConfig(self): with open('./' + self.configFile, 'w') as self.outfile: self.config.write(self.outfile) logging.info("\nData reduction parameters for this reduction were copied from recipes/defaultConfig.cfg to ./config.cfg.") + + if self.cadc: + try: + with open('./' + self.configFile, 'r') as self.config_file: + self.config = ConfigObj(self.config_file, unrepr=True) + self.config['sortConfig']['cadc'] = self.cadc + with open('./' + self.configFile, 'w') as self.outfile: + self.config.write(self.outfile) + logging.debug("Set CADC flag in config file.") + except: + raise ValueError("Failed to set CADC download option.") diff --git a/nifty/pipeline/steps/nifsSort.py b/nifty/pipeline/steps/nifsSort.py index 97daa65..568a793 100644 --- a/nifty/pipeline/steps/nifsSort.py +++ b/nifty/pipeline/steps/nifsSort.py @@ -44,7 +44,7 @@ # TODO(nat): goodness, this is a lot of functions. It would be nice to split this up somehow. from ..nifsUtils import getUrlFiles, getFitsHeader, FitsKeyEntry, stripString, stripNumber, \ datefmt, checkOverCopy, checkQAPIreq, checkDate, writeList, checkEntry, timeCalc, checkSameLengthFlatLists, \ -rewriteSciImageList, datefmt +rewriteSciImageList, datefmt, download_query_cadc # Import NDMapper gemini data download, by James E.H. Turner. from ..downloadFromGeminiPublicArchive import download_query_gemini @@ -127,6 +127,7 @@ def start(): sortConfig = options['sortConfig'] rawPath = sortConfig['rawPath'] program = sortConfig['program'] + cadc = sortConfig['cadc'] proprietaryCookie = sortConfig['proprietaryCookie'] skyThreshold = sortConfig['skyThreshold'] sortTellurics = sortConfig['sortTellurics'] @@ -141,18 +142,22 @@ def start(): # Download data from gemini public archive to ./rawData/. if program: - url = 'https://archive.gemini.edu/download/'+ str(program) + '/notengineering/NotFail/present/canonical' if not os.path.exists('./rawData'): os.mkdir('./rawData') - logging.info('\nDownloading data from Gemini public archive to ./rawData. This will take a few minutes.') - logging.info('\nURL used for the download: \n' + str(url)) - if proprietaryCookie: - download_query_gemini(url, './rawData', proprietaryCookie) + if cadc: + logging.info('\nDownloading data from the CADC archive to ./rawData. This will take a few minutes.') + download_query_cadc(program, os.getcwd()+'/rawData') else: - download_query_gemini(url, './rawData') + url = 'https://archive.gemini.edu/download/'+ str(program) + '/notengineering/NotFail/present/canonical' + logging.info('\nDownloading data from Gemini public archive to ./rawData. 
This will take a few minutes.') + logging.info('\nURL used for the download: \n' + str(url)) + if proprietaryCookie: + download_query_gemini(url, './rawData', proprietaryCookie) + else: + download_query_gemini(url, './rawData') + rawPath = os.getcwd()+'/rawData' - ############################################################################ ############################################################################ # # diff --git a/nifty/recipes/defaultConfig.cfg b/nifty/recipes/defaultConfig.cfg index 6b4a931..5b6afe3 100644 --- a/nifty/recipes/defaultConfig.cfg +++ b/nifty/recipes/defaultConfig.cfg @@ -28,6 +28,7 @@ mergeMethod = '' [sortConfig] rawPath = '' program = '' +cadc=False proprietaryCookie = '' skyThreshold = 2.0 sortTellurics = True diff --git a/setup.py b/setup.py index 30fd4e5..679bb61 100644 --- a/setup.py +++ b/setup.py @@ -22,12 +22,12 @@ setup( name=NAME, - version="1.0.1", - author='mbusserolle', - author_email='mbussero@gemini.edu', + version="2.0.0", + author='ncomeau', + author_email='ncomeau@uvic.ca', description='Gemini Instruments Data Reduction Framework.', long_description = README_TEXT, - url='http://www.gemini.edu', + url='https://github.com/Nat1405/Nifty4Gemini', license='MIT', classifiers=[ 'Development Status :: 5 - Production/Stable', From 3e7ffeae118ab8cc800b535e8b4c8ebacd4b121f Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 7 May 2020 11:23:30 -0700 Subject: [PATCH 07/22] Adds support for downloads using requests. --- nifty/pipeline/nifsUtils.py | 14 ++++++++++++-- setup.py | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 4c195b3..615250e 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -26,7 +26,7 @@ # STDLIB -import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections +import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections, requests import numpy as np from xml.dom.minidom import parseString from pyraf import iraf @@ -1206,7 +1206,17 @@ def download_query_cadc(program, directory='./rawData'): urls = cadc.get_data_urls(result) for url, pid in zip(urls, pids): try: - urllib.urlretrieve(url, directory+'/'+pid+'.fits') + r = requests.get(url, stream=True) + # Parse out filename from header + filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] + # Check that filename makes sense (ie starts with N and ends with .fits) + pattern = re.compile(r"N.*\.fits") + if not pattern.match(filename): + raise ValueError("Bad download filename.") + # Write the fits file + with open(directory+'/'+filename, 'wb') as f: + for chunk in r.iter_content(chunk_size=128): + f.write(chunk) logging.debug("Downloaded {}".format(directory+'/'+pid+'.fits')) except Exception as e: logging.error("A frame failed to download.") diff --git a/setup.py b/setup.py index 679bb61..9c994c1 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ 'Topic :: Scientific/Engineering :: Physics', ], keywords='Gemini NIFS nifs pipeline reduction data IRAF iraf PYRAF pyraf astronomy integral field spectroscopy ifs ifu', - python_requires='~=2.7', + python_requires='<=2.7.17', scripts=SCRIPTS, # TODO(nat): Update this to use entry_points instead of scripts for better cross-platform performance packages=find_packages(), package_data=PACKAGE_DATA From 443662ea98c9b9bc1bfc67de2b9bed3023c05f66 Mon Sep 17 00:00:00 2001 
From: Nat1405 Date: Thu, 7 May 2020 13:12:31 -0700 Subject: [PATCH 08/22] Splits out file retrieval into a new method. --- nifty/pipeline/nifsUtils.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 615250e..6fa3b60 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1206,24 +1206,27 @@ def download_query_cadc(program, directory='./rawData'): urls = cadc.get_data_urls(result) for url, pid in zip(urls, pids): try: - r = requests.get(url, stream=True) - # Parse out filename from header - filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] - # Check that filename makes sense (ie starts with N and ends with .fits) - pattern = re.compile(r"N.*\.fits") - if not pattern.match(filename): - raise ValueError("Bad download filename.") - # Write the fits file - with open(directory+'/'+filename, 'wb') as f: - for chunk in r.iter_content(chunk_size=128): - f.write(chunk) - logging.debug("Downloaded {}".format(directory+'/'+pid+'.fits')) + filename = get_file(url) + shutil.move(filename, directory+'/'+filename) + logging.debug("Downloaded {}".format(filename)) except Exception as e: logging.error("A frame failed to download.") raise e - +def get_file(url): + """ + Gets a file from the specified url and returns the filename. + """ + r = requests.get(url, stream=True) + # Parse out filename from header + filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] + # Write the fits file + with open(filename, 'wb') as f: + for chunk in r.iter_content(chunk_size=128): + f.write(chunk) + + return filename From 0aae71e6f266c041caa3ae02f3034c76f3bbe54b Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 7 May 2020 13:15:23 -0700 Subject: [PATCH 09/22] Fixes function naming conventions to match original project. --- nifty/pipeline/nifsUtils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 6fa3b60..acd3d21 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1186,7 +1186,7 @@ def MEFarith(MEF, image, op, result): #-----------------------------------------------------------------------------# -def download_query_cadc(program, directory='./rawData'): +def downloadQueryCadc(program, directory='./rawData'): """ Finds and downloads all CADC files for a particular gemini program ID to the current working directory. @@ -1214,7 +1214,7 @@ def download_query_cadc(program, directory='./rawData'): raise e -def get_file(url): +def getFile(url): """ Gets a file from the specified url and returns the filename. """ From 22c3f62f73f2f4db3147e7fae1d6e20c439f1e4f Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 7 May 2020 13:41:22 -0700 Subject: [PATCH 10/22] Changes '-c' CADC flag to '-d/--data-source' option. Rather than just choosing between Gemini and CADC downloads, --data-source makes it possible to support more than two data sources. To add support for a new archive, at minimum nifsSort.py needs to be changed, roughly as in the sketch below.
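As a rough sketch of that extension point (illustrative only; the 'MAST' label and the downloadQueryMast() helper are hypothetical placeholders, while 'GSA' and 'CADC' are the two sources this series actually wires up), a third archive would slot into the dataSource dispatch used in nifsSort.start():

    if dataSource == 'CADC':
        downloadQueryCadc(program, os.getcwd()+'/rawData')
    elif dataSource == 'GSA':
        download_query_gemini(program, './rawData')
    elif dataSource == 'MAST':
        # Hypothetical additional archive and download helper.
        downloadQueryMast(program, os.getcwd()+'/rawData')
    else:
        raise ValueError("Invalid dataSource in config file.")

The new value would then be passed with -d/--data-source or set as dataSource under [sortConfig] in config.cfg.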
--- nifty/pipeline/nifsUtils.py | 10 +++++----- nifty/pipeline/objectoriented/GetConfig.py | 21 ++++++++++++++------- nifty/pipeline/steps/nifsSort.py | 12 +++++++----- nifty/recipes/defaultConfig.cfg | 2 +- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index acd3d21..1620e74 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -211,9 +211,9 @@ def interactiveNIFSInput(): "You can provide a cookie from your Gemini public archive login session to automatically " + \ "download proprietary data." ) - cadc = getParam( - "Download from CADC? If no download will be from Gemini. [no]: ", - 'no', + dataSource = getParam( + "Select a raw data source; 'GSA' for Gemini Science Archive, 'CADC' for Canadian Astronomy Data Centre. [GSA]: ", + 'GSA', "Automatic downloads can happen from either the Gemini Science Archive or the Canadian Astronomy Data Centre." ) skyThreshold = getParam( @@ -419,7 +419,7 @@ def interactiveNIFSInput(): config['sortConfig']['rawPath'] = rawPath config['sortConfig']['program'] = program config['sortConfig']['proprietaryCookie'] = proprietaryCookie - config['sortConfig']['cadc'] = cadc + config['sortConfig']['dataSource'] = dataSource config['sortConfig']['skyThreshold'] = skyThreshold config['sortConfig']['sortTellurics'] = sortTellurics config['sortConfig']['telluricTimeThreshold'] = telluricTimeThreshold @@ -1206,7 +1206,7 @@ def downloadQueryCadc(program, directory='./rawData'): urls = cadc.get_data_urls(result) for url, pid in zip(urls, pids): try: - filename = get_file(url) + filename = getFile(url) shutil.move(filename, directory+'/'+filename) logging.debug("Downloaded {}".format(filename)) except Exception as e: diff --git a/nifty/pipeline/objectoriented/GetConfig.py b/nifty/pipeline/objectoriented/GetConfig.py index 82e8515..919710d 100644 --- a/nifty/pipeline/objectoriented/GetConfig.py +++ b/nifty/pipeline/objectoriented/GetConfig.py @@ -60,6 +60,14 @@ def __init__(self, args, script): self.makeConfig() + def checkConfigExists(self, configFile): + """ + Checks that a config file exists and if not, sets Nifty to use default configuration. + """ + if os.path.exists(configFile): + os.remove(configFile) + shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', configFile) + def makeConfig(self): """ Make a configuration file. @@ -73,7 +81,7 @@ def makeConfig(self): # Ability to repeat the last data reduction self.parser.add_argument('-r', '--repeat', dest = 'repeat', default = False, action = 'store_true', help = 'Repeat the last data reduction, loading saved reduction parameters from runtimeData/config.cfg.') # Specify where downloads come from; either Gemini or CADC. - self.parser.add_argument('-c', '--cadc', dest = 'cadc', default = False, action = 'store_true', help = 'Download raw data from Canadian Astronomy Data Centre rather than the Gemini Science Archive.') + self.parser.add_argument('-d', '--data-source', dest = 'dataSource', default = 'GSA', action = 'store', help = 'Download raw data from the Canadian Astronomy Data Centre or the Gemini Science Archive. Valid options are "GSA" or "CADC".') # Ability to load a built-in configuration file (recipe) self.parser.add_argument('-l', '--recipe', dest = 'recipe', action = 'store', help = 'Load data reduction parameters from the a provided recipe. 
Default is default_input.cfg.') # Ability to load your own configuration file @@ -87,7 +95,7 @@ def makeConfig(self): self.repeat = self.args.repeat self.fullReduction = self.args.fullReduction self.inputfile = self.args.inputfile - self.cadc = self.args.cadc + self.dataSource = self.args.dataSource if self.inputfile: # Load input from a .cfg file user specified at command line. @@ -105,10 +113,7 @@ def makeConfig(self): self.fullReduction = interactiveNIFSInput() if self.fullReduction: - # Copy default input and use it - if os.path.exists('./' + self.configFile): - os.remove('./' + self.configFile) - shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', './'+ self.configFile) + self.checkConfigExists(self.configFile) # Update default config file with path to raw data or program ID. with open('./' + self.configFile, 'r') as self.config_file: self.config = ConfigObj(self.config_file, unrepr=True) @@ -125,8 +130,10 @@ def makeConfig(self): self.config.write(self.outfile) logging.info("\nData reduction parameters for this reduction were copied from recipes/defaultConfig.cfg to ./config.cfg.") - if self.cadc: + # If user selects a non-default data source, change it in the config file. + if self.dataSource != 'GSA': try: + self.checkConfigExists(self.configFile) with open('./' + self.configFile, 'r') as self.config_file: self.config = ConfigObj(self.config_file, unrepr=True) self.config['sortConfig']['cadc'] = self.cadc diff --git a/nifty/pipeline/steps/nifsSort.py b/nifty/pipeline/steps/nifsSort.py index 568a793..711b8eb 100644 --- a/nifty/pipeline/steps/nifsSort.py +++ b/nifty/pipeline/steps/nifsSort.py @@ -44,7 +44,7 @@ # TODO(nat): goodness, this is a lot of functions. It would be nice to split this up somehow. from ..nifsUtils import getUrlFiles, getFitsHeader, FitsKeyEntry, stripString, stripNumber, \ datefmt, checkOverCopy, checkQAPIreq, checkDate, writeList, checkEntry, timeCalc, checkSameLengthFlatLists, \ -rewriteSciImageList, datefmt, download_query_cadc +rewriteSciImageList, datefmt, downloadQueryCadc # Import NDMapper gemini data download, by James E.H. Turner. from ..downloadFromGeminiPublicArchive import download_query_gemini @@ -127,7 +127,7 @@ def start(): sortConfig = options['sortConfig'] rawPath = sortConfig['rawPath'] program = sortConfig['program'] - cadc = sortConfig['cadc'] + dataSource = sortConfig['dataSource'] proprietaryCookie = sortConfig['proprietaryCookie'] skyThreshold = sortConfig['skyThreshold'] sortTellurics = sortConfig['sortTellurics'] @@ -144,10 +144,10 @@ def start(): if program: if not os.path.exists('./rawData'): os.mkdir('./rawData') - if cadc: + if dataSource == 'CADC': logging.info('\nDownloading data from the CADC archive to ./rawData. This will take a few minutes.') - download_query_cadc(program, os.getcwd()+'/rawData') - else: + downloadQueryCadc(program, os.getcwd()+'/rawData') + elif dataSource == 'GSA': url = 'https://archive.gemini.edu/download/'+ str(program) + '/notengineering/NotFail/present/canonical' logging.info('\nDownloading data from Gemini public archive to ./rawData. 
This will take a few minutes.') logging.info('\nURL used for the download: \n' + str(url)) @@ -155,6 +155,8 @@ def start(): download_query_gemini(url, './rawData', proprietaryCookie) else: download_query_gemini(url, './rawData') + else: + raise ValueError("Invalid dataSource in config file.") rawPath = os.getcwd()+'/rawData' diff --git a/nifty/recipes/defaultConfig.cfg b/nifty/recipes/defaultConfig.cfg index 5b6afe3..94be885 100644 --- a/nifty/recipes/defaultConfig.cfg +++ b/nifty/recipes/defaultConfig.cfg @@ -28,7 +28,7 @@ mergeMethod = '' [sortConfig] rawPath = '' program = '' -cadc=False +dataSource='GSA' proprietaryCookie = '' skyThreshold = 2.0 sortTellurics = True From 6dd7cdd6bda77d2441f90fd06339295584e9bc1d Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 7 May 2020 14:18:22 -0700 Subject: [PATCH 11/22] Cleans up url construction code. --- .../ndmapperDownloader.py | 16 +++++++++++++--- nifty/pipeline/nifsUtils.py | 1 - nifty/pipeline/steps/nifsSort.py | 7 ++----- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/nifty/pipeline/downloadFromGeminiPublicArchive/ndmapperDownloader.py b/nifty/pipeline/downloadFromGeminiPublicArchive/ndmapperDownloader.py index 7a8bcbd..49342b6 100644 --- a/nifty/pipeline/downloadFromGeminiPublicArchive/ndmapperDownloader.py +++ b/nifty/pipeline/downloadFromGeminiPublicArchive/ndmapperDownloader.py @@ -31,6 +31,7 @@ import os, os.path import sys +import logging from contextlib import closing from StringIO import StringIO import urllib2 @@ -38,7 +39,7 @@ import tarfile import hashlib -def download_query_gemini(query, dirname='', cookieName=''): +def download_query_gemini(program, dirname='', cookieName=''): """ Perform a user-specified Gemini science archive query and save the files returned to a specified directory. @@ -54,16 +55,25 @@ def download_query_gemini(query, dirname='', cookieName=''): which should be optimal as long as the archive isn't unreasonably large (to do: consider adding an option to write it to a temporary file). + # Modified 2020 by Nat Comeau + Parameters ---------- - query : str - The query URL (or just the path component) to request from the server. + program : str + The Gemini program ID to request from the server. dirname : str, optional The (absolute or relative) directory path in which to place the files. """ + + # Modified 2020 by Nat Comeau + query = 'https://archive.gemini.edu/download/'+ str(program) + '/notengineering/NotFail/present/canonical' + logging.info('\nDownloading data from Gemini public archive to ./rawData. This will take a few minutes.') + logging.info('\nURL used for the download: \n' + str(query)) + + checksum_fn = 'md5sums.txt' aux_fn = [checksum_fn, 'README.txt'] diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 1620e74..d7ee548 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1206,7 +1206,6 @@ def downloadQueryCadc(program, directory='./rawData'): urls = cadc.get_data_urls(result) for url, pid in zip(urls, pids): try: - filename = getFile(url) shutil.move(filename, directory+'/'+filename) logging.debug("Downloaded {}".format(filename)) except Exception as e: diff --git a/nifty/pipeline/steps/nifsSort.py b/nifty/pipeline/steps/nifsSort.py index 711b8eb..e878a57 100644 --- a/nifty/pipeline/steps/nifsSort.py +++ b/nifty/pipeline/steps/nifsSort.py @@ -148,13 +148,10 @@ def start(): logging.info('\nDownloading data from the CADC archive to ./rawData. 
This will take a few minutes.') downloadQueryCadc(program, os.getcwd()+'/rawData') elif dataSource == 'GSA': - url = 'https://archive.gemini.edu/download/'+ str(program) + '/notengineering/NotFail/present/canonical' - logging.info('\nDownloading data from Gemini public archive to ./rawData. This will take a few minutes.') - logging.info('\nURL used for the download: \n' + str(url)) if proprietaryCookie: - download_query_gemini(url, './rawData', proprietaryCookie) + download_query_gemini(program, './rawData', proprietaryCookie) else: - download_query_gemini(url, './rawData') + download_query_gemini(program, './rawData') else: raise ValueError("Invalid dataSource in config file.") From ec027de263581daf9b5734539c1e6a59129980b3 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Fri, 8 May 2020 08:09:27 -0700 Subject: [PATCH 12/22] Fixes silly bugs introduced by 74a11e9. --- nifty/pipeline/nifsUtils.py | 1 + nifty/pipeline/objectoriented/GetConfig.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index d7ee548..1620e74 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1206,6 +1206,7 @@ def downloadQueryCadc(program, directory='./rawData'): urls = cadc.get_data_urls(result) for url, pid in zip(urls, pids): try: + filename = getFile(url) shutil.move(filename, directory+'/'+filename) logging.debug("Downloaded {}".format(filename)) except Exception as e: diff --git a/nifty/pipeline/objectoriented/GetConfig.py b/nifty/pipeline/objectoriented/GetConfig.py index 919710d..c761d43 100644 --- a/nifty/pipeline/objectoriented/GetConfig.py +++ b/nifty/pipeline/objectoriented/GetConfig.py @@ -64,10 +64,18 @@ def checkConfigExists(self, configFile): """ Checks that a config file exists and if not, sets Nifty to use default configuration. """ + if not os.path.exists(configFile): + shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', configFile) + + def overwriteWithDefault(self, configFile): + """ + Overwrites with default configuration. + """ if os.path.exists(configFile): os.remove(configFile) shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', configFile) + def makeConfig(self): """ Make a configuration file. @@ -113,7 +121,7 @@ def makeConfig(self): self.fullReduction = interactiveNIFSInput() if self.fullReduction: - self.checkConfigExists(self.configFile) + self.overwriteWithDefault(self.configFile) # Update default config file with path to raw data or program ID. with open('./' + self.configFile, 'r') as self.config_file: self.config = ConfigObj(self.config_file, unrepr=True) @@ -136,9 +144,9 @@ def makeConfig(self): self.checkConfigExists(self.configFile) with open('./' + self.configFile, 'r') as self.config_file: self.config = ConfigObj(self.config_file, unrepr=True) - self.config['sortConfig']['cadc'] = self.cadc + self.config['sortConfig']['dataSource'] = self.dataSource with open('./' + self.configFile, 'w') as self.outfile: self.config.write(self.outfile) - logging.debug("Set CADC flag in config file.") + logging.debug("Set dataSource option in config file.") except: - raise ValueError("Failed to set CADC download option.") + raise ValueError("Failed to set dataSource option.") From 42f70dfa4ad3de71e486e8ca764e7c301e691b08 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Fri, 8 May 2020 09:04:15 -0700 Subject: [PATCH 13/22] Adds better error handling to get_file(). 
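A minimal, self-contained illustration of the fallback this adds (the URL is the sample quoted in the code comment below): when the Content-Disposition header is absent, the filename is recovered from the URL path itself.

    # Illustration only: fallback filename parse when 'Content-Disposition' is missing.
    url = "https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/data/pub/GEM/N20140505S0114.fits?RUNID=mf731ukqsipqpdgk"
    filename = (url.split('/')[-1]).split('?')[0]
    assert filename == "N20140505S0114.fits"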
--- nifty/pipeline/nifsUtils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 1620e74..4c3e474 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1220,7 +1220,13 @@ def getFile(url): """ r = requests.get(url, stream=True) # Parse out filename from header - filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] + try: + filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] + except KeyError: + # 'Content-Disposition' header wasn't found, so parse filename from URL + # Typical URL looks like: + # https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/data/pub/GEM/N20140505S0114.fits?RUNID=mf731ukqsipqpdgk + filename = (url.split('/')[-1]).split('?')[0] # Write the fits file with open(filename, 'wb') as f: for chunk in r.iter_content(chunk_size=128): From 453c787490cf8b00495cd7e3eeccf25587700e39 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Mon, 11 May 2020 16:13:32 -0700 Subject: [PATCH 14/22] Fixes silly bugs in d5ead22e7. There were problems with finding the config file. --- nifty/pipeline/objectoriented/GetConfig.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nifty/pipeline/objectoriented/GetConfig.py b/nifty/pipeline/objectoriented/GetConfig.py index c761d43..14a3788 100644 --- a/nifty/pipeline/objectoriented/GetConfig.py +++ b/nifty/pipeline/objectoriented/GetConfig.py @@ -73,7 +73,7 @@ def overwriteWithDefault(self, configFile): """ if os.path.exists(configFile): os.remove(configFile) - shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', configFile) + shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', configFile) def makeConfig(self): From e006513898aa377eea6e810df73ac6a3f965352d Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 14 May 2020 08:27:10 -0700 Subject: [PATCH 15/22] Fixes path seperators (wasn't using os.path.join) in GetConfig.py. --- nifty/pipeline/objectoriented/GetConfig.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/nifty/pipeline/objectoriented/GetConfig.py b/nifty/pipeline/objectoriented/GetConfig.py index 14a3788..ad06b76 100644 --- a/nifty/pipeline/objectoriented/GetConfig.py +++ b/nifty/pipeline/objectoriented/GetConfig.py @@ -65,7 +65,7 @@ def checkConfigExists(self, configFile): Checks that a config file exists and if not, sets Nifty to use default configuration. """ if not os.path.exists(configFile): - shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', configFile) + shutil.copy(os.path.join(self.RECIPES_PATH,'defaultConfig.cfg'), configFile) def overwriteWithDefault(self, configFile): """ @@ -73,7 +73,7 @@ def overwriteWithDefault(self, configFile): """ if os.path.exists(configFile): os.remove(configFile) - shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', configFile) + shutil.copy(os.path.join(self.RECIPES_PATH,'defaultConfig.cfg'), configFile) def makeConfig(self): @@ -107,9 +107,9 @@ def makeConfig(self): if self.inputfile: # Load input from a .cfg file user specified at command line. 
- if self.inputfile != self.configFile and os.path.exists('./'+ self.configFile): - os.remove('./'+ self.configFile) - shutil.copy(self.inputfile, './'+ self.configFile) + if self.inputfile != self.configFile and os.path.exists(self.configFile): + os.remove(self.configFile) + shutil.copy(self.inputfile, self.configFile) logging.info("\nPipeline configuration for this data reduction was read from " + str(self.inputfile) + \ ", and if not named config.cfg, copied to ./config.cfg.") @@ -123,7 +123,7 @@ def makeConfig(self): if self.fullReduction: self.overwriteWithDefault(self.configFile) # Update default config file with path to raw data or program ID. - with open('./' + self.configFile, 'r') as self.config_file: + with open(self.configFile, 'r') as self.config_file: self.config = ConfigObj(self.config_file, unrepr=True) self.sortConfig = self.config['sortConfig'] if self.fullReduction[0] == "G": @@ -134,7 +134,7 @@ def makeConfig(self): # Else treat it as a path. self.sortConfig['program'] = "" self.sortConfig['rawPath'] = self.fullReduction - with open('./' + self.configFile, 'w') as self.outfile: + with open(self.configFile, 'w') as self.outfile: self.config.write(self.outfile) logging.info("\nData reduction parameters for this reduction were copied from recipes/defaultConfig.cfg to ./config.cfg.") @@ -142,10 +142,10 @@ def makeConfig(self): if self.dataSource != 'GSA': try: self.checkConfigExists(self.configFile) - with open('./' + self.configFile, 'r') as self.config_file: + with open(self.configFile, 'r') as self.config_file: self.config = ConfigObj(self.config_file, unrepr=True) self.config['sortConfig']['dataSource'] = self.dataSource - with open('./' + self.configFile, 'w') as self.outfile: + with open(self.configFile, 'w') as self.outfile: self.config.write(self.outfile) logging.debug("Set dataSource option in config file.") except: From 061fd560f710d559a8511eda4d472ecba716c1dc Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 14 May 2020 08:52:19 -0700 Subject: [PATCH 16/22] Sends downloaded CADC files to a temp location first. 
--- nifty/pipeline/nifsUtils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 4c3e474..bb036b5 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1207,8 +1207,8 @@ def downloadQueryCadc(program, directory='./rawData'): for url, pid in zip(urls, pids): try: filename = getFile(url) - shutil.move(filename, directory+'/'+filename) - logging.debug("Downloaded {}".format(filename)) + shutil.move(filename, os.path.join(directory, filename.lstrip('.temp-'))) + logging.debug("Downloaded {}".format(filename.lstrip('.temp-'))) except Exception as e: logging.error("A frame failed to download.") raise e @@ -1221,12 +1221,12 @@ def getFile(url): r = requests.get(url, stream=True) # Parse out filename from header try: - filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] + filename = '.temp-' + re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] except KeyError: # 'Content-Disposition' header wasn't found, so parse filename from URL # Typical URL looks like: # https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/data/pub/GEM/N20140505S0114.fits?RUNID=mf731ukqsipqpdgk - filename = (url.split('/')[-1]).split('?')[0] + filename = '.temp-' + (url.split('/')[-1]).split('?')[0] # Write the fits file with open(filename, 'wb') as f: for chunk in r.iter_content(chunk_size=128): From fa13680b2e7606b983177bc9bff1836798f0a431 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 14 May 2020 10:13:10 -0700 Subject: [PATCH 17/22] Adds CADC downloads MD5 verification. --- nifty/pipeline/nifsUtils.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index bb036b5..f693270 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -26,7 +26,7 @@ # STDLIB -import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections, requests +import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections, requests, hashlib import numpy as np from xml.dom.minidom import parseString from pyraf import iraf @@ -1232,6 +1232,19 @@ def getFile(url): for chunk in r.iter_content(chunk_size=128): f.write(chunk) + # Do MD5 Verification of the file; raise IO error if a problem happened. + try: + server_checksum = r.headers['Content-MD5'] + with open(filename, 'rb') as f: + download_checksum = hashlib.md5(f.read()).hexdigest() + if server_checksum != download_checksum: + logging.error("Problem downloading {} from {}.".format(filename, url)) + raise IOError + + except KeyError: + # Catch case that header didn't contain a 'content-md5' header + logging.warning("'Content-MD5 header not found for file {}. 
Skipping checksum validation.") + return filename From ba4b95cfc933617f968f6df8f7e7a0349d3d8a6c Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 14 May 2020 10:28:52 -0700 Subject: [PATCH 18/22] Changes -d/--data-source flag to -s/--data-source flag --- nifty/pipeline/objectoriented/GetConfig.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nifty/pipeline/objectoriented/GetConfig.py b/nifty/pipeline/objectoriented/GetConfig.py index ad06b76..c6f1166 100644 --- a/nifty/pipeline/objectoriented/GetConfig.py +++ b/nifty/pipeline/objectoriented/GetConfig.py @@ -89,7 +89,7 @@ def makeConfig(self): # Ability to repeat the last data reduction self.parser.add_argument('-r', '--repeat', dest = 'repeat', default = False, action = 'store_true', help = 'Repeat the last data reduction, loading saved reduction parameters from runtimeData/config.cfg.') # Specify where downloads come from; either Gemini or CADC. - self.parser.add_argument('-d', '--data-source', dest = 'dataSource', default = 'GSA', action = 'store', help = 'Download raw data from the Canadian Astronomy Data Centre or the Gemini Science Archive. Valid options are "GSA" or "CADC".') + self.parser.add_argument('-s', '--data-source', dest = 'dataSource', default = 'GSA', action = 'store', help = 'Download raw data from the Canadian Astronomy Data Centre or the Gemini Science Archive. Valid options are "GSA" or "CADC".') # Ability to load a built-in configuration file (recipe) self.parser.add_argument('-l', '--recipe', dest = 'recipe', action = 'store', help = 'Load data reduction parameters from the a provided recipe. Default is default_input.cfg.') # Ability to load your own configuration file From 7e57b99df5278818d62f48a906785375e2da4236 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 14 May 2020 11:00:54 -0700 Subject: [PATCH 19/22] Moves temp files to download directory. --- nifty/pipeline/nifsUtils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index f693270..9598a33 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1204,14 +1204,18 @@ def downloadQueryCadc(program, directory='./rawData'): pids = list(result['productID']) urls = cadc.get_data_urls(result) + cwd = os.getcwd() + os.chdir(directory) for url, pid in zip(urls, pids): try: filename = getFile(url) - shutil.move(filename, os.path.join(directory, filename.lstrip('.temp-'))) + shutil.move(filename, filename.lstrip('.temp-')) logging.debug("Downloaded {}".format(filename.lstrip('.temp-'))) except Exception as e: logging.error("A frame failed to download.") + os.chdir(cwd) raise e + os.chdir(cwd) def getFile(url): From ad6d231bbe381c9eebad0a5a07df00a0e5810851 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 14 May 2020 14:44:45 -0700 Subject: [PATCH 20/22] Changes md5 verification to happen at the same time as downloads. 
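A small stand-alone example of the idea (illustration only, not project code): updating hashlib.md5 chunk by chunk as the chunks are written yields the same digest as hashing the whole payload afterwards, which is what lets the separate re-read of the downloaded file be dropped.

    import hashlib

    data = b"0123456789" * 1000           # stand-in for a downloaded FITS payload
    streamed = hashlib.md5()
    for i in range(0, len(data), 128):    # same 128-byte chunking used with iter_content
        streamed.update(data[i:i + 128])
    assert streamed.hexdigest() == hashlib.md5(data).hexdigest()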
--- nifty/pipeline/nifsUtils.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index 9598a33..aa77c90 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -1231,17 +1231,17 @@ def getFile(url): # Typical URL looks like: # https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/data/pub/GEM/N20140505S0114.fits?RUNID=mf731ukqsipqpdgk filename = '.temp-' + (url.split('/')[-1]).split('?')[0] - # Write the fits file - with open(filename, 'wb') as f: - for chunk in r.iter_content(chunk_size=128): - f.write(chunk) - - # Do MD5 Verification of the file; raise IO error if a problem happened. + + # Write the fits file, verifying the md5 hash as we go try: server_checksum = r.headers['Content-MD5'] - with open(filename, 'rb') as f: - download_checksum = hashlib.md5(f.read()).hexdigest() - if server_checksum != download_checksum: + download_checksum = hashlib.md5() + with open(filename, 'wb') as f: + for chunk in r.iter_content(chunk_size=128): + f.write(chunk) + download_checksum.update(chunk) + + if server_checksum != download_checksum.hexdigest(): logging.error("Problem downloading {} from {}.".format(filename, url)) raise IOError From 79a8f5972e45caebb3ff40574ad94d7b961816d4 Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Thu, 14 May 2020 15:36:45 -0700 Subject: [PATCH 21/22] Makes CADC downloads use temp files (via tempfile.TemporaryFile). --- nifty/pipeline/nifsUtils.py | 56 ++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/nifty/pipeline/nifsUtils.py b/nifty/pipeline/nifsUtils.py index aa77c90..bd49ceb 100644 --- a/nifty/pipeline/nifsUtils.py +++ b/nifty/pipeline/nifsUtils.py @@ -26,7 +26,7 @@ # STDLIB -import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections, requests, hashlib +import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections, requests, hashlib, tempfile import numpy as np from xml.dom.minidom import parseString from pyraf import iraf @@ -1209,8 +1209,7 @@ def downloadQueryCadc(program, directory='./rawData'): for url, pid in zip(urls, pids): try: filename = getFile(url) - shutil.move(filename, filename.lstrip('.temp-')) - logging.debug("Downloaded {}".format(filename.lstrip('.temp-'))) + logging.debug("Downloaded {}".format(filename)) except Exception as e: logging.error("A frame failed to download.") os.chdir(cwd) @@ -1225,38 +1224,45 @@ def getFile(url): r = requests.get(url, stream=True) # Parse out filename from header try: - filename = '.temp-' + re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] + filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0] except KeyError: # 'Content-Disposition' header wasn't found, so parse filename from URL # Typical URL looks like: # https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/data/pub/GEM/N20140505S0114.fits?RUNID=mf731ukqsipqpdgk - filename = '.temp-' + (url.split('/')[-1]).split('?')[0] + filename = (url.split('/')[-1]).split('?')[0] - # Write the fits file, verifying the md5 hash as we go - try: - server_checksum = r.headers['Content-MD5'] - download_checksum = hashlib.md5() - with open(filename, 'wb') as f: - for chunk in r.iter_content(chunk_size=128): - f.write(chunk) - download_checksum.update(chunk) - - if server_checksum != download_checksum.hexdigest(): - logging.error("Problem 
downloading {} from {}.".format(filename, url)) - raise IOError - - except KeyError: - # Catch case that header didn't contain a 'content-md5' header - logging.warning("'Content-MD5 header not found for file {}. Skipping checksum validation.") + # Write the fits file to the current directory, verifying the md5 hash as we go. Store partial results in a temporary file. + writeWithTempFile(r, filename) return filename +def writeWithTempFile(request, filename): + """ Write the fits file, verifying the md5 hash as we go. Store partial results in a temporary file. """ + temp_downloads_path = '.temp-downloads' + if not os.path.exists(temp_downloads_path): + os.mkdir(temp_downloads_path) + try: + server_checksum = request.headers['Content-MD5'] + except KeyError: + # Catch case that header didn't contain a 'content-md5' header + logging.warning("Content-MD5 header not found for file {}. Skipping checksum validation.".format(filename)) + server_checksum = None + + # Write out content (first to a temp file) optionally doing an md5 verification. + download_checksum = hashlib.md5() + with tempfile.TemporaryFile(mode='w+b', prefix=filename, dir=temp_downloads_path) as f: + for chunk in request.iter_content(chunk_size=128): + f.write(chunk) + download_checksum.update(chunk) + if server_checksum and (server_checksum != download_checksum.hexdigest()): + logging.error("Problem downloading {} from {}.".format(filename, url)) + raise IOError + f.seek(0) + with open(filename, 'w') as out_fp: + out_fp.write(f.read()) - - - - + return filename From f788b67b7d41b078bdb98d32f7d8e9b90d2877ff Mon Sep 17 00:00:00 2001 From: Nat1405 Date: Mon, 8 Jun 2020 12:56:45 -0700 Subject: [PATCH 22/22] Adds backwards compatibility for old config files for dataSource option. --- nifty/pipeline/steps/nifsSort.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nifty/pipeline/steps/nifsSort.py b/nifty/pipeline/steps/nifsSort.py index e878a57..73c9327 100644 --- a/nifty/pipeline/steps/nifsSort.py +++ b/nifty/pipeline/steps/nifsSort.py @@ -127,7 +127,11 @@ def start(): sortConfig = options['sortConfig'] rawPath = sortConfig['rawPath'] program = sortConfig['program'] - dataSource = sortConfig['dataSource'] + # Backwards compatability with old config files + try: + dataSource = sortConfig['dataSource'] + except KeyError: + dataSource = 'GSA' proprietaryCookie = sortConfig['proprietaryCookie'] skyThreshold = sortConfig['skyThreshold'] sortTellurics = sortConfig['sortTellurics']
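With the series applied, the CADC helper can also be driven directly from Python. A usage sketch only (the program ID is a placeholder, and this assumes the nifty package and its astroquery/requests dependencies are installed):

    import os
    from nifty.pipeline.nifsUtils import downloadQueryCadc

    rawDir = os.path.join(os.getcwd(), 'rawData')
    if not os.path.exists(rawDir):
        os.mkdir(rawDir)
    downloadQueryCadc('GN-2014A-Q-85', directory=rawDir)  # placeholder program ID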