Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add additions from CADC work #6

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
c37893a
Merge pull request #1 from mrlb05/master
Nat1405 Feb 11, 2018
6b5b38c
Fixed mispelled module name
Nat1405 Mar 9, 2020
b70612e
Experimental fix for sorting problem.
Nat1405 Mar 12, 2020
3f67ce3
Fixed some non-standard wavelength crashes.
Nat1405 Mar 12, 2020
40f6373
Added exception handlers in nifsMerge.
Nat1405 Mar 22, 2020
8f12c2f
Fixes misspelled variable
Nat1405 May 6, 2020
8c59195
Adds CADC download option, plus fixes bugs.
Nat1405 May 7, 2020
3e7ffea
Adds support for downloads using requests.
Nat1405 May 7, 2020
443662e
Splits out getting of file to new method.
Nat1405 May 7, 2020
0aae71e
Fixes function naming conventions to match original project.
Nat1405 May 7, 2020
22c3f62
Changes '-c' CADC flag to '-d/--data-source' option.
Nat1405 May 7, 2020
6dd7cdd
Cleans up url construction code.
Nat1405 May 7, 2020
ec027de
Fixes silly bugs introduced by 74a11e9.
Nat1405 May 8, 2020
42f70df
Adds better error handling to get_file().
Nat1405 May 8, 2020
453c787
Fixes silly bugs in d5ead22e7.
Nat1405 May 11, 2020
e006513
Fixes path seperators (wasn't using os.path.join) in GetConfig.py.
Nat1405 May 14, 2020
061fd56
Sends downloaded CADC files to a temp location first.
Nat1405 May 14, 2020
fa13680
Adds CADC downloads MD5 verification.
Nat1405 May 14, 2020
ba4b95c
Changes -d/--data-source flag to -s/--data-source flag
Nat1405 May 14, 2020
7e57b99
Moves temp files to download directory.
Nat1405 May 14, 2020
ad6d231
Changes md5 verification to happen at the same time as downloads.
Nat1405 May 14, 2020
79a8f59
Makes CADC downloads use temp files (via tempfile.TemporaryFile).
Nat1405 May 14, 2020
f788b67
Adds backwards compatibility for old config files for dataSource option.
Nat1405 Jun 8, 2020
7ec3d72
Merge pull request #4 from Nat1405/cadc_downloads
Nat1405 Jul 9, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,15 @@

import os, os.path
import sys
import logging
from contextlib import closing
from StringIO import StringIO
import urllib2
import xml.dom.minidom as xmd
import tarfile
import hashlib

def download_query_gemini(query, dirname='', cookieName=''):
def download_query_gemini(program, dirname='', cookieName=''):
"""
Perform a user-specified Gemini science archive query and save the files
returned to a specified directory.
Expand All @@ -54,16 +55,25 @@ def download_query_gemini(query, dirname='', cookieName=''):
which should be optimal as long as the archive isn't unreasonably large
(to do: consider adding an option to write it to a temporary file).

# Modified 2020 by Nat Comeau

Parameters
----------

query : str
The query URL (or just the path component) to request from the server.
program : str
The Gemini program ID to request from the server.

dirname : str, optional
The (absolute or relative) directory path in which to place the files.

"""

# Modified 2020 by Nat Comeau
query = 'https://archive.gemini.edu/download/'+ str(program) + '/notengineering/NotFail/present/canonical'
logging.info('\nDownloading data from Gemini public archive to ./rawData. This will take a few minutes.')
logging.info('\nURL used for the download: \n' + str(query))


checksum_fn = 'md5sums.txt'
aux_fn = [checksum_fn, 'README.txt']

Expand Down
2 changes: 1 addition & 1 deletion nifty/pipeline/nifsLowMemoryPipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
# Import config parsing.
# Import config parsing.
from configobj.configobj import ConfigObj
from objectoriented.getConfig import GetConfig
from objectoriented.GetConfig import GetConfig
# Import custom Nifty functions.
from nifsUtils import datefmt, printDirectoryLists, writeList, getParam, interactiveNIFSInput

Expand Down
9 changes: 3 additions & 6 deletions nifty/pipeline/nifsPipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
# Import configuration file parsing.
from configobj.configobj import ConfigObj
# Import custom pipeline setup Class.
from objectoriented.getConfig import GetConfig
from objectoriented.GetConfig import GetConfig
# Conveniently import some utility functions so we don't have to type the full name.
from nifsUtils import datefmt, printDirectoryLists, writeList, getParam, interactiveNIFSInput

Expand Down Expand Up @@ -77,7 +77,7 @@

# The current version:
# TODO(nat): fix this to import the version from setup.py.
__version__ = "1.0.0"
__version__ = "2.0.0"

# The time when Nifty was started is:
startTime = str(datetime.now())
Expand All @@ -101,7 +101,6 @@ def start(args):
"""
# Save starting path for later use and change one directory up.
path = os.getcwd()
print "IT WORKED!"
# Get paths to built-in Nifty data. Special code in setup.py makes sure recipes/ and
# runtimeData/ will be installed when someone installs Nifty, and accessible in this way.
RECIPES_PATH = pkg_resources.resource_filename('nifty', 'recipes/')
Expand All @@ -127,9 +126,7 @@ def start(args):
logging.info("# NIFTY #")
logging.info("# NIFS Data Reduction Pipeline #")
logging.info("# Version "+ __version__+ " #")
logging.info("# July 25th, 2017 #")
logging.info("# Marie Lemoine-Busserolle #")
logging.info("# Gemini Observatory, Hilo, Hawaii #")
logging.info("# 2020 #")
logging.info("# #")
logging.info("####################################\n")

Expand Down
118 changes: 113 additions & 5 deletions nifty/pipeline/nifsUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,11 @@

# STDLIB

import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re
import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections, requests, hashlib, tempfile
import numpy as np
from xml.dom.minidom import parseString
from pyraf import iraf
from astroquery.cadc import Cadc

# LOCAL

Expand Down Expand Up @@ -74,7 +75,7 @@ def interactiveNIFSInput():

"""

logging.info("\nWelcome to Nifty! The current mode is NIFS data reduction.\n\nPress enter to accept default data reduction options.")
logging.info("\nWelcome to Nifty! The current mode is NIFS data reduction.\n\nPress enter to accept default data reduction options. Type 'yes' or 'no' when prompted.")

fullReduction = getParam(
"Do a full data reduction with default parameters loaded from recipes/defaultConfig.cfg? [no]: ",
Expand Down Expand Up @@ -196,7 +197,7 @@ def interactiveNIFSInput():
rawPath = getParam(
"Path to raw files directory? []: ",
"",
"An example of a valid raw files path string: \"/Users/nat/data/spaceMonster\""
"An example of a valid raw files path string: \"/Users/nat/data/\""
)
program = getParam(
"Gemini Program ID? []: ",
Expand All @@ -207,9 +208,14 @@ def interactiveNIFSInput():
proprietaryCookie = getParam(
"Cookie for proprietary downloads? []: ",
'',
"You can provide a cookie from you Gemini public archive login session to automatically "
"You can provide a cookie from your Gemini public archive login session to automatically " + \
"download proprietary data."
)
dataSource = getParam(
"Select a raw data source; 'GSA' for Gemini Science Archive, 'CADC' for Canadian Astronomy Data Centre. [GSA]: ",
'GSA',
"Automatic downloads can happen from either the Gemini Science Archive or the Canadian Astronomy Data Centre."
)
skyThreshold = getParam(
"Sky threshold? [2.0]: ",
2.0,
Expand Down Expand Up @@ -291,7 +297,7 @@ def interactiveNIFSInput():
# Some of these are disabled (for now!) because of bugs in interactive Pyraf tasks.
# TODO(nat): when interactive is fixed re-enable this.
# Temp fix:
hlineinter = getParam(
hLineInter = getParam(
"Interative H-line removal? [no]: ",
False,
"WARNING: This is currently broken due to bugs in interactive PyRAF tasks. Use with caution."
Expand Down Expand Up @@ -413,6 +419,7 @@ def interactiveNIFSInput():
config['sortConfig']['rawPath'] = rawPath
config['sortConfig']['program'] = program
config['sortConfig']['proprietaryCookie'] = proprietaryCookie
config['sortConfig']['dataSource'] = dataSource
config['sortConfig']['skyThreshold'] = skyThreshold
config['sortConfig']['sortTellurics'] = sortTellurics
config['sortConfig']['telluricTimeThreshold'] = telluricTimeThreshold
Expand Down Expand Up @@ -456,6 +463,21 @@ def interactiveNIFSInput():
config['mergeConfig']['use_pq_offsets'] = use_pq_offsets
config['mergeConfig']['im3dtran'] = im3dtran

# Convert yes/no responses to True/False
def update(u):
    """
    Recursively convert 'yes'/'no' string values in a (possibly nested)
    mapping to True/False, mutating it in place and returning it.
    """
    # NOTE(review): dict.iteritems and collections.Mapping are Python 2 only;
    # under Python 3 this would need .items() and collections.abc.Mapping.
    for k, v in u.iteritems():
        if isinstance(v, collections.Mapping):
            # Nested section (e.g. a ConfigObj sub-section): recurse.
            u[k] = update(u.get(k))
        else:
            # Leaf value: map the literal strings 'yes'/'no' to booleans;
            # any other value is left untouched.
            if u[k] == 'yes':
                u[k] = True
            elif u[k] == 'no':
                u[k] = False
    return u

update(config)


with open('./config.cfg', 'w') as outfile:
config.write(outfile)

Expand Down Expand Up @@ -1163,3 +1185,89 @@ def MEFarith(MEF, image, op, result):
iraf.imarith(operand1=result+'['+str(i)+']', op=op, operand2 = image, result = result+'['+str(i)+', overwrite]', divzero = 0.0)

#-----------------------------------------------------------------------------#

def downloadQueryCadc(program, directory='./rawData'):
    """
    Find and download all CADC NIFS files for a particular Gemini program ID.

    Parameters
    ----------
    program : str
        The Gemini program ID (interpolated into the ADQL proposal_id filter).
    directory : str, optional
        Directory the files are downloaded into (default './rawData').
        Must already exist.

    Raises
    ------
    Exception
        Re-raises whatever getFile() raised if any frame fails to download.
    """
    cadc = Cadc()
    # NOTE(review): program is interpolated directly into the ADQL query;
    # fine for trusted config values, but not safe for untrusted input.
    job = cadc.create_async("SELECT observationID, publisherID, productID FROM caom2.Observation \
        AS o JOIN caom2.Plane AS p ON o.obsID=p.obsID \
        WHERE instrument_name='NIFS' AND proposal_id={}".format("'"+program+"'"))
    job.run().wait()
    job.raise_if_error()
    result = job.fetch_result().to_table()

    urls = cadc.get_data_urls(result)

    # Download into `directory`, and guarantee the working directory is
    # restored on every exit path (the original only restored it in the
    # except branch and after a fully successful loop).
    cwd = os.getcwd()
    os.chdir(directory)
    try:
        for url in urls:
            try:
                filename = getFile(url)
                logging.debug("Downloaded {}".format(filename))
            except Exception:
                logging.error("A frame failed to download.")
                # Bare `raise` preserves the original traceback (Python 2's
                # `raise e` would discard it).
                raise
    finally:
        os.chdir(cwd)


def getFile(url):
    """
    Download the file at ``url`` into the current directory and return its
    filename.

    The filename is taken from the Content-Disposition header when present,
    otherwise parsed from the URL path (e.g.
    https://.../data/pub/GEM/N20140505S0114.fits?RUNID=... -> N20140505S0114.fits).

    Returns
    -------
    str
        The name of the file that was written.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status.
    IOError
        If the md5 verification in writeWithTempFile fails.
    """
    r = requests.get(url, stream=True)
    try:
        # Fail fast on HTTP errors; otherwise an error page would be
        # saved to disk as if it were a FITS file.
        r.raise_for_status()

        # Parse out filename from header
        try:
            filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0]
        except KeyError:
            # 'Content-Disposition' header wasn't found, so parse filename from URL
            filename = (url.split('/')[-1]).split('?')[0]

        # Write the fits file to the current directory, verifying the md5
        # hash as we go; partial results go to a temporary file.
        writeWithTempFile(r, filename)
    finally:
        # Always release the streaming connection back to the pool.
        r.close()

    return filename


def writeWithTempFile(request, filename):
    """
    Write a downloaded file to ``filename``, verifying the md5 hash as we go.

    Partial results are buffered in a temporary file under '.temp-downloads'
    so an interrupted download never leaves a truncated ``filename`` behind.

    Parameters
    ----------
    request : requests.Response
        A streaming response; must provide ``headers`` and ``iter_content()``.
    filename : str
        Output file name, written into the current working directory.

    Returns
    -------
    str
        The ``filename`` that was written.

    Raises
    ------
    IOError
        If the server supplied a Content-MD5 header and the downloaded bytes
        do not match it.
    """
    temp_downloads_path = '.temp-downloads'
    if not os.path.exists(temp_downloads_path):
        os.mkdir(temp_downloads_path)
    try:
        server_checksum = request.headers['Content-MD5']
    except KeyError:
        # Server didn't provide a checksum; download anyway, skipping validation.
        logging.warning("Content-MD5 header not found for file {}. Skipping checksum validation.".format(filename))
        server_checksum = None

    # Stream chunks to a temp file, updating the md5 digest as we go.
    download_checksum = hashlib.md5()
    with tempfile.TemporaryFile(mode='w+b', prefix=filename, dir=temp_downloads_path) as f:
        for chunk in request.iter_content(chunk_size=128):
            f.write(chunk)
            download_checksum.update(chunk)
        if server_checksum and (server_checksum != download_checksum.hexdigest()):
            # Bug fix: the original referenced an undefined name `url` here,
            # raising NameError instead of reporting the checksum mismatch.
            logging.error("Problem downloading {}: md5 checksum mismatch.".format(filename))
            raise IOError("md5 checksum mismatch for {}".format(filename))
        f.seek(0)
        # Bug fix: open in binary mode -- FITS data is binary; text mode 'w'
        # corrupts it on Windows and raises TypeError under Python 3.
        with open(filename, 'wb') as out_fp:
            out_fp.write(f.read())

    return filename






#-----------------------------------------------------------------------------#

47 changes: 38 additions & 9 deletions nifty/pipeline/objectoriented/GetConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,22 @@ def __init__(self, args, script):

self.makeConfig()

def checkConfigExists(self, configFile):
    """
    Ensure a config file exists, seeding it from the packaged default
    configuration (recipes/defaultConfig.cfg) when it is missing.
    """
    # Nothing to do if the user already has a config file.
    if os.path.exists(configFile):
        return
    defaultConfig = os.path.join(self.RECIPES_PATH, 'defaultConfig.cfg')
    shutil.copy(defaultConfig, configFile)

def overwriteWithDefault(self, configFile):
    """
    Replace configFile with a fresh copy of the packaged default
    configuration (recipes/defaultConfig.cfg), discarding any existing file.
    """
    defaultConfig = os.path.join(self.RECIPES_PATH, 'defaultConfig.cfg')
    # Remove any stale config first so the copy is a clean overwrite.
    if os.path.exists(configFile):
        os.remove(configFile)
    shutil.copy(defaultConfig, configFile)


def makeConfig(self):
"""
Make a configuration file.
Expand All @@ -72,6 +88,8 @@ def makeConfig(self):
self.parser.add_argument('-i', '--interactive', dest = 'interactive', default = False, action = 'store_true', help = 'Create a config.cfg file interactively.')
# Ability to repeat the last data reduction
self.parser.add_argument('-r', '--repeat', dest = 'repeat', default = False, action = 'store_true', help = 'Repeat the last data reduction, loading saved reduction parameters from runtimeData/config.cfg.')
# Specify where downloads come from; either Gemini or CADC.
self.parser.add_argument('-s', '--data-source', dest = 'dataSource', default = 'GSA', action = 'store', help = 'Download raw data from the Canadian Astronomy Data Centre or the Gemini Science Archive. Valid options are "GSA" or "CADC".')
# Ability to load a built-in configuration file (recipe)
self.parser.add_argument('-l', '--recipe', dest = 'recipe', action = 'store', help = 'Load data reduction parameters from the a provided recipe. Default is default_input.cfg.')
# Ability to load your own configuration file
Expand All @@ -85,12 +103,13 @@ def makeConfig(self):
self.repeat = self.args.repeat
self.fullReduction = self.args.fullReduction
self.inputfile = self.args.inputfile
self.dataSource = self.args.dataSource

if self.inputfile:
# Load input from a .cfg file user specified at command line.
if self.inputfile != self.configFile and os.path.exists('./'+ self.configFile):
os.remove('./'+ self.configFile)
shutil.copy(self.inputfile, './'+ self.configFile)
if self.inputfile != self.configFile and os.path.exists(self.configFile):
os.remove(self.configFile)
shutil.copy(self.inputfile, self.configFile)
logging.info("\nPipeline configuration for this data reduction was read from " + str(self.inputfile) + \
", and if not named config.cfg, copied to ./config.cfg.")

Expand All @@ -102,12 +121,9 @@ def makeConfig(self):
self.fullReduction = interactiveNIFSInput()

if self.fullReduction:
# Copy default input and use it
if os.path.exists('./' + self.configFile):
os.remove('./' + self.configFile)
shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', './'+ self.configFile)
self.overwriteWithDefault(self.configFile)
# Update default config file with path to raw data or program ID.
with open('./' + self.configFile, 'r') as self.config_file:
with open(self.configFile, 'r') as self.config_file:
self.config = ConfigObj(self.config_file, unrepr=True)
self.sortConfig = self.config['sortConfig']
if self.fullReduction[0] == "G":
Expand All @@ -118,6 +134,19 @@ def makeConfig(self):
# Else treat it as a path.
self.sortConfig['program'] = ""
self.sortConfig['rawPath'] = self.fullReduction
with open('./' + self.configFile, 'w') as self.outfile:
with open(self.configFile, 'w') as self.outfile:
self.config.write(self.outfile)
logging.info("\nData reduction parameters for this reduction were copied from recipes/defaultConfig.cfg to ./config.cfg.")

# If user selects a non-default data source, change it in the config file.
if self.dataSource != 'GSA':
try:
self.checkConfigExists(self.configFile)
with open(self.configFile, 'r') as self.config_file:
self.config = ConfigObj(self.config_file, unrepr=True)
self.config['sortConfig']['dataSource'] = self.dataSource
with open(self.configFile, 'w') as self.outfile:
self.config.write(self.outfile)
logging.debug("Set dataSource option in config file.")
except:
raise ValueError("Failed to set dataSource option.")
Loading