Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add additions from CADC work #6

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
c37893a
Merge pull request #1 from mrlb05/master
Nat1405 Feb 11, 2018
6b5b38c
Fixed mispelled module name
Nat1405 Mar 9, 2020
b70612e
Experimental fix for sorting problem.
Nat1405 Mar 12, 2020
3f67ce3
Fixed some non-standard wavelength crashes.
Nat1405 Mar 12, 2020
40f6373
Added exception handlers in nifsMerge.
Nat1405 Mar 22, 2020
8f12c2f
Fixes misspelled variable
Nat1405 May 6, 2020
8c59195
Adds CADC download option, plus fixes bugs.
Nat1405 May 7, 2020
3e7ffea
Adds support for downloads using requests.
Nat1405 May 7, 2020
443662e
Splits out getting of file to new method.
Nat1405 May 7, 2020
0aae71e
Fixes function naming conventions to match original project.
Nat1405 May 7, 2020
22c3f62
Changes '-c' CADC flag to '-d/--data-source' option.
Nat1405 May 7, 2020
6dd7cdd
Cleans up url construction code.
Nat1405 May 7, 2020
ec027de
Fixes silly bugs introduced by 74a11e9.
Nat1405 May 8, 2020
42f70df
Adds better error handling to get_file().
Nat1405 May 8, 2020
453c787
Fixes silly bugs in d5ead22e7.
Nat1405 May 11, 2020
e006513
Fixes path seperators (wasn't using os.path.join) in GetConfig.py.
Nat1405 May 14, 2020
061fd56
Sends downloaded CADC files to a temp location first.
Nat1405 May 14, 2020
fa13680
Adds CADC downloads MD5 verification.
Nat1405 May 14, 2020
ba4b95c
Changes -d/--data-source flag to -s/--data-source flag
Nat1405 May 14, 2020
7e57b99
Moves temp files to download directory.
Nat1405 May 14, 2020
ad6d231
Changes md5 verification to happen at the same time as downloads.
Nat1405 May 14, 2020
79a8f59
Makes CADC downloads use temp files (via tempfile.TemporaryFile).
Nat1405 May 14, 2020
f788b67
Adds backwards compatibility for old config files for dataSource option.
Nat1405 Jun 8, 2020
7ec3d72
Merge pull request #4 from Nat1405/cadc_downloads
Nat1405 Jul 9, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,15 @@

import os, os.path
import sys
import logging
from contextlib import closing
from StringIO import StringIO
import urllib2
import xml.dom.minidom as xmd
import tarfile
import hashlib

def download_query_gemini(query, dirname='', cookieName=''):
def download_query_gemini(program, dirname='', cookieName=''):
"""
Perform a user-specified Gemini science archive query and save the files
returned to a specified directory.
Expand All @@ -54,16 +55,25 @@ def download_query_gemini(query, dirname='', cookieName=''):
which should be optimal as long as the archive isn't unreasonably large
(to do: consider adding an option to write it to a temporary file).

# Modified 2020 by Nat Comeau

Parameters
----------

query : str
The query URL (or just the path component) to request from the server.
program : str
The Gemini program ID to request from the server.

dirname : str, optional
The (absolute or relative) directory path in which to place the files.

"""

# Modified 2020 by Nat Comeau
query = 'https://archive.gemini.edu/download/'+ str(program) + '/notengineering/NotFail/present/canonical'
logging.info('\nDownloading data from Gemini public archive to ./rawData. This will take a few minutes.')
logging.info('\nURL used for the download: \n' + str(query))


checksum_fn = 'md5sums.txt'
aux_fn = [checksum_fn, 'README.txt']

Expand Down
2 changes: 1 addition & 1 deletion nifty/pipeline/nifsLowMemoryPipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
# Import config parsing.
# Import config parsing.
from configobj.configobj import ConfigObj
from objectoriented.getConfig import GetConfig
from objectoriented.GetConfig import GetConfig
# Import custom Nifty functions.
from nifsUtils import datefmt, printDirectoryLists, writeList, getParam, interactiveNIFSInput

Expand Down
9 changes: 3 additions & 6 deletions nifty/pipeline/nifsPipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
# Import configuration file parsing.
from configobj.configobj import ConfigObj
# Import custom pipeline setup Class.
from objectoriented.getConfig import GetConfig
from objectoriented.GetConfig import GetConfig
# Conveniently import some utility functions so we don't have to type the full name.
from nifsUtils import datefmt, printDirectoryLists, writeList, getParam, interactiveNIFSInput

Expand Down Expand Up @@ -77,7 +77,7 @@

# The current version:
# TODO(nat): fix this to import the version from setup.py.
__version__ = "1.0.0"
__version__ = "2.0.0"

# The time when Nifty was started is:
startTime = str(datetime.now())
Expand All @@ -101,7 +101,6 @@ def start(args):
"""
# Save starting path for later use and change one directory up.
path = os.getcwd()
print "IT WORKED!"
# Get paths to built-in Nifty data. Special code in setup.py makes sure recipes/ and
# runtimeData/ will be installed when someone installs Nifty, and accessible in this way.
RECIPES_PATH = pkg_resources.resource_filename('nifty', 'recipes/')
Expand All @@ -127,9 +126,7 @@ def start(args):
logging.info("# NIFTY #")
logging.info("# NIFS Data Reduction Pipeline #")
logging.info("# Version "+ __version__+ " #")
logging.info("# July 25th, 2017 #")
logging.info("# Marie Lemoine-Busserolle #")
logging.info("# Gemini Observatory, Hilo, Hawaii #")
logging.info("# 2020 #")
logging.info("# #")
logging.info("####################################\n")

Expand Down
118 changes: 113 additions & 5 deletions nifty/pipeline/nifsUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,11 @@

# STDLIB

import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re
import time, sys, calendar, astropy.io.fits, urllib, shutil, glob, os, fileinput, logging, smtplib, pkg_resources, math, re, collections, requests, hashlib, tempfile
import numpy as np
from xml.dom.minidom import parseString
from pyraf import iraf
from astroquery.cadc import Cadc

# LOCAL

Expand Down Expand Up @@ -74,7 +75,7 @@ def interactiveNIFSInput():

"""

logging.info("\nWelcome to Nifty! The current mode is NIFS data reduction.\n\nPress enter to accept default data reduction options.")
logging.info("\nWelcome to Nifty! The current mode is NIFS data reduction.\n\nPress enter to accept default data reduction options. Type 'yes' or 'no' when prompted.")

fullReduction = getParam(
"Do a full data reduction with default parameters loaded from recipes/defaultConfig.cfg? [no]: ",
Expand Down Expand Up @@ -196,7 +197,7 @@ def interactiveNIFSInput():
rawPath = getParam(
"Path to raw files directory? []: ",
"",
"An example of a valid raw files path string: \"/Users/nat/data/spaceMonster\""
"An example of a valid raw files path string: \"/Users/nat/data/\""
)
program = getParam(
"Gemini Program ID? []: ",
Expand All @@ -207,9 +208,14 @@ def interactiveNIFSInput():
proprietaryCookie = getParam(
"Cookie for proprietary downloads? []: ",
'',
"You can provide a cookie from you Gemini public archive login session to automatically "
"You can provide a cookie from your Gemini public archive login session to automatically " + \
"download proprietary data."
)
dataSource = getParam(
"Select a raw data source; 'GSA' for Gemini Science Archive, 'CADC' for Canadian Astronomy Data Centre. [GSA]: ",
'GSA',
"Automatic downloads can happen from either the Gemini Science Archive or the Canadian Astronomy Data Centre."
)
skyThreshold = getParam(
"Sky threshold? [2.0]: ",
2.0,
Expand Down Expand Up @@ -291,7 +297,7 @@ def interactiveNIFSInput():
# Some of these are disabled (for now!) because of bugs in interactive Pyraf tasks.
# TODO(nat): when interactive is fixed re-enable this.
# Temp fix:
hlineinter = getParam(
hLineInter = getParam(
"Interative H-line removal? [no]: ",
False,
"WARNING: This is currently broken due to bugs in interactive PyRAF tasks. Use with caution."
Expand Down Expand Up @@ -413,6 +419,7 @@ def interactiveNIFSInput():
config['sortConfig']['rawPath'] = rawPath
config['sortConfig']['program'] = program
config['sortConfig']['proprietaryCookie'] = proprietaryCookie
config['sortConfig']['dataSource'] = dataSource
config['sortConfig']['skyThreshold'] = skyThreshold
config['sortConfig']['sortTellurics'] = sortTellurics
config['sortConfig']['telluricTimeThreshold'] = telluricTimeThreshold
Expand Down Expand Up @@ -456,6 +463,21 @@ def interactiveNIFSInput():
config['mergeConfig']['use_pq_offsets'] = use_pq_offsets
config['mergeConfig']['im3dtran'] = im3dtran

# Convert yes/no responses to True/False
def update(u):
    """
    Recursively convert 'yes'/'no' string values in a (possibly nested)
    mapping to True/False, mutating it in place and returning it.
    """
    # NOTE(review): dict.iteritems and collections.Mapping are Python 2 only;
    # under Python 3 this would need .items() and collections.abc.Mapping.
    for k, v in u.iteritems():
        if isinstance(v, collections.Mapping):
            # Nested section (e.g. a ConfigObj sub-section): recurse.
            u[k] = update(u.get(k))
        else:
            # Leaf value: map the literal strings 'yes'/'no' to booleans;
            # any other value is left untouched.
            if u[k] == 'yes':
                u[k] = True
            elif u[k] == 'no':
                u[k] = False
    return u

update(config)


with open('./config.cfg', 'w') as outfile:
config.write(outfile)

Expand Down Expand Up @@ -1163,3 +1185,89 @@ def MEFarith(MEF, image, op, result):
iraf.imarith(operand1=result+'['+str(i)+']', op=op, operand2 = image, result = result+'['+str(i)+', overwrite]', divzero = 0.0)

#-----------------------------------------------------------------------------#

def downloadQueryCadc(program, directory='./rawData'):
    """
    Find and download all CADC NIFS files for a particular Gemini program ID.

    Parameters
    ----------
    program : str
        The Gemini program ID (interpolated into the ADQL proposal_id filter).
    directory : str, optional
        Directory the files are downloaded into (default './rawData').
        Must already exist.

    Raises
    ------
    Exception
        Re-raises whatever getFile() raised if any frame fails to download.
    """
    cadc = Cadc()
    # NOTE(review): program is interpolated directly into the ADQL query;
    # fine for trusted config values, but not safe for untrusted input.
    job = cadc.create_async("SELECT observationID, publisherID, productID FROM caom2.Observation \
        AS o JOIN caom2.Plane AS p ON o.obsID=p.obsID \
        WHERE instrument_name='NIFS' AND proposal_id={}".format("'"+program+"'"))
    job.run().wait()
    job.raise_if_error()
    result = job.fetch_result().to_table()

    urls = cadc.get_data_urls(result)

    # Download into `directory`, and guarantee the working directory is
    # restored on every exit path (the original only restored it in the
    # except branch and after a fully successful loop).
    cwd = os.getcwd()
    os.chdir(directory)
    try:
        for url in urls:
            try:
                filename = getFile(url)
                logging.debug("Downloaded {}".format(filename))
            except Exception:
                logging.error("A frame failed to download.")
                # Bare `raise` preserves the original traceback (Python 2's
                # `raise e` would discard it).
                raise
    finally:
        os.chdir(cwd)


def getFile(url):
    """
    Download the file at ``url`` into the current directory and return its
    filename.

    The filename is taken from the Content-Disposition header when present,
    otherwise parsed from the URL path (e.g.
    https://.../data/pub/GEM/N20140505S0114.fits?RUNID=... -> N20140505S0114.fits).

    Returns
    -------
    str
        The name of the file that was written.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status.
    IOError
        If the md5 verification in writeWithTempFile fails.
    """
    r = requests.get(url, stream=True)
    try:
        # Fail fast on HTTP errors; otherwise an error page would be
        # saved to disk as if it were a FITS file.
        r.raise_for_status()

        # Parse out filename from header
        try:
            filename = re.findall("filename=(.+)", r.headers['Content-Disposition'])[0]
        except KeyError:
            # 'Content-Disposition' header wasn't found, so parse filename from URL
            filename = (url.split('/')[-1]).split('?')[0]

        # Write the fits file to the current directory, verifying the md5
        # hash as we go; partial results go to a temporary file.
        writeWithTempFile(r, filename)
    finally:
        # Always release the streaming connection back to the pool.
        r.close()

    return filename


def writeWithTempFile(request, filename):
    """
    Write a downloaded file to ``filename``, verifying the md5 hash as we go.

    Partial results are buffered in a temporary file under '.temp-downloads'
    so an interrupted download never leaves a truncated ``filename`` behind.

    Parameters
    ----------
    request : requests.Response
        A streaming response; must provide ``headers`` and ``iter_content()``.
    filename : str
        Output file name, written into the current working directory.

    Returns
    -------
    str
        The ``filename`` that was written.

    Raises
    ------
    IOError
        If the server supplied a Content-MD5 header and the downloaded bytes
        do not match it.
    """
    temp_downloads_path = '.temp-downloads'
    if not os.path.exists(temp_downloads_path):
        os.mkdir(temp_downloads_path)
    try:
        server_checksum = request.headers['Content-MD5']
    except KeyError:
        # Server didn't provide a checksum; download anyway, skipping validation.
        logging.warning("Content-MD5 header not found for file {}. Skipping checksum validation.".format(filename))
        server_checksum = None

    # Stream chunks to a temp file, updating the md5 digest as we go.
    download_checksum = hashlib.md5()
    with tempfile.TemporaryFile(mode='w+b', prefix=filename, dir=temp_downloads_path) as f:
        for chunk in request.iter_content(chunk_size=128):
            f.write(chunk)
            download_checksum.update(chunk)
        if server_checksum and (server_checksum != download_checksum.hexdigest()):
            # Bug fix: the original referenced an undefined name `url` here,
            # raising NameError instead of reporting the checksum mismatch.
            logging.error("Problem downloading {}: md5 checksum mismatch.".format(filename))
            raise IOError("md5 checksum mismatch for {}".format(filename))
        f.seek(0)
        # Bug fix: open in binary mode -- FITS data is binary; text mode 'w'
        # corrupts it on Windows and raises TypeError under Python 3.
        with open(filename, 'wb') as out_fp:
            out_fp.write(f.read())

    return filename






#-----------------------------------------------------------------------------#

47 changes: 38 additions & 9 deletions nifty/pipeline/objectoriented/GetConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,22 @@ def __init__(self, args, script):

self.makeConfig()

def checkConfigExists(self, configFile):
    """
    Ensure a config file exists, seeding it from the packaged default
    configuration (recipes/defaultConfig.cfg) when it is missing.
    """
    # Nothing to do if the user already has a config file.
    if os.path.exists(configFile):
        return
    defaultConfig = os.path.join(self.RECIPES_PATH, 'defaultConfig.cfg')
    shutil.copy(defaultConfig, configFile)

def overwriteWithDefault(self, configFile):
    """
    Replace configFile with a fresh copy of the packaged default
    configuration (recipes/defaultConfig.cfg), discarding any existing file.
    """
    defaultConfig = os.path.join(self.RECIPES_PATH, 'defaultConfig.cfg')
    # Remove any stale config first so the copy is a clean overwrite.
    if os.path.exists(configFile):
        os.remove(configFile)
    shutil.copy(defaultConfig, configFile)


def makeConfig(self):
"""
Make a configuration file.
Expand All @@ -72,6 +88,8 @@ def makeConfig(self):
self.parser.add_argument('-i', '--interactive', dest = 'interactive', default = False, action = 'store_true', help = 'Create a config.cfg file interactively.')
# Ability to repeat the last data reduction
self.parser.add_argument('-r', '--repeat', dest = 'repeat', default = False, action = 'store_true', help = 'Repeat the last data reduction, loading saved reduction parameters from runtimeData/config.cfg.')
# Specify where downloads come from; either Gemini or CADC.
self.parser.add_argument('-s', '--data-source', dest = 'dataSource', default = 'GSA', action = 'store', help = 'Download raw data from the Canadian Astronomy Data Centre or the Gemini Science Archive. Valid options are "GSA" or "CADC".')
# Ability to load a built-in configuration file (recipe)
self.parser.add_argument('-l', '--recipe', dest = 'recipe', action = 'store', help = 'Load data reduction parameters from the a provided recipe. Default is default_input.cfg.')
# Ability to load your own configuration file
Expand All @@ -85,12 +103,13 @@ def makeConfig(self):
self.repeat = self.args.repeat
self.fullReduction = self.args.fullReduction
self.inputfile = self.args.inputfile
self.dataSource = self.args.dataSource

if self.inputfile:
# Load input from a .cfg file user specified at command line.
if self.inputfile != self.configFile and os.path.exists('./'+ self.configFile):
os.remove('./'+ self.configFile)
shutil.copy(self.inputfile, './'+ self.configFile)
if self.inputfile != self.configFile and os.path.exists(self.configFile):
os.remove(self.configFile)
shutil.copy(self.inputfile, self.configFile)
logging.info("\nPipeline configuration for this data reduction was read from " + str(self.inputfile) + \
", and if not named config.cfg, copied to ./config.cfg.")

Expand All @@ -102,12 +121,9 @@ def makeConfig(self):
self.fullReduction = interactiveNIFSInput()

if self.fullReduction:
# Copy default input and use it
if os.path.exists('./' + self.configFile):
os.remove('./' + self.configFile)
shutil.copy(self.RECIPES_PATH+'defaultConfig.cfg', './'+ self.configFile)
self.overwriteWithDefault(self.configFile)
# Update default config file with path to raw data or program ID.
with open('./' + self.configFile, 'r') as self.config_file:
with open(self.configFile, 'r') as self.config_file:
self.config = ConfigObj(self.config_file, unrepr=True)
self.sortConfig = self.config['sortConfig']
if self.fullReduction[0] == "G":
Expand All @@ -118,6 +134,19 @@ def makeConfig(self):
# Else treat it as a path.
self.sortConfig['program'] = ""
self.sortConfig['rawPath'] = self.fullReduction
with open('./' + self.configFile, 'w') as self.outfile:
with open(self.configFile, 'w') as self.outfile:
self.config.write(self.outfile)
logging.info("\nData reduction parameters for this reduction were copied from recipes/defaultConfig.cfg to ./config.cfg.")

# If user selects a non-default data source, change it in the config file.
if self.dataSource != 'GSA':
try:
self.checkConfigExists(self.configFile)
with open(self.configFile, 'r') as self.config_file:
self.config = ConfigObj(self.config_file, unrepr=True)
self.config['sortConfig']['dataSource'] = self.dataSource
with open(self.configFile, 'w') as self.outfile:
self.config.write(self.outfile)
logging.debug("Set dataSource option in config file.")
except:
raise ValueError("Failed to set dataSource option.")
Loading