Skip to content

Commit

Permalink
Move to using data on eos public and update everware URL (#14)
Browse files Browse the repository at this point in the history
* Update everware link and move to use eospublic

* Make everything more robust
  • Loading branch information
chrisburr authored Sep 29, 2017
1 parent a0fb22f commit 80d64a3
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 40 deletions.
90 changes: 52 additions & 38 deletions Data/setup_main_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,58 @@
from __future__ import print_function
from __future__ import division

import functools
import hashlib
import os
import functools
import warnings
import urllib
warnings.filterwarnings("ignore")
from subprocess import check_output

import numpy
from ipywidgets import interact
import pandas
from root_pandas import read_root
from scipy import stats as st
from matplotlib import pyplot as plt

rcParams['image.cmap'] = 'Blues' # change default colormap

warnings.filterwarnings("ignore")
rcParams['image.cmap'] = 'Blues' # change default colormap
pandas.set_option('display.max_columns', None)

# TODO Prevent this happening twice
# (running these lines again would capture the already-installed wrapper as _hist)
# Keep a handle on the current pandas.Series.hist so the wrapper can delegate to it.
_hist = pandas.Series.hist
# functools.wraps copies the wrapped function's metadata (e.g. name, docstring) onto new_hist.
@functools.wraps(_hist)
def new_hist(self, *args, **kwargs):
# Force these two keyword arguments, overriding any value the caller supplied.
kwargs['histtype'] = 'step'
kwargs['grid'] = False
return _hist(self, *args, **kwargs)
# Install the wrapper so that Series.hist calls go through new_hist.
pandas.Series.hist = new_hist

from root_pandas import read_root
def check_hash(filename, fn_hash, block_size=65536):
    """Check that ``filename`` exists and has the expected SHA-256 digest.

    The file is read in chunks of ``block_size`` bytes so that it is never
    loaded into memory in one piece.

    Parameters
    ----------
    filename : str
        Path of the file to verify.
    fn_hash : str
        Expected SHA-256 hex digest. Letter case and surrounding whitespace
        are ignored.
    block_size : int, optional
        Number of bytes passed to each ``read`` call.

    Returns
    -------
    bool
        ``True`` if the file exists and its digest matches. ``False`` if the
        file is missing, or if it exists but does not match -- in which case
        a message is printed and the file is deleted so the caller can fetch
        it again.
    """
    if not os.path.isfile(filename):
        return False

    hasher = hashlib.sha256()
    with open(filename, 'rb') as afile:
        # read() returns b'' at end of file, which stops the iteration.
        for buf in iter(lambda: afile.read(block_size), b''):
            hasher.update(buf)

    # hexdigest() is always lower-case; normalise the expected value so an
    # upper-case or padded digest does not cause a good file to be deleted.
    if hasher.hexdigest() == fn_hash.strip().lower():
        return True

    print(filename, 'did not match expected hash, retrying')
    os.remove(filename)
    return False


try:
# Ensure this doesn't run twice
new_hist
except NameError:
_hist = pandas.Series.hist

@functools.wraps(_hist)
def new_hist(self, *args, **kwargs):
kwargs['histtype'] = 'step'
kwargs['grid'] = False
return _hist(self, *args, **kwargs)

pandas.Series.hist = new_hist

from scipy import stats as st
from matplotlib import pyplot as plt

def get_plot_func(data):
def plot_hist(bins, x_min, x_max):
Expand All @@ -38,26 +62,16 @@ def plot_hist(bins, x_min, x_max):
plt.ylabel('Number of Events')
return plot_hist

# NOTE(review): leading indentation is not visible in this view, so the nesting
# of the statements below cannot be confirmed; each comment describes only the
# statement that follows it.
def download_data(filename, url, expected_hash):
# Loop for as long as `filename` does not exist on disk.
while not os.path.isfile(filename):
try:
print('Downloading', filename)
# NOTE: urllib.urlretrieve is the Python 2 location of this function.
urllib.urlretrieve (url, filename)
except Exception:
# Any error is swallowed here; when no file was produced, go round the loop again.
if not os.path.isfile(filename):
continue
# MD5 hex digest of the whole file, read into memory in a single call.
# The file object is never explicitly closed.
_hash = hashlib.md5(open(filename, 'rb').read()).hexdigest()
if _hash != expected_hash:
print('Hash does not match for', filename, '- retrying')
# Delete the mismatching file -- presumably so the loop downloads it again.
os.remove(filename)

# Fetch the two ROOT files; arguments are (local filename, download URL,
# expected hex digest -- download_data compares it against an MD5 digest).
download_data(
'B2HHH_MagnetDown.root',
'https://cernbox.cern.ch/index.php/s/gPi4yJkPZrSBenW/download',
'7901d0070a0c74a13755f6878f420e92'
)
download_data(
'B2HHH_MagnetUp.root',
'https://cernbox.cern.ch/index.php/s/8rckTojLRJuEfTF/download',
'a2ccdd0441b9942f92929390c3b5221e'
)

# Server prefix and remote directory that are joined with each file name to
# build the URL handed to xrdcp.
eos_server = 'root://eospublic.cern.ch/'
data_dir = '/eos/opendata/lhcb/AntimatterMatters2017/data/'
# Maps each data file name to its expected SHA-256 hex digest.
filenames = {
    'B2HHH_MagnetDown.root': 'b98651b24f825979053544c37010cf7ef9ce5c56ee62357c7e4ae2c392068379',
    'B2HHH_MagnetUp.root': 'c42ad9e47931e1404bf94ad82ea22a0acd10bc9cfbb58e77a6b0fff08ead7859',
}

for fn, fn_hash in filenames.items():
    fn_url = eos_server + data_dir + fn
    # check_hash() returns False for a missing file and deletes a file whose
    # digest is wrong, so this keeps copying until a verified file is present.
    # check_output() raises CalledProcessError if xrdcp exits non-zero.
    while not check_hash('Data/' + fn, fn_hash):
        print('Downloading', fn_url)
        # Pass an argument list instead of a shell string: no shell is
        # spawned and the URL is never subject to shell word-splitting.
        check_output(['xrdcp', fn_url, './Data/'])
2 changes: 1 addition & 1 deletion LHCb_Open_Data_Project.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@
},
"outputs": [],
"source": [
"real_data = read_root(['./B2HHH_MagnetDown.root', './B2HHH_MagnetUp.root'], where=preselection)"
"real_data = read_root(['Data/B2HHH_MagnetDown.root', 'Data/B2HHH_MagnetUp.root'], where=preselection)"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ to get started. If you have access to an [`everware`](https://github.com/everwar
you can try out this repository simply by surfing there and then pasting
a link to this repository or just by clicking this badge

[![run at everware](https://cdn.rawgit.com/chrisburr/opendata-project/master/Images/badge.svg)](https://everware.rep.school.yandex.net/hub/oauth_login?repourl=https://github.com/lhcb/opendata-project.git)
[![run at everware](https://cdn.rawgit.com/chrisburr/opendata-project/master/Images/badge.svg)](https://everware.ysda.yandex.net/hub/oauth_login?repourl=https://github.com/lhcb/opendata-project.git)


If you've come here through everware then just click the "LHCb open data portal project"
Expand Down

0 comments on commit 80d64a3

Please sign in to comment.