Skip to content

Commit 8d41acb

Browse files
remove all support for XML files
closes gwpy#165
1 parent 0e62cba commit 8d41acb

File tree

10 files changed

+26
-159
lines changed

10 files changed

+26
-159
lines changed

docs/utilities/merge.rst

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Merge trigger files
22
###################
33

4-
In order to not end up with millions of small ``.root``, ``.hdf5``, and
5-
``.xml`` files each representing a
4+
In order to not end up with millions of small ``.root`` and ``.hdf5``
5+
files each representing a
66
small chunk of processed time, the ``omicron-process`` workflow will merge
77
contiguous files together using the ``omicron-merge-with-gaps`` command line utility.
88
The purpose of this control utility is to detect any gaps in the expected list
@@ -20,8 +20,6 @@ merge the contiguous trigger files:
2020
+------------+-----------+-------------------------------------------------+
2121
| HDF5 | ``.hdf5`` | ``omicron-hdf5-merge`` |
2222
+------------+-----------+-------------------------------------------------+
23-
| ligolw | ``.xml `` | ``ligolw_add`` and ``gzip`` |
24-
+------------+-----------+-------------------------------------------------+
2523
| Text | ``.txt`` | ``?`` |
2624
+------------+-----------+-------------------------------------------------+
2725

@@ -35,8 +33,6 @@ the :meth:`omicron.io.merge_hdf5_files` method:
3533

3634
.. automethod:: omicron.io.merge_hdf5_files
3735

38-
The ``ligolw_add`` is an external program contained in the ``lscsoft-glue`` package.
39-
4036

4137

4238
--------------------
@@ -52,8 +48,6 @@ message of each program:
5248

5349
.. command-output:: omicron-hdf5-merge --help
5450

55-
.. command-output:: ligolw_add --help
56-
5751
Reducing file count and disk space
5852
##################################
5953

docs/workflow/index.rst

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,7 @@ The ``omicron-process`` executable will do the following
5959
The DAG will normally do something like this:
6060

6161
#. process raw data using ``omicron.exe``
62-
#. merge contiguous output files with ``.root``, ``.h5``, and ``.xml`` extensions
63-
#. gzip ``.xml`` files to save space
62+
#. merge contiguous output files with ``.root`` and ``.h5`` extensions
6463
#. the merged files are copied to the archive directory, nominally
6564
``/home/detchar/triggers/<ifo>/<channel-filetag>/<metric day>``
6665
#. if everything completes successfully, trigger and log files are deleted
@@ -92,7 +91,7 @@ where the path components are as follows
9291

9392
e.g.::
9493

95-
~/triggers/L1/GDS_CALIB_STRAIN_OMICRON/12345/L1-GDS_CALIB_STRAIN_OMICRON-1234567890-100.xml.gz
94+
~/triggers/L1/GDS_CALIB_STRAIN_OMICRON/12345/L1-GDS_CALIB_STRAIN_OMICRON-1234567890-100.h5
9695

9796
-----------------------------------
9897
Processing a specific time interval

docs/workflow/simulations.rst

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -86,33 +86,3 @@ The results will be in the directory:
8686
.. code-block::
8787
8888
./run/merge/Z1:SIM-SINE_GAUSS/
89-
90-
One way to view them is to use ligolw_print. The results, formatted for readability:
91-
92-
.. code-block::
93-
94-
ligolw_print -t sngl_burst -c 'peak_time' -c 'snr' -c 'peak_frequency' \
95-
'run/merge/Z1:SIM-SINE_GAUSS/Z1-SIM_SINE_GAUSS_OMICRON-1346050820-196.xml.gz'
96-
97-
+------------+-----------+--------------+
98-
| peak_time | SNR | Frequency |
99-
+============+===========+==============|
100-
| 1346050868 | 37.1 | 236.4 |
101-
| 1346050875 | 22.1 | 480.7 |
102-
| 1346050882 | 15.2 | 771.3 |
103-
| 1346050888 | 37.1 | 243.9 |
104-
| 1346050894 | 21.4 | 459.1 |
105-
| 1346050902 | 13.7 | 699.8 |
106-
| 1346050908 | 36.5 | 266.1 |
107-
| 1346050915 | 22.4 | 480.7 |
108-
| 1346050922 | 13.9 | 771.3 |
109-
| 1346050928 | 36.0 | 236.4 |
110-
| 1346050935 | 22.1 | 480.6 |
111-
| 1346050942 | 14.2 | 771.3 |
112-
| 1346050948 | 38.7 | 243.9 |
113-
| 1346050954 | 24.3 | 459.1 |
114-
| 1346050962 | 15.0 | 699.7 |
115-
+------------+-----------+--------------+
116-
117-
118-

omicron/cli/merge_with_gaps.py

Lines changed: 6 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
prog_start_time = time.time()
2929
import argparse
3030
import glob
31-
import gzip
3231
import logging
3332
from .. import __version__
3433
from pathlib import Path
@@ -52,14 +51,12 @@
5251
def get_merge_cmd(ext):
5352
"""
5453
Determine the command used to coalescew individual trigger files
55-
:param str ext: file extension: xml, h5 or root
54+
:param str ext: file extension: h5 or root
5655
"""
5756
if ext == 'root':
5857
ret = 'omicron-root-merge'
5958
elif ext == 'h5':
6059
ret = 'omicron-hdf5-merge'
61-
elif 'xml' in ext:
62-
ret = 'ligolw_add'
6360
else:
6461
raise AttributeError(f'Unknown trigger file typr {ext}')
6562
ret_path = shutil.which(ret)
@@ -68,23 +65,7 @@ def get_merge_cmd(ext):
6865
return ret_path
6966

7067

71-
def is_old_ligolw(path):
72-
flag = "ilwd:char"
73-
if 'gz' in path.name:
74-
with gzip.open(str(path.absolute()), 'r') as gz:
75-
for line in gz:
76-
if flag in str(line):
77-
return True
78-
return False
79-
else:
80-
with path.open('r') as fp:
81-
for line in fp:
82-
if flag in line:
83-
return True
84-
return False
85-
86-
87-
def do_merge(opath, curfiles, chan, stime, etime, ext, skip_gzip):
68+
def do_merge(opath, curfiles, chan, stime, etime, ext):
8869
"""
8970
Given the list of trigger files merge them all into a single file
9071
:param Path opath: output directory
@@ -93,7 +74,6 @@ def do_merge(opath, curfiles, chan, stime, etime, ext, skip_gzip):
9374
:param int stime: Start GPS time for file list
9475
:param int etime: End GPS time
9576
:param str ext: trigger file extension, identifying file type
96-
:param boolean skip_gzip: if type is xml do not compress merged file
9777
"""
9878
outfile_path = opath / f'{chan}-{stime}-{etime - stime}.{ext}'
9979
ret = None
@@ -108,44 +88,20 @@ def do_merge(opath, curfiles, chan, stime, etime, ext, skip_gzip):
10888
returncode = 0
10989
else:
11090
cmd = [get_merge_cmd(ext)]
111-
if 'xml' in ext: # also accept xml.gz
112-
outfile_path = Path(str(outfile_path.absolute()).replace('.xml.gz', '.xml'))
113-
cmd.append(f'--output={outfile_path}')
114-
if is_old_ligolw(curfiles[0]):
115-
cmd.append('--ilwdchar-compat')
116-
logger.debug('Working with old ligolw format')
11791
for cur in curfiles:
11892
cmd.append(str(cur.absolute()))
119-
if 'xml' not in ext:
120-
cmd.append(str(outfile_path.absolute()))
93+
cmd.append(str(outfile_path.absolute()))
12194

12295
logger.info(f'Merging {len(curfiles)} {ext} files into {outfile_path}')
12396
logger.debug(f'Merge command:\n {" ".join(cmd)}')
12497
result = subprocess.run(cmd, capture_output=True)
12598
returncode = result.returncode
126-
err_old_fmt = b"invalid type 'ilwd:char'"
127-
if returncode == 1 and 'xml' in ext and err_old_fmt in result.stderr:
128-
# old ligolw format seems to be the problem
129-
cmd = [get_merge_cmd(ext), '--ilwdchar-compat', f'--output={outfile_path}']
130-
cmd.extend(curfiles)
131-
logger.info(f'Retry merging {len(curfiles)} into {outfile_path} using old xml format')
132-
result = subprocess.run(cmd, capture_output=True)
133-
returncode = result.returncode
134-
13599
if returncode == 0:
136100
logger.debug(f'Merge of {ext} files succeeded')
137101
else:
138102
logger.error(f'Return code:{returncode}, stderr:\n{result.stderr.decode("UTF-8")}')
139103

140-
if 'xml' in ext and returncode == 0 and not skip_gzip and outfile_path.suffix != '.gz':
141-
logger.info(f'Compressing {outfile_path} with gzip')
142-
res2 = subprocess.run(['gzip', '-9', '--force', outfile_path], capture_output=True)
143-
if res2.returncode == 0:
144-
ret = str(outfile_path.absolute()) + '.gz'
145-
else:
146-
logger.error(f'gzip error on {outfile_path}:\n {res2.stderr.decode("UTF-8")}')
147-
else:
148-
ret = str(outfile_path.absolute())
104+
ret = str(outfile_path.absolute())
149105

150106
return ret
151107

@@ -163,11 +119,6 @@ def valid_file(path, uint_bug):
163119
if path.exists():
164120
if path.name.endswith('.h5'):
165121
table = EventTable.read(path, path='/triggers')
166-
elif path.name.endswith('.xml.gz') or path.name.endswith('.xml'):
167-
if uint_bug:
168-
sed_cmd = ['sed', '-i', '', '-e', 's/uint_8s/int_8u/g', str(path.absolute())]
169-
subprocess.run(sed_cmd)
170-
table = EventTable.read(path, tablename='sngl_burst')
171122
elif path.name.endswith('.root'):
172123
# reading root files fail if there is a : in the name
173124
cwd = Path.cwd()
@@ -200,10 +151,6 @@ def main():
200151
parser.add_argument('-o', '--out-dir', help='Path to output directory for merged files')
201152
parser.add_argument('-n', '--no-merge', action='store_true', default=False,
202153
help='Do not merge files, only copy to output indir')
203-
parser.add_argument('--no-gzip', action='store_true', default=False,
204-
help='Do not compress the ligolw xml files')
205-
parser.add_argument('--uint-bug', default=False, action='store_true',
206-
help='Fix problem XML files created by old version of Omicron beforew merging.')
207154
parser.add_argument('--file-list', help='File with list of input file paths, one per line')
208155
parser.add_argument('infiles', nargs='*', help='List of paths to files to merge or copy')
209156

@@ -309,7 +256,7 @@ def main():
309256
curfiles.append(inpath)
310257
else:
311258
# break in continuity or start of a new metric day
312-
outfile = do_merge(out_dir, curfiles, name, start_time, end_time, ext, args.no_gzip)
259+
outfile = do_merge(out_dir, curfiles, name, start_time, end_time, ext)
313260
if outfile:
314261
outfiles.append(outfile)
315262
else:
@@ -320,7 +267,7 @@ def main():
320267
end_time = etime
321268
curfiles = [inpath]
322269
if curfiles:
323-
outfile = do_merge(out_dir, curfiles, name, start_time, end_time, ext, args.no_gzip)
270+
outfile = do_merge(out_dir, curfiles, name, start_time, end_time, ext)
324271
if outfile:
325272
outfiles.append(outfile)
326273
else:

omicron/cli/process.py

Lines changed: 5 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -422,25 +422,12 @@ def create_parser():
422422
default=False,
423423
help='skip running omicron-hdf5-merge (default: %(default)s)',
424424
)
425-
pipeg.add_argument(
426-
'--skip-ligolw_add',
427-
action='store_true',
428-
default=False,
429-
help='skip running ligolw_add (default: %(default)s)',
430-
)
431-
pipeg.add_argument(
432-
'--skip-gzip',
433-
action='store_true',
434-
default=False,
435-
help='skip running gzip (default: %(default)s)',
436-
)
437425
pipeg.add_argument(
438426
'--skip-postprocessing',
439427
action='store_true',
440428
default=False,
441429
help='skip all post-processing, equivalent to '
442430
'--skip-root-merge --skip-hdf5-merge '
443-
'--skip-ligolw_add --skip-gzip '
444431
'(default: %(default)s)',
445432
)
446433
pipeg.add_argument(
@@ -484,13 +471,14 @@ def main(args=None):
484471
"--executable on the command line")
485472

486473
# validate processing options
487-
if all((args.skip_root_merge, args.skip_hdf5_merge, args.skip_ligolw_add,
488-
args.skip_gzip, not args.archive)):
474+
if all((args.skip_root_merge, args.skip_hdf5_merge, not args.archive)):
489475
args.skip_postprocessing = True
490476
if args.archive:
491477
argsd = vars(args)
492-
for arg in ['skip-root-merge', 'skip-hdf5-merge',
493-
'skip-ligolw-add', 'skip-gzip']:
478+
for arg in [
479+
'skip-root-merge',
480+
'skip-hdf5-merge',
481+
]:
494482
if argsd[arg.replace('-', '_')]:
495483
parser.error(f"Cannot use --{arg} with --archive")
496484

@@ -1050,8 +1038,6 @@ def main(args=None):
10501038
prog_path['omicron-merge'] = find_executable('omicron-merge-with-gaps')
10511039
prog_path['rootmerge'] = find_executable('omicron-root-merge')
10521040
prog_path['hdf5merge'] = find_executable('omicron-hdf5-merge')
1053-
prog_path['ligolw_add'] = find_executable('ligolw_add')
1054-
prog_path['gzip'] = find_executable('gzip')
10551041
prog_path['omicron_archive'] = find_executable('omicron-archive')
10561042

10571043
goterr = list()
@@ -1175,20 +1161,6 @@ def main(args=None):
11751161
f' --out-dir {mergepath} {hdf5files} ')
11761162
rmfiles.append(hdf5files)
11771163

1178-
# add LIGO_LW operations
1179-
if 'xml' in fileformats:
1180-
xmlfiles = ' '.join(omicronfiles[c]['xml'])
1181-
for f in omicronfiles[c]['xml']:
1182-
ppnode.add_input_file(f)
1183-
1184-
no_merge = '--no-merge' if args.skip_ligolw_add else ''
1185-
no_gzip = '--no-gzip' if args.skip_gzip else ''
1186-
operations.append(
1187-
f' {prog_path["omicron-merge"]} {no_merge} {no_gzip} --uint-bug '
1188-
f' --out-dir {mergepath} {xmlfiles} ')
1189-
1190-
rmfiles.append(xmlfiles)
1191-
11921164
# add ASCII operations
11931165
if 'txt' in fileformats:
11941166
txtfiles = ' '.join(omicronfiles[c]['txt'])

omicron/cli/show.py

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def create_parser():
116116
'-t',
117117
'--file-type',
118118
default='xml.gz',
119-
choices=['root', 'xml.gz', 'h5'],
119+
choices=['root', 'h5'],
120120
help='type of files to find',
121121
)
122122

@@ -236,25 +236,11 @@ def main(args=None):
236236
# -- read events ----------------------------------------------------------
237237

238238
# set default columns
239-
if not args.column and args.file_type == 'xml.gz':
240-
args.column = ['peak', 'peak_frequency', 'snr']
241-
elif not args.column:
239+
if not args.column:
242240
args.column = ['time', 'frequency', 'snr']
243241

244242
# read events (with simple filter on segments)
245-
if args.file_type == 'xml.gz':
246-
cname = args.channel.split(':', 1)[1]
247-
events = EventTable.read(
248-
cache,
249-
format='ligolw',
250-
tablename='sngl_burst',
251-
selection=[
252-
('peak', in_segmentlist, segs),
253-
'channel == "{0}"'.format(cname),
254-
],
255-
columns=set(args.column + ['peak', 'channel']),
256-
)
257-
elif args.file_type == 'root':
243+
if args.file_type == 'root':
258244
events = EventTable.read(
259245
cache,
260246
format='root',

omicron/nagios.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def find_archive_latency(channel, padding, frametype=None, state=None,
168168
-------
169169
latency : `dict`
170170
a `dict` of `(ext, latency)` pairs for each file extension stored in
171-
the archive ('root', 'xml.gz')
171+
the archive ('root', 'h5')
172172
"""
173173
ifo = channel[:2]
174174
obs = ifo[0]
@@ -182,7 +182,7 @@ def find_archive_latency(channel, padding, frametype=None, state=None,
182182
target -= padding
183183
# find latest file
184184
latency = {}
185-
for ext in ['root', 'xml.gz']:
185+
for ext in ['root', 'h5']:
186186
f = find_latest_omicron_file(channel, base, ext=ext)
187187
end = file_segment(f)[1]
188188
latency[ext] = (int(target - end), f)

omicron/parameters.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ def distribute_segment(self, start, end, nperjob=1):
403403
return out
404404

405405
def output_formats(self):
406-
return [fmt for fmt in ('root', 'txt', 'xml', 'hdf5') if
406+
return [fmt for fmt in ('root', 'txt', 'hdf5') if
407407
fmt in self.get('OUTPUT', 'FORMAT')]
408408

409409
def output_files(self, start, end, flatten=False):
@@ -432,7 +432,6 @@ def output_files(self, start, end, flatten=False):
432432
extension = {
433433
'root': 'root',
434434
'txt': 'txt',
435-
'xml': 'xml',
436435
'hdf5': 'h5',
437436
}
438437

omicron/tests/test_io.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
def test_get_archive_filename():
2626
assert io.get_archive_filename('L1:GDS-CALIB_STRAIN', 0, 100) == (
2727
'%s/L1/GDS_CALIB_STRAIN_OMICRON/00000/'
28-
'L1-GDS_CALIB_STRAIN_OMICRON-0-100.xml.gz' % const.OMICRON_ARCHIVE)
28+
'L1-GDS_CALIB_STRAIN_OMICRON-0-100.h5' % const.OMICRON_ARCHIVE)
2929
assert io.get_archive_filename(
3030
'L1:GDS-CALIB_STRAIN', 1234567890, 123, archive='/triggers',
3131
filetag='TEST-TAg', ext='root') == (

0 commit comments

Comments
 (0)