Skip to content

Commit

Permalink
hxlquickmeta (#9): created bin/hxlquickmeta based on hxl2example
Browse files Browse the repository at this point in the history
  • Loading branch information
fititnt committed Feb 17, 2021
1 parent ad337aa commit eb8de4c
Show file tree
Hide file tree
Showing 2 changed files with 401 additions and 0 deletions.
385 changes: 385 additions & 0 deletions bin/hxlquickmeta
Original file line number Diff line number Diff line change
@@ -0,0 +1,385 @@
#!/usr/bin/env python3
# ==============================================================================
#
# FILE: hxlquickmeta
#
# USAGE: hxlquickmeta hxlated-data.hxl my-exported-file.example
# cat hxlated-data.hxl | hxlquickmeta > my-exported-file.example
# # Via web, in two different terminals, do it
# hug -f bin/hxlquickmeta
# ngrok http 8000
#
# DESCRIPTION: hxlquickmeta is an example script to create other scripts with
# some bare minimum command line interface that could work.
# With exception of external libraries, the hxlquickmeta is
# meant to be somewhat self-contained one-file executable ready
# to just be added to the path.
#
# Hug API can be used to create an ad-hoc web interface to your
# script. This can be both useful if you are using an software
# that accepts an URL as data source and you don't want to use
# this script to save a file locally.
#
# OPTIONS: ---
#
# REQUIREMENTS: - python3
# - libhxl (https://pypi.org/project/libhxl/)
# - hug (https://github.com/hugapi/hug/)
# BUGS: ---
# NOTES: ---
# AUTHOR: Emerson Rocha <rocha[at]ieee.org>
# COMPANY: EticaAI
# LICENSE: Public Domain dedication
# SPDX-License-Identifier: Unlicense
# VERSION: v0.6.5
# CREATED: 2021-02-17 03:55 UTC
# REVISION: ---
# ==============================================================================

import sys
import os
import logging
import argparse

# @see https://github.com/HXLStandard/libhxl-python
# pip3 install libhxl --upgrade
# Do not import hxl, to avoid circular imports
import hxl.converters
import hxl.filters
import hxl.io

import tempfile

# @see https://github.com/hugapi/hug
# pip3 install hug --upgrade
import hug

# In Python2, sys.stdin is a byte stream; in Python3, it's a text stream
STDIN = sys.stdin.buffer


class HXLQuickMeta:
"""
HXLQuickMeta is a classe to export already HXLated data in the format
example.
"""

def __init__(self):
"""
Constructs all the necessary attributes for the HXLQuickMeta object.
"""
self.hxlhelper = None
self.args = None

# Posix exit codes
self.EXIT_OK = 0
self.EXIT_ERROR = 1
self.EXIT_SYNTAX = 2

def make_args_hxlquickmeta(self):

self.hxlhelper = HXLUtils()
parser = self.hxlhelper.make_args(
description=("hxlquickmeta is an example script to create other "
"scripts with some bare minimum command line "
"interfaces that could work to export HXL files to "
"other formats."))

self.args = parser.parse_args()
return self.args

def execute_cli(self, args,
stdin=STDIN, stdout=sys.stdout, stderr=sys.stderr):
"""
The execute_cli is the main entrypoint of HXLQuickMeta. When
called will convert the HXL source to example format.
"""

# NOTE: the next lines, in fact, only generate an csv outut. So you
# can use as starting point.
with self.hxlhelper.make_source(args, stdin) as source, \
self.hxlhelper.make_output(args, stdout) as output:
hxl.io.write_hxl(output.output, source,
show_tags=not args.strip_tags)

return self.EXIT_OK

def execute_web(self, source_url, stdin=STDIN, stdout=sys.stdout,
stderr=sys.stderr, hxlmeta=False):
"""
The execute_web is the main entrypoint of HXL2Tab when this class is
called outside command line interface, like the build in HTTP use with
hug
"""

# TODO: the execute_web needs to output the tabfile with correct
# mimetype, compression, etc
# (fititnt, 2021-02-07 15:59 UTC)

self.hxlhelper = HXLUtils()

try:
temp_input = tempfile.NamedTemporaryFile('w')
temp_output = tempfile.NamedTemporaryFile('w')

webargs = type('obj', (object,), {
"infile": source_url,
"sheet_index": None,
"selector": None,
'sheet': None,
'http_header': None,
'ignore_certs': False
})

with self.hxlhelper.make_source(webargs, stdin) as source:
for line in source.gen_csv(True, True):
temp_input.write(line)

temp_input.seek(0)
# self.hxl2tab(temp_input.name, temp_output.name, False)

result_file = open(temp_input.name, 'r')
return result_file.read()

finally:
temp_input.close()
temp_output.close()

return self.EXIT_OK


class HXLUtils:
"""
HXLUtils contains functions from the Console scripts of libhxl-python
(HXLStandard/libhxl-python/blob/master/hxl/scripts.py) with few changes
to be used as class (and have one single place to change).
Last update on this class was 2021-01-25.
Author: David Megginson
License: Public Domain
"""

def __init__(self):

self.logger = logging.getLogger(__name__)

# Posix exit codes
self.EXIT_OK = 0
self.EXIT_ERROR = 1
self.EXIT_SYNTAX = 2

def make_args(self, description, hxl_output=True):
"""Set up parser with default arguments.
@param description: usage description to show
@param hxl_output: if True (default), include options for HXL output.
@returns: an argument parser, partly set up.
"""
parser = argparse.ArgumentParser(description=description)
parser.add_argument(
'infile',
help='HXL file to read (if omitted, use standard input).',
nargs='?'
)
if hxl_output:
parser.add_argument(
'outfile',
help='HXL file to write (if omitted, use standard output).',
nargs='?'
)
parser.add_argument(
'--sheet',
help='Select sheet from a workbook (1 is first sheet)',
metavar='number',
type=int,
nargs='?'
)
parser.add_argument(
'--selector',
help='JSONPath expression for starting point in JSON input',
metavar='path',
nargs='?'
)
parser.add_argument(
'--http-header',
help='Custom HTTP header to send with request',
metavar='header',
action='append'
)
if hxl_output:
parser.add_argument(
'--remove-headers',
help='Strip text headers from the CSV output',
action='store_const',
const=True,
default=False
)
parser.add_argument(
'--strip-tags',
help='Strip HXL tags from the CSV output',
action='store_const',
const=True,
default=False
)
parser.add_argument(
"--ignore-certs",
help="Don't verify SSL connections (useful for self-signed)",
action='store_const',
const=True,
default=False
)
parser.add_argument(
'--log',
help='Set minimum logging level',
metavar='debug|info|warning|error|critical|none',
choices=['debug', 'info', 'warning', 'error', 'critical'],
default='error'
)
return parser

def add_queries_arg(
self,
parser,
help='Apply only to rows matching at least one query.'
):
parser.add_argument(
'-q',
'--query',
help=help,
metavar='<tagspec><op><value>',
action='append'
)
return parser

def do_common_args(self, args):
"""Process standard args"""
logging.basicConfig(
format='%(levelname)s (%(name)s): %(message)s',
level=args.log.upper())

def make_source(self, args, stdin=STDIN):
"""Create a HXL input source."""

# construct the input object
input = self.make_input(args, stdin)
return hxl.io.data(input)

def make_input(self, args, stdin=sys.stdin, url_or_filename=None):
"""Create an input object"""

if url_or_filename is None:
url_or_filename = args.infile

# sheet index
sheet_index = args.sheet
if sheet_index is not None:
sheet_index -= 1

# JSONPath selector
selector = args.selector

http_headers = self.make_headers(args)

return hxl.io.make_input(
url_or_filename or stdin,
sheet_index=sheet_index,
selector=selector,
allow_local=True, # TODO: consider change this for execute_web
http_headers=http_headers,
verify_ssl=(not args.ignore_certs)
)

def make_output(self, args, stdout=sys.stdout):
"""Create an output stream."""
if args.outfile:
return FileOutput(args.outfile)
else:
return StreamOutput(stdout)

def make_headers(self, args):
# get custom headers
header_strings = []
header = os.environ.get("HXL_HTTP_HEADER")
if header is not None:
header_strings.append(header)
if args.http_header is not None:
header_strings += args.http_header
http_headers = {}
for header in header_strings:
parts = header.partition(':')
http_headers[parts[0].strip()] = parts[2].strip()
return http_headers


class FileOutput(object):
"""
FileOutput contains is based on libhxl-python with no changes..
Last update on this class was 2021-01-25.
Author: David Megginson
License: Public Domain
"""

def __init__(self, filename):
self.output = open(filename, 'w')

def __enter__(self):
return self

def __exit__(self, value, type, traceback):
self.output.close()


class StreamOutput(object):
"""
StreamOutput contains is based on libhxl-python with no changes..
Last update on this class was 2021-01-25.
Author: David Megginson
License: Public Domain
"""

def __init__(self, output):
self.output = output

def __enter__(self):
return self

def __exit__(self, value, type, traceback):
pass

def write(self, s):
self.output.write(s)


if __name__ == "__main__":

hxlquickmeta = HXLQuickMeta()
args = hxlquickmeta.make_args_hxlquickmeta()

hxlquickmeta.execute_cli(args)


@hug.format.content_type('text/csv')
def output_csv(data, response):
if isinstance(data, dict) and 'errors' in data:
response.content_type = 'application/json'
return hug.output_format.json(data)
response.content_type = 'text/csv'
if hasattr(data, "read"):
return data

return str(data).encode("utf8")


@hug.get('/hxlquickmeta.csv', output=output_csv)
def api_hxl2tab(source_url):
"""hxlquickmeta (@see https://github.com/EticaAI/HXL-Data-Science-file-formats)
Example:
http://localhost:8000/hxl2tab.tab?source_url=https://docs.google.com/spreadsheets/u/1/d/1l7POf1WPfzgJb-ks4JM86akFSvaZOhAUWqafSJsm3Y4/edit#gid=634938833
"""

hxlquickmeta = HXLQuickMeta()

return hxlquickmeta.execute_web(source_url)
Loading

0 comments on commit eb8de4c

Please sign in to comment.