Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
* upstream/master:
  test: Add travis config file
  Preprocess: Display table row separators via extra lines
  Preprocess: Remove invisible elements
  Preprocess: Convert CSS font-size values to pt
  Preprocess: Workaround bad formatting by using non-zero width
  Preprocess: Make remove_display_property_if_needed() more generic
  Preprocess: Convert subsequent inline block elements to tables
  Preprocess: Update cssless tests
  Preprocess: Fix span to table conversion when tr children have style
  Add tests for convert_span_tables_to_tr_td()
  Preprocess: Convert span tables to html tables
  Preprocess: extract file operations from preprocess_html_merge_cssless()
  • Loading branch information
PeterFeicht committed Aug 24, 2018
2 parents 74bc0b2 + 02da161 commit d27a540
Show file tree
Hide file tree
Showing 9 changed files with 2,999 additions and 306 deletions.
33 changes: 33 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Travis CI configuration file
# http://about.travis-ci.org/docs/
dist: trusty

language: python

# Available Python versions:
# http://about.travis-ci.org/docs/user/ci-environment/#Python-VM-images
python:
- "3.5"
- "3.6"

# Dependencies installation commands
install:
- pip install -U pip
- pip install -r requirements.txt

#before_script:
# - "if [[ $TRAVIS_PYTHON_VERSION != 2.6 ]]; then flake8 --config=config/flake8rc asm_collect.py asmtest tests; fi"
# - "if [[ $TRAVIS_PYTHON_VERSION != 2.6 ]]; then pylint --rcfile=config/pylintrc asm_collect.py asmtest tests; fi"

script:
- "python -m unittest discover"

notifications:
email: false

sudo: false
branches:
only:
- master
git:
depth: 300
2 changes: 1 addition & 1 deletion commands/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def preprocess_html_file(root, fn, rename_map):
for el in html.xpath('//*'):
if has_class(el, ['noprint', 'editsection']) and el.get('id') != 'cpp-footer-base':
el.getparent().remove(el)
if el.get('id') == 'toc':
if el.get('id') in ['toc', 'catlinks']:
el.getparent().remove(el)

# remove external links to unused resources
Expand Down
263 changes: 245 additions & 18 deletions commands/preprocess_cssless.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,39 +26,266 @@
import warnings
import io

def preprocess_html_merge_css(src_path, dst_path):
def preprocess_html_merge_cssless(src_path, dst_path):
with open(src_path, 'r') as a_file:
content = a_file.read()
parser = etree.HTMLParser()
stripped = content.strip()
root = etree.fromstring(stripped, parser)

output = preprocess_html_merge_css(root, src_path)
strip_style_tags(root)
remove_display_none(root)
convert_span_tables_to_tr_td(root)
convert_inline_block_elements_to_table(root)
convert_zero_td_width_to_nonzero(root)
convert_font_size_property_to_pt(root, 16)
convert_table_border_top_to_tr_background(root)

head = os.path.dirname(dst_path)
os.makedirs(head, exist_ok=True)

with open(dst_path, 'wb') as a_file:
root.getroottree().write(a_file, pretty_print=True, method="html",
encoding='utf-8')
return output

def silence_cssutils_warnings():
log = logging.Logger('ignore')
output = io.StringIO()
handler = logging.StreamHandler(stream=output)
formatter = logging.Formatter('%(levelname)s, %(message)s')
handler.setFormatter(formatter)
log.addHandler(handler)
# cssutils_logging_handler of Premailer.__init__ is insufficient to silence
# warnings to stderr in non-verbose mode
cssutils.log.setLog(log)

with open(src_path, 'r') as a_file:
content = a_file.read()
parser = etree.HTMLParser()
stripped = content.strip()
root = etree.fromstring(stripped, parser)
return output

def preprocess_html_merge_css(root, src_path):
# cssutils_logging_handler of Premailer.__init__ is insufficient to silence
# warnings to stderr in non-verbose mode
output = silence_cssutils_warnings()

with warnings.catch_warnings():
warnings.simplefilter("ignore")
premailer = Premailer(root, base_url=src_path,
disable_link_rewrites=True, remove_classes=True)

root = premailer.transform().getroot()

head = os.path.dirname(dst_path)
os.makedirs(head, exist_ok=True)
return output.getvalue()

# completely remove content of style tags and tags
nondata_tags = ['style']
strip_elements(root, *nondata_tags)
def strip_style_tags(root):
strip_elements(root, 'style')

with open(dst_path, 'wb') as a_file:
root.getroottree().write(a_file, pretty_print=True, method="html",
encoding='utf-8')
def needs_td_wrapper(element):
# element has table:row
if len(element.getchildren()) == 0:
return True
for el in element.getchildren():
if has_css_property_value(el, 'display', 'table-row') or \
has_css_property_value(el, 'display', 'table-cell'):
return False
return True

return output.getvalue()
def remove_css_property(element, property_name):
atrib = cssutils.parseStyle(element.get('style'))
atrib.removeProperty(property_name)
element.set('style', atrib.getCssText(separator=''))
if len(element.get('style')) == 0:
element.attrib.pop('style')


def get_css_property_value(el, prop_name):
atrib = cssutils.parseStyle(el.get('style'))
value = atrib.getPropertyCSSValue(prop_name)
if value:
return value.cssText
return None

def has_css_property_value(el, prop_name, prop_value):
value = get_css_property_value(el, prop_name)
if value and value == prop_value:
return True
return False

def set_css_property_value(el, prop_name, prop_value):
atrib = cssutils.parseStyle(el.get('style'))
atrib.setProperty(prop_name, prop_value)
el.set('style', atrib.getCssText(separator=''))

def convert_display_property_to_html_tag(element, element_tag, display_value):
str_attrib_value = element.get('style')
if str_attrib_value is None:
return False
if has_css_property_value(element, 'display', display_value):
element.tag = element_tag
remove_css_property(element, 'display')
return True

def convert_span_table_to_tr_td(table_el):
table_el.tag = 'table'
remove_css_property(table_el, 'display')

for element in table_el.getchildren():
tag_renamed = convert_display_property_to_html_tag(element, 'tr',
'table-row')
if tag_renamed:
if needs_td_wrapper(element):
td = etree.Element('td')
for el in element.getchildren():
element.remove(el)
td.append(el)
element.append(td)
td.text = element.text
element.text = None
else:
for child in element:
convert_display_property_to_html_tag(child, 'td',
'table-cell')

def wrap_element(el, tag_name, style):
new_el = etree.Element(tag_name)
new_el.set('style', style)
el.addprevious(new_el)
new_el.insert(0, el)

def remove_display_none(root_el):
for el in root_el.xpath('//*[contains(@style, "display")]'):
if has_css_property_value(el, 'display', 'none'):
el.getparent().remove(el)

def convert_span_tables_to_tr_td(root_el):

# note that the following xpath expressions match only the prefix of the
# CSS property value
table_els = root_el.xpath('//span[contains(@style, "display:table")]')

for table_el in table_els:
if has_css_property_value(table_el, 'display', 'table'):
convert_span_table_to_tr_td(table_el)
#wrap_element(table_el, 'div', 'display:inline')
#convert_span_table_to_tr_td(table_el)

return root_el

def convert_inline_block_elements_to_table(root_el):
for el in root_el.xpath('//*[contains(@style, "display")]'):
if not has_css_property_value(el, 'display', 'inline-block') and \
not has_css_property_value(el, 'display', 'inline-table'):
continue

elements_to_put_into_table = [el]
el = el.getnext()

# find subsequent inline block elements
while el is not None:
if has_css_property_value(el, 'display', 'inline-block') or \
has_css_property_value(el, 'display', 'inline-table'):
elements_to_put_into_table.append(el)
else:
break
el = el.getnext()

# only makes sense to put two or more to table
if len(elements_to_put_into_table) < 2:
continue

# create table and put elements into it
table_el = etree.Element('table')
table_el.set('style', 'padding:0; margin:0; border:none;')

elements_to_put_into_table[0].addprevious(table_el)

tr = etree.SubElement(table_el, 'tr')

for el in elements_to_put_into_table:
td = etree.SubElement(tr, 'td')
el.getparent().remove(el)
td.append(el)

def get_table_rows(table_el):
for el in table_el.iterchildren(['th', 'tr']):
yield el
for el in table_el.iterchildren(['thead', 'tbody', 'tfoot']):
for el2 in el.iterchildren(['th', 'tr']):
yield el2

def get_max_number_of_columns(table_el):
max_tds = 0
for row_el in get_table_rows(table_el):
max_tds = max(max_tds, len(list(row_el.iterchildren('td'))))
return max_tds

def clear_tr_border_top(tr_el):
for td_el in tr_el.iterchildren('td'):
remove_css_property(td_el, 'border-top')

def has_tr_border_top(tr_el):
for td_el in tr_el.iterchildren('td'):
if get_css_property_value(td_el, 'border-top') is not None:
return True
return False

def has_table_border_top(table_el):
for tr_el in get_table_rows(table_el):
if has_tr_border_top(tr_el):
return True
return False

def convert_table_border_top_to_tr_background(root_el):
for table_el in root_el.iter('table'):
if not has_table_border_top(table_el):
continue

td_count = get_max_number_of_columns(table_el)
for tr_el in get_table_rows(table_el):
if has_tr_border_top(tr_el):
# TODO: handle border properties
clear_tr_border_top(tr_el)
border_tr = etree.Element('tr')
border_td = etree.SubElement(border_tr, 'td')
border_td.set('colspan', str(td_count))
border_td.set('style', 'height:1px; font-size:1px; '
'background-color: #ccc;')
tr_el.addprevious(border_tr)


def convert_zero_td_width_to_nonzero(root_el):
for el in root_el.xpath('//*[contains(@style, "width")]'):
if has_css_property_value(el, 'width', '0%'):
el.attrib['width'] = "1px"
remove_css_property(el, 'width')

for el in root_el.xpath('//*[contains(@width, "0%")]'):
el.attrib['width'] = "1px"

def apply_font_size(size, parent_size_pt):
size = size.strip()

if size[-2:] == 'em':
value_number = float(size[:-2].strip())
return value_number*parent_size_pt

if size[-2:] in ['pt', 'px']:
return float(size[:-2].strip())

if size[-1] == '%':
value_number = float(size[:-1].strip())/100
return value_number*parent_size_pt

return parent_size_pt

def convert_font_size_property_to_pt_recurse(el, parent_size_pt):
size_value = get_css_property_value(el,"font-size")

if size_value:
el_size_pt = apply_font_size(size_value, parent_size_pt)
set_css_property_value(el, "font-size", "{}pt".format(el_size_pt))
else:
el_size_pt = parent_size_pt

for child in el.getchildren():
convert_font_size_property_to_pt_recurse(child, el_size_pt)

def convert_font_size_property_to_pt(root_el, default_size):
convert_font_size_property_to_pt_recurse(root_el, default_size)
3 changes: 2 additions & 1 deletion devhelp2qch.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from copy import deepcopy
import sys
import argparse
from index_transform.devhelp_qch import convert_devhelp_to_qch

def main():
parser = argparse.ArgumentParser(prog='devhelp2qch.py')
Expand All @@ -43,7 +44,7 @@ def main():
file_tree = etree.parse(file_path, parser)

out_f = open(dst_path, 'wb')
out_f.write(convert_devhelp_to_qch(in_tree.getroot(), file_tree.getroot(), v_folder)
out_f.write(convert_devhelp_to_qch(in_tree.getroot(), file_tree.getroot(), v_folder))
out_f.close()

if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion preprocess_qch.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def main():
paths_list.append(tuple)

with concurrent.futures.ProcessPoolExecutor() as executor:
futures = [ (executor.submit(preprocess_html_merge_css,
futures = [ (executor.submit(preprocess_html_merge_cssless,
src_path, dst_path), i)
for i, (src_path, dst_path) in enumerate(paths_list) ]

Expand Down
Loading

0 comments on commit d27a540

Please sign in to comment.