Python General Code

from html.parser import HTMLParser

class MLStripper(HTMLParser):
    """Collects the text content of an HTML document, dropping the tags."""

    def __init__(self):
        super().__init__(convert_charrefs=True)
        self.fed = []

    def handle_data(self, d):
        # Called for every run of text found between tags.
        self.fed.append(d)

    def get_data(self):
        return ' '.join(self.fed)


def strip_tags(html):
    s = MLStripper()
    s.feed(html)
    return s.get_data()

# now use strip_tags to remove HTML tags from a string
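
A quick usage sketch; the sample markup below is made up for illustration:

sample_html = '<p>Hello <b>world</b>!</p>'
print(strip_tags(sample_html))  # prints the visible text, without the tags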

import json

def read_json_from_file(file_path):
    # Load a JSON document from disk into the corresponding Python object.
    with open(file_path, 'r') as infile:
        return json.load(infile)

def write_json_to_file(file_path, token_map):
    # Serialize a Python object (e.g. a dict) to a JSON file.
    with open(file_path, 'w') as f:
        json.dump(token_map, f)
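
A minimal round-trip sketch; 'data.json' is an assumed, writable path:

token_map = {'hello': 3, 'world': 1}  # hypothetical data
write_json_to_file('data.json', token_map)
assert read_json_from_file('data.json') == token_map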

import csv

# Read a comma-separated file row by row ('file' holds the path to it).
with open(file, 'r', newline='') as infile:
    reader = csv.reader(infile, delimiter=',')
    for row in reader:
        print(row[0], row[1])

import csv

numbers = range(10)

# Write one number per row; open in 'w' mode, with newline='' for the csv module.
with open(file, 'w', newline='') as outfile:
    writer = csv.writer(outfile, delimiter=',')
    for number in numbers:
        writer.writerow([number])
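
Putting the two csv snippets together as a small round trip; 'numbers.csv' is a placeholder path:

import csv

with open('numbers.csv', 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    for number in range(10):
        writer.writerow([number])

with open('numbers.csv', 'r', newline='') as infile:
    for row in csv.reader(infile):
        print(row[0])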

# Sort a dict by value in descending order; the result is a list of (key, value) tuples.
dict_as_list = sorted(dict_object.items(), key=lambda x: x[1], reverse=True)
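
For example, with a hypothetical word-count dict:

dict_object = {'apple': 2, 'banana': 5, 'cherry': 1}
dict_as_list = sorted(dict_object.items(), key=lambda x: x[1], reverse=True)
print(dict_as_list)  # [('banana', 5), ('apple', 2), ('cherry', 1)]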

import requests


def download_file(url, file_path):
    # Download a URL to a local file; return True on success, False otherwise.
    try:
        response = requests.get(url)
    except requests.exceptions.RequestException as e:
        print('Failed to download file from {} with error as {}'.format(url, e))
        return False
    if response.status_code == requests.codes.ok:
        with open(file_path, 'wb') as outfile:
            outfile.write(response.content)
        return True
    return False
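
Example call; the URL and output path below are placeholders, not real endpoints:

if download_file('https://example.com/report.pdf', 'report.pdf'):
    print('Saved report.pdf')
else:
    print('Download failed')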

import re


def remove_multiple_space(s):
    # Collapse every run of whitespace (spaces, tabs, newlines) into a single space.
    return re.sub(r'\s+', ' ', s)
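
For example:

print(remove_multiple_space('too   many \t spaces\nhere'))  # 'too many spaces here'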

import functools
import multiprocessing.pool

def timeout(max_timeout):
    """Timeout decorator, parameter in seconds."""
    def timeout_decorator(item):
        """Wrap the original function."""
        @functools.wraps(item)
        def func_wrapper(*args, **kwargs):
            """Closure for function."""
            pool = multiprocessing.pool.ThreadPool(processes=1)
            async_result = pool.apply_async(item, args, kwargs)
            # raises multiprocessing.TimeoutError if execution exceeds max_timeout;
            # note that the wrapped call keeps running in its worker thread regardless
            return async_result.get(max_timeout)
        return func_wrapper
    return timeout_decorator


# value is in seconds
@timeout(2)
def my_function():
    print('Function which requires timeout')
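
Calling the decorated function raises multiprocessing.TimeoutError once the limit is exceeded. A rough sketch, with time.sleep standing in for slow work:

import multiprocessing
import time

@timeout(1)
def slow_function():
    time.sleep(5)
    return 'done'

try:
    slow_function()
except multiprocessing.TimeoutError:
    print('slow_function() did not finish within 1 second')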