Commit cd67355

Merge branch 'release/0.18.1'

dan-blanchard committed Oct 24, 2013
2 parents 833b7da + 85bc9de

Showing 9 changed files with 48 additions and 45 deletions.
13 changes: 8 additions & 5 deletions .travis.yml
@@ -11,8 +11,6 @@ notifications:
   email: false
 
 # Install stuff
-virtualenv:
-  system_site_packages: true
 before_install:
   - if [ $GRIDMAP == "true" ]; then travis/install_sge.sh; fi
   - export SGE_ROOT=/var/lib/gridengine
@@ -22,10 +20,15 @@ before_install:
   - sudo mkdir /scratch/
   - sudo chmod 777 /scratch/
   - sudo apt-get update -qq
-  - sudo apt-get install libatlas-dev libatlas-base-dev liblapack-dev gfortran
+  - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "3" ]; then sudo apt-get install libatlas-dev libatlas-base-dev liblapack-dev gfortran; fi
+  - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then wget http://repo.continuum.io/miniconda/Miniconda-2.0.0-Linux-x86_64.sh -O miniconda.sh; else wget http://repo.continuum.io/miniconda/Miniconda3-2.0.0-Linux-x86_64.sh -O miniconda.sh; fi
+  - chmod +x miniconda.sh
+  - ./miniconda.sh -b
+  - export PATH=/home/travis/anaconda/bin:$PATH
 install:
+  - conda install --yes pip python=$TRAVIS_PYTHON_VERSION numpy scipy
+  - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then conda install --yes scikit-learn; fi
+  - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then pip install --use-mirrors configparser; fi
-  - pip install -q numpy --use-mirrors
   - pip install -r requirements.txt --use-mirrors
   - pip install python-coveralls --use-mirrors
   - pip install nose-cov --use-mirrors
@@ -36,7 +39,7 @@ install:
 
 # Run test
 script:
-  - nosetests --with-cov --cov skll --cov-config .coveragerc --logging-level=INFO
+  - nosetests --with-cov --cov skll --cov-config .coveragerc --logging-level=WARNING
 
 # Calculate coverage
 after_success:
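
The conditionals above carry the whole miniconda switch: pick the installer that matches the build's Python major version, bootstrap it, put its bin directory on the PATH, and let conda supply numpy/scipy (with conda scikit-learn and the configparser backport needed only on Python 2). A minimal Python sketch of the same version dispatch, with a hypothetical function name and the URLs copied from the diff:

    import sys

    def miniconda_installer_url(major=sys.version_info[0]):
        """Pick the Miniconda bootstrap script for this Python major version."""
        base = 'http://repo.continuum.io/miniconda/'
        if major == 2:
            return base + 'Miniconda-2.0.0-Linux-x86_64.sh'
        # Python 3 builds get the Miniconda3 installer
        return base + 'Miniconda3-2.0.0-Linux-x86_64.sh'

    print(miniconda_installer_url())
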
7 changes: 7 additions & 0 deletions README.rst
@@ -63,6 +63,13 @@ Requirements
 Changelog
 ~~~~~~~~~
 
+- v0.18.1
+
+  + Updated ``generate_predictions`` to use latest API.
+  + Switched to using multiprocessing-compatible logging. This should fix some
+    intermittent deadlocks.
+  + Switched to using miniconda for installing Python on Travis-CI.
+
 - v0.18.0
 
   + Fixed crash when ``modelpath`` is blank and ``task`` is not
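
The "multiprocessing-compatible logging" entry is the heart of this release; the later hunks in this commit replace every logging.getLogger(__name__) with multiprocessing.log_to_stderr(). A minimal sketch of the swap, on the assumption (suggested by the changelog) that the intermittent deadlocks involved ordinary logging handlers being used from forked worker processes:

    import logging
    from multiprocessing import log_to_stderr

    # Before: logger = logging.getLogger(__name__)
    # After: multiprocessing's own logger, safe to call from workers
    logger = log_to_stderr()
    logger.setLevel(logging.INFO)
    logger.info('visible from parent and worker processes alike')
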
39 changes: 19 additions & 20 deletions scripts/generate_predictions
@@ -25,7 +25,7 @@ Loads a trained model and outputs predictions based on input feature files.
 :date: February 2013
 '''
 
-from __future__ import print_function, unicode_literals
+from __future__ import absolute_import, print_function, unicode_literals
 
 import argparse
 
@@ -40,13 +40,12 @@ class Predictor(object):
     predictions for feature strings.
     """
 
-    def __init__(self, model_prefix, threshold=None, positive_class=1):
+    def __init__(self, model_path, threshold=None, positive_class=1):
         '''
         Initialize the predictor.
 
-        :param model_prefix: Prefix to use when loading trained model (and its
-                             vocab).
-        :type model_prefix: basestring
+        :param model_path: Path to use when loading trained model.
+        :type model_path: str
         :param threshold: If the model we're using is generating probabilities
                           of the positive class, return 1 if it meets/exceeds
                           the given threshold and 0 otherwise.
@@ -58,18 +57,14 @@ class Predictor(object):
                                for binary classification.
         :type positive_class: int
         '''
-        self._learner = Learner()
-        self._learner.load('{}.model'.format(model_prefix))
+        self._learner = Learner.from_file(model_path)
         self._pos_index = positive_class
         self.threshold = threshold
 
     def predict(self, data):
         '''
-        Return a list of predictions for a given numpy array of examples
-        (which are dicts)
+        Return a list of predictions for a given ExamplesTuple of examples.
         '''
-        # Must make a list around a dictionary to fit format that
-        # Learner.predict expects
         preds = self._learner.predict(data).tolist()
 
         if self._learner.probability:
@@ -92,18 +87,14 @@ def main():
                                              on input feature files.",
                                  formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                  conflict_handler='resolve')
-    parser.add_argument('model_prefix', help='Prefix to use when loading \
-                                              trained model (and its vocab).')
+    parser.add_argument('model_file',
+                        help='Model file to load and use for generating \
+                              predictions.')
     parser.add_argument('input_file',
                         help='A csv file, json file, or megam file \
                               (with or without the label column), \
                               with the appropriate suffix.',
                         nargs='+')
-    parser.add_argument('-l', '--has_labels',
-                        help="Indicates that the input file includes \
-                              labels and that the features start at the \
-                              2nd column for csv and megam files.",
-                        action='store_true')
     parser.add_argument('-p', '--positive_class',
                         help="If the model is only being used to predict the \
                               probability of a particular class, this \
@@ -112,23 +103,31 @@ def main():
                              for binary classification. Keep in mind that \
                              classes are sorted lexicographically.",
                         default=1, type=int)
+    parser.add_argument('-q', '--quiet',
+                        help='Suppress printing of "Loading..." messages.',
+                        action='store_true')
     parser.add_argument('-t', '--threshold',
                         help="If the model we're using is generating \
                               probabilities of the positive class, return 1 \
                               if it meets/exceeds the given threshold and 0 \
                               otherwise.",
                         type=float)
+    parser.add_argument('--tsv_label',
+                        help='Name of the column which contains \
+                              the class labels in TSV files.',
+                        default='y')
     parser.add_argument('--version', action='version',
                         version='%(prog)s {0}'.format(__version__))
     args = parser.parse_args()
 
     # Create the classifier and load the model
-    predictor = Predictor(args.model_prefix,
+    predictor = Predictor(args.model_file,
                           positive_class=args.positive_class,
                           threshold=args.threshold)
 
     for input_file in args.input_file:
-        data = load_examples(input_file, has_labels=args.has_labels)
+        data = load_examples(input_file, quiet=args.quiet,
+                             tsv_label=args.tsv_label)
         for pred in predictor.predict(data):
             print(pred)
 
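
Taken together, these hunks replace the old prefix-based loading (a bare Learner() followed by load('{}.model'.format(prefix))) with a single path passed to Learner.from_file, and swap the has_labels flag for quiet/tsv_label. A hedged usage sketch built only from calls visible in this diff; the file names are illustrative:

    from skll.data import load_examples
    from skll.learner import Learner

    # Load a saved model directly from its path (new in this commit)
    learner = Learner.from_file('english.model')

    # load_examples now takes quiet/tsv_label instead of has_labels
    examples = load_examples('test.jsonlines', quiet=True, tsv_label='y')
    for pred in learner.predict(examples).tolist():
        print(pred)
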
4 changes: 0 additions & 4 deletions scripts/run_experiment
@@ -81,10 +81,6 @@ def main():
                         version='%(prog)s {0}'.format(__version__))
     args = parser.parse_args()
 
-    # initialize the logger
-    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s',
-                        level=logging.INFO)
-
     machines = None
     if args.machines:
         machines = args.machines.split(',')
6 changes: 3 additions & 3 deletions skll/data.py
@@ -27,14 +27,14 @@
 from __future__ import print_function, unicode_literals
 
 import json
-import logging
 import os
 import sys
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
 from csv import DictReader, DictWriter, excel_tab
 from decimal import Decimal
 from itertools import islice
 from io import open
+from multiprocessing import log_to_stderr
 from operator import itemgetter
 
 import numpy as np
@@ -413,7 +413,7 @@ def load_examples(path, quiet=False, sparse=True, tsv_label='y',
                   the feature matrix.
     '''
     # Setup logger
-    logger = logging.getLogger(__name__)
+    logger = log_to_stderr()
 
     logger.debug('Path: {}'.format(path))
 
@@ -544,7 +544,7 @@ def write_feature_file(path, ids, classes, features, feat_vectorizer=None,
     :type tsv_label: str
     '''
    # Setup logger
-    logger = logging.getLogger(__name__)
+    logger = log_to_stderr()
 
     logger.debug('Feature vectorizer: {}'.format(feat_vectorizer))
     logger.debug('Features: {}'.format(features))
13 changes: 6 additions & 7 deletions skll/experiments.py
@@ -29,15 +29,14 @@
 import datetime
 import errno
 import json
-import logging
 import math
 import os
 import sys
 import tempfile
 from collections import defaultdict
 from io import open
 from itertools import chain
-from multiprocessing import Pool
+from multiprocessing import log_to_stderr, Pool
 
 import configparser  # Backported version from Python 3
 import numpy as np
@@ -114,7 +113,7 @@ def _write_summary_file(result_json_paths, output_file, ablation=False):
     '''
     learner_result_dicts = []
     all_features = set()
-    logger = logging.getLogger(__name__)
+    logger = log_to_stderr()
     for json_path in result_json_paths:
         if not os.path.exists(json_path):
             logger.error(('JSON results file {} not found. Skipping summary ' +
@@ -714,7 +713,7 @@ def run_configuration(config_file, local=False, overwrite=True, queue='all.q',
     # Read configuration
     config = _parse_config_file(config_file)
 
-    logger = logging.getLogger(__name__)
+    logger = log_to_stderr()
     if not local and not _HAVE_GRIDMAP:
         local = True
         logger.warning('gridmap 0.10.1+ not available. Forcing local ' +
@@ -969,7 +968,7 @@ def _check_job_results(job_results):
     '''
     See if we have a complete results dictionary for every job.
     '''
-    logger = logging.getLogger(__name__)
+    logger = log_to_stderr()
     logger.info('checking job results')
     for result_dicts in job_results:
         if not result_dicts or 'task' not in result_dicts[0]:
@@ -1060,7 +1059,7 @@ def run_ablation(config_path, local=False, overwrite=True, queue='all.q',
     # Read configuration
     config = _parse_config_file(config_path)
 
-    logger = logging.getLogger(__name__)
+    logger = log_to_stderr()
 
     featuresets = json.loads(_fix_json(config.get("Input", "featuresets")))
     featureset_names = json.loads(_fix_json(config.get("Input",
@@ -1094,7 +1093,7 @@
     try:
         result_json_paths.extend(chain(*pool.map(_run_experiment_without_feature,
                                                  list(arg_tuples))))
-    # If we run_ablation is run via a subprocess (like nose does),
+    # If run_experiment is run via a subprocess (like nose does),
     # this will fail, so just do things serially then.
     except AssertionError:
         del pool
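
The comment fixed in the last hunk documents a real constraint: multiprocessing.Pool cannot spawn workers from inside a daemonic process (Process.start() raises AssertionError: "daemonic processes are not allowed to have children", which is exactly what happens when nose runs the suite in a subprocess), so the code falls back to serial execution. A stripped-down sketch of that fallback pattern, with hypothetical names:

    from multiprocessing import Pool

    def map_parallel_or_serial(func, args_list, processes=2):
        """Try a worker pool; fall back to a plain serial loop when pools
        are unavailable (e.g. inside a daemonic nose subprocess)."""
        try:
            pool = Pool(processes=processes)
            return pool.map(func, args_list)
        except AssertionError:
            return [func(args) for args in args_list]
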
5 changes: 2 additions & 3 deletions skll/learner.py
@@ -27,12 +27,11 @@
 from __future__ import absolute_import, print_function, unicode_literals
 
 import inspect
-import logging
 import os
 import sys
 from collections import defaultdict
 from functools import wraps
-from multiprocessing import cpu_count
+from multiprocessing import cpu_count, log_to_stderr
 
 import numpy as np
 import scipy.sparse as sp
@@ -107,7 +106,7 @@ def __init__(self, labels, keep, examples):
         self._warned = False
 
     def __iter__(self):
-        logger = logging.getLogger(__name__)
+        logger = log_to_stderr()
         for train_index, test_index in super(FilteredLeaveOneLabelOut,
                                              self).__iter__():
             train_len = len(train_index)
4 changes: 2 additions & 2 deletions skll/metrics.py
@@ -27,7 +27,7 @@
 
 from __future__ import print_function, unicode_literals
 
-import logging
+from multiprocessing import log_to_stderr
 
 import numpy as np
 from scipy.stats import kendalltau, spearmanr, pearsonr
@@ -75,7 +75,7 @@ def kappa(y_true, y_pred, weights=None, allow_off_by_one=False):
                             for when building the weights matrix.
     :type allow_off_by_one: bool
     '''
-    logger = logging.getLogger(__name__)
+    logger = log_to_stderr()
 
     # Ensure that the lists are both the same length
     assert(len(y_true) == len(y_pred))
2 changes: 1 addition & 1 deletion skll/version.py
@@ -23,5 +23,5 @@
 :organization: ETS
 '''
 
-__version__ = '0.18.0'
+__version__ = '0.18.1'
 VERSION = tuple(int(x) for x in __version__.split('.'))
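
Since the VERSION line is visible in the hunk above, a quick check of what it yields after the bump:

    __version__ = '0.18.1'
    VERSION = tuple(int(x) for x in __version__.split('.'))
    assert VERSION == (0, 18, 1)
    assert VERSION > (0, 18, 0)  # tuples compare element-wise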
