Skip to content

Commit dcd08e1

Browse files
committed
Merge branch 'release/1.1.1' into stable
2 parents a13c141 + 0b6c489 commit dcd08e1

File tree

7 files changed

+66
-33
lines changed

7 files changed

+66
-33
lines changed

doc/run_experiment.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -381,12 +381,12 @@ example, if you wanted to collapse the labels ``beagle`` and ``dachsund`` into a
381381
382382
Any labels not included in the dictionary will be left untouched.
383383

384-
.. _cv_folds:
384+
.. _num_cv_folds:
385385

386-
cv_folds *(Optional)*
386+
num_cv_folds *(Optional)*
387387
""""""""""""""""""""""
388388

389-
The number of folds to use for cross-validation. Defaults to 10.
389+
The number of folds to use for cross validation. Defaults to 10.
390390

391391
.. _random_folds:
392392

skll/config.py

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -194,14 +194,14 @@ def validate(self):
194194
incorrectly_specified_options]))
195195

196196

197-
def _locate_file(file_path, config_path):
197+
def _locate_file(file_path, config_dir):
198198
if not file_path:
199199
return ''
200-
path_to_check = file_path if isabs(file_path) else normpath(join(dirname(config_path), file_path))
200+
path_to_check = file_path if isabs(file_path) else normpath(join(config_dir,
201+
file_path))
201202
ans = exists(path_to_check)
202203
if not ans:
203-
raise IOError(errno.ENOENT, "File does not exist",
204-
path_to_check)
204+
raise IOError(errno.ENOENT, "File does not exist", path_to_check)
205205
else:
206206
return path_to_check
207207

@@ -234,6 +234,7 @@ def _parse_config_file(config_path):
234234

235235
# compute the absolute path for the config file
236236
config_path = realpath(config_path)
237+
config_dir = dirname(config_path)
237238

238239
# set up a config parser with the above default values
239240
config = _setup_config_parser(config_path)
@@ -419,10 +420,10 @@ def _parse_config_file(config_path):
419420
featuresets[0][0] += '_test_{}'.format(basename(test_file))
420421

421422
# make sure all the specified paths/files exist
422-
train_path = _locate_file(train_path, config_path)
423-
test_path = _locate_file(test_path, config_path)
424-
train_file = _locate_file(train_file, config_path)
425-
test_file = _locate_file(test_file, config_path)
423+
train_path = _locate_file(train_path, config_dir)
424+
test_path = _locate_file(test_path, config_dir)
425+
train_file = _locate_file(train_file, config_dir)
426+
test_file = _locate_file(test_file, config_dir)
426427

427428
# Get class mapping dictionary if specified
428429
class_map_string = config.get("Input", "class_map")
@@ -443,27 +444,31 @@ def _parse_config_file(config_path):
443444

444445
# do we want to keep the predictions?
445446
prediction_dir = config.get("Output", "predictions")
446-
if prediction_dir and not exists(prediction_dir):
447-
prediction_dir = join(dirname(config_path), prediction_dir)
448-
os.makedirs(prediction_dir)
447+
if prediction_dir:
448+
prediction_dir = join(config_dir, prediction_dir)
449+
if not exists(prediction_dir):
450+
os.makedirs(prediction_dir)
449451

450452
# make sure log path exists
451453
log_path = config.get("Output", "log")
452-
if log_path and not exists(log_path):
453-
log_path = join(dirname(config_path), log_path)
454-
os.makedirs(log_path)
454+
if log_path:
455+
log_path = join(config_dir, log_path)
456+
if not exists(log_path):
457+
os.makedirs(log_path)
455458

456459
# make sure model path exists
457460
model_path = config.get("Output", "models")
458-
if model_path and not exists(model_path):
459-
model_path = join(dirname(config_path), model_path)
460-
os.makedirs(model_path)
461+
if model_path:
462+
model_path = join(config_dir, model_path)
463+
if not exists(model_path):
464+
os.makedirs(model_path)
461465

462466
# make sure results path exists
463467
results_path = config.get("Output", "results")
464-
if results_path and not exists(results_path):
465-
results_path = join(dirname(config_path), results_path)
466-
os.makedirs(results_path)
468+
if results_path:
469+
results_path = join(config_dir, results_path)
470+
if not exists(results_path):
471+
os.makedirs(results_path)
467472

468473
# 4. Tuning
469474
# do we need to run a grid search for the hyperparameters or are we just

skll/data/readers.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def read(self):
183183
ids = []
184184
labels = []
185185
with open(self.path_or_list, 'r' if PY3 else 'rb') as f:
186-
for ex_num, (id_, class_, _) in enumerate(self._sub_read(f)):
186+
for ex_num, (id_, class_, _) in enumerate(self._sub_read(f), start=1):
187187
# Update lists of IDs, classes, and features
188188
if self.ids_to_floats:
189189
try:
@@ -212,9 +212,8 @@ def feat_dict_generator():
212212
for ex_num, (_, _, feat_dict) in enumerate(self._sub_read(f)):
213213
yield feat_dict
214214
if ex_num % 100 == 0:
215-
self._print_progress('{:.8}%'.format(100 * ((ex_num +
216-
1) /
217-
total)))
215+
self._print_progress('{:.8}%'.format(100 * ((ex_num /
216+
total))))
218217
self._print_progress("100%")
219218

220219
# Convert everything to numpy arrays

skll/learner.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -709,7 +709,9 @@ def model_params(self):
709709
if coef[idx]:
710710
res['{}\t{}'.format(label, feat)] = coef[idx]
711711

712-
intercept = dict(zip(label_list, self.model.intercept_))
712+
if self.model.intercept_:
713+
intercept = dict(zip(label_list, self.model.intercept_))
714+
713715
else:
714716
# not supported
715717
raise ValueError(("{} is not supported by" +

skll/utilities/print_model_weights.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import sys
1515

1616
from six import iteritems
17+
import numpy as np
1718

1819
from skll import Learner
1920
from skll.version import __version__
@@ -63,7 +64,12 @@ def main(argv=None):
6364
if intercept is not None:
6465
# subclass of LinearModel
6566
if '_intercept_' in intercept:
66-
print("intercept = {:.12f}".format(intercept['_intercept_']))
67+
# Some learners (e.g. LinearSVR) may return a list of intercepts
68+
if isinstance(intercept['_intercept_'], np.ndarray):
69+
intercept_list = ["%.12f" % i for i in intercept['_intercept_']]
70+
print("intercept = {}".format(intercept_list))
71+
else:
72+
print("intercept = {:.12f}".format(intercept['_intercept_']))
6773
else:
6874
print("== intercept values ==")
6975
for (label, val) in intercept.items():

skll/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@
77
:organization: ETS
88
"""
99

10-
__version__ = '1.1.0'
10+
__version__ = '1.1.1'
1111
VERSION = tuple(int(x) for x in __version__.split('.'))

tests/test_utilities.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from __future__ import (absolute_import, division, print_function,
1010
unicode_literals)
11+
import ast
1112

1213
import copy
1314
import itertools
@@ -27,7 +28,7 @@
2728
from mock import create_autospec, patch
2829

2930
from nose.tools import eq_, assert_almost_equal, raises, assert_raises
30-
from numpy.testing import assert_array_equal, assert_allclose
31+
from numpy.testing import assert_array_equal, assert_allclose, assert_array_almost_equal
3132

3233
import skll
3334
import skll.utilities.compute_eval_from_predictions as cefp
@@ -377,9 +378,12 @@ def check_print_model_weights(task='classification'):
377378
if task == 'classification':
378379
learner = Learner('LogisticRegression')
379380
learner.train(train_fs)
380-
else:
381+
elif task == 'regression':
381382
learner = Learner('LinearRegression')
382383
learner.train(train_fs, grid_objective='pearson')
384+
else:
385+
learner = Learner('LinearSVR')
386+
learner.train(train_fs, grid_objective='pearson')
383387

384388
# now save the model to disk
385389
model_file = join(_my_dir, 'output',
@@ -414,7 +418,7 @@ def check_print_model_weights(task='classification'):
414418
feature_values = [t[1] for t in sorted(feature_values)]
415419
assert_almost_equal(intercept, learner.model.intercept_[0])
416420
assert_allclose(learner.model.coef_[0], feature_values)
417-
else:
421+
elif task == 'regression':
418422
lines_to_parse = [l for l in out.split('\n') if l]
419423
intercept = safe_float(lines_to_parse[0].split('=')[1])
420424
feature_values = []
@@ -424,11 +428,28 @@ def check_print_model_weights(task='classification'):
424428
feature_values = [t[1] for t in sorted(feature_values)]
425429
assert_almost_equal(intercept, learner.model.intercept_)
426430
assert_allclose(learner.model.coef_, feature_values)
431+
else:
432+
lines_to_parse = [l for l in out.split('\n') if l]
433+
434+
intercept_list = ast.literal_eval(lines_to_parse[0].split('=')[1].strip())
435+
intercept = []
436+
for intercept_string in intercept_list:
437+
intercept.append(safe_float(intercept_string))
438+
439+
feature_values = []
440+
for ltp in lines_to_parse[1:]:
441+
fields = ltp.split('\t')
442+
feature_values.append((fields[1], safe_float(fields[0])))
443+
feature_values = [t[1] for t in sorted(feature_values)]
444+
445+
assert_array_almost_equal(intercept, learner.model.intercept_)
446+
assert_allclose(learner.model.coef_, feature_values)
427447

428448

429449
def test_print_model_weights():
430450
yield check_print_model_weights, 'classification'
431451
yield check_print_model_weights, 'regression'
452+
yield check_print_model_weights, 'regression_linearSVR'
432453

433454

434455
def check_summarize_results_argparse(use_ablation=False):

0 commit comments

Comments (0)