diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 43129f6..fd46e91 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -700,7 +700,16 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): def _from_sequence_of_strings(cls, scalars, dtype=None, copy=False): if not dtype: dtype = PintType.construct_from_quantity_string(scalars[0]) - return cls._from_sequence([dtype.ureg.Quantity(x) for x in scalars], dtype) + # cache this lookup as it'll be used for every value + _Q = dtype.ureg.Quantity + + def quantity(value): + # Pandas seems to pass empty strings as NaNs; return NaN as-is instead of parsing it as a quantity + if isinstance(value, float) and np.isnan(value): + return np.nan + return _Q(value) + + return cls._from_sequence(list(map(quantity, scalars)), dtype) @classmethod def _from_factorized(cls, values, original): diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index 61db471..3d62951 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -1,4 +1,6 @@ +import io import pickle +from textwrap import dedent import time import numpy as np @@ -344,3 +346,21 @@ class TestIssue247(BaseExtensionTests): result = a / a expected = pd.Series([1, 1, 1], dtype="pint[][Float64]") tm.assert_series_equal(result, expected) + + +class TestIssue267(BaseExtensionTests): + def test_missing_values(self): + "make sure that missing values don't prevent the column from being imported" + data = dedent( + """\ + mass + 0,1lb + 1, + """ + ) + df = pd.read_csv(io.StringIO(data), dtype=dict(mass="pint[kg]")) + mass = df["mass"] + assert mass.dtype == PintType("kg") + + missing = pd.Series([False, True], name="mass") + tm.assert_equal(pd.isna(mass), missing)