PyEllips · MarJMue · Oct 1, 2024 · Oct 1, 2024 · domna · Oct 1, 2024
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -14,11 +14,12 @@ ipython
 ipywidgets
 sphinx-gallery
 sphinx-plotly-directive
-sphinxcontrib-mermaid 
+sphinxcontrib-mermaid
 matplotlib
 h5py
 pyyaml
 importlib-resources
 rapidfuzz
 lark>=1.1.5
-pint
+pint
+chardet
diff --git a/pyproject.toml b/pyproject.toml
@@ -8,7 +8,7 @@ description = "An ellipsometry analysis tool for reproducible and comprehensible
 dynamic = ["version"]
 authors = [
     { name = "Marius Müller", email = "marius.mueller@physik.uni-giessen.de" },
-    { name = "Florian Dobener", email = "pyelli@schroedingerscat.org" }
+    { name = "Florian Dobener", email = "pyelli@schroedingerscat.org" },
 ]
 requires-python = ">=3.8"
 license = { file = "LICENSE.txt" }
@@ -19,7 +19,7 @@ classifiers = [
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12"
+    "Programming Language :: Python :: 3.12",
 ]
 dependencies = [
     "scipy",
@@ -32,6 +32,7 @@ dependencies = [
     "rapidfuzz",
     "lark>=1.1.5",
     "pint",
+    "chardet",
 ]
 
 [project.optional-dependencies]
@@ -75,16 +76,16 @@ indent-width = 4
 
 [tool.ruff.lint]
 select = [
-    "E", # pycodestyle
-    "W", # pycodestyle
-    "PL", # pylint
+    "E",      # pycodestyle
+    "W",      # pycodestyle
+    "PL",     # pylint
     "NPY201", # numpy
 ]
 ignore = [
-    "E501", # Line too long ({width} > {limit} characters)
-    "E701", # Multiple statements on one line (colon)
-    "E731", # Do not assign a lambda expression, use a def
-    "E402",  # Module level import not at top of file
+    "E501",    # Line too long ({width} > {limit} characters)
+    "E701",    # Multiple statements on one line (colon)
+    "E731",    # Do not assign a lambda expression, use a def
+    "E402",    # Module level import not at top of file
     "PLR0911", # Too many return statements
     "PLR0912", # Too many branches
     "PLR0913", # Too many arguments in function definition

diff --git a/requirements/dev-requirements.txt b/requirements/dev-requirements.txt
@@ -28,6 +28,12 @@ cfgv==3.4.0 \
     --hash=sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9 \
     --hash=sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560
     # via pre-commit
+chardet==5.2.0 \
+    --hash=sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7 \
+    --hash=sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970
+    # via
+    #   -r requirements/fitting-requirements.txt
+    #   pyelli (pyproject.toml)
 comm==0.2.2 \
     --hash=sha256:3fd7a84065306e07bea1773df6eb8282de51ba82f77c72f9c85716ab11fe980e \
     --hash=sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3
@@ -1278,7 +1284,6 @@ typing-extensions==4.12.2 \
     #   -r requirements/fitting-requirements.txt
     #   flexcache
     #   flexparser
-    #   ipython
     #   pint
 tzdata==2024.1 \
     --hash=sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd \

diff --git a/requirements/fitting-requirements.txt b/requirements/fitting-requirements.txt
@@ -14,6 +14,12 @@ asttokens==2.4.1 \
     --hash=sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24 \
     --hash=sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0
     # via stack-data
+chardet==5.2.0 \
+    --hash=sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7 \
+    --hash=sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970
+    # via
+    #   -r requirements/requirements.txt
+    #   pyelli (pyproject.toml)
 comm==0.2.2 \
     --hash=sha256:3fd7a84065306e07bea1773df6eb8282de51ba82f77c72f9c85716ab11fe980e \
     --hash=sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3
@@ -764,7 +770,6 @@ typing-extensions==4.12.2 \
     #   -r requirements/requirements.txt
     #   flexcache
     #   flexparser
-    #   ipython
     #   pint
 tzdata==2024.1 \
     --hash=sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd \

diff --git a/requirements/requirements.txt b/requirements/requirements.txt
@@ -4,6 +4,10 @@ appdirs==1.4.4 \
     --hash=sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41 \
     --hash=sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128
     # via pint
+chardet==5.2.0 \
+    --hash=sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7 \
+    --hash=sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970
+    # via pyelli (pyproject.toml)
 flexcache==0.3 \
     --hash=sha256:18743bd5a0621bfe2cf8d519e4c3bfdf57a269c15d1ced3fb4b64e0ff4600656 \
     --hash=sha256:d43c9fea82336af6e0115e308d9d33a185390b8346a017564611f1466dcd2e32

diff --git a/src/elli/importer/encoding_detection.py b/src/elli/importer/encoding_detection.py
@@ -0,0 +1,14 @@
+import chardet
+
+
+def detect_encoding(fname: str) -> str:
+    r"""Detects the encoding of file fname, falling back to UTF-8.
+    Args:
+      fname (str): Filename
+    Returns:
+      str: Encoding identifier string ("utf-8" if chardet finds none).
+    """
+    with open(fname, "rb") as f:
+        raw_data = f.read()
+    # chardet reports None for empty/undecidable input; keep the str contract.
+    return chardet.detect(raw_data)["encoding"] or "utf-8"
diff --git a/src/elli/importer/spectraray.py b/src/elli/importer/spectraray.py
@@ -9,6 +9,7 @@
 from packaging.version import Version, parse
 
 from ..utils import calc_rho
+from .encoding_detection import detect_encoding
 
 
 def read_spectraray_psi_delta(
@@ -25,10 +26,13 @@ def read_spectraray_psi_delta(
         pd.DataFrame: DataFrame containing the psi/delta data in
         the format to be further processes inside pyElli.
     """
+    # detect encoding
+    encoding = detect_encoding(fname)
 
     # read data and drop empty column
     psi_delta_df = pd.read_csv(
         fname,
+        encoding=encoding,
         index_col=0,
         header=None,
         sep=sep,
@@ -82,9 +86,11 @@ def read_spectraray_mmatrix(
         pd.DataFrame: DataFrame containing the psi/delta data in
         the format to be further processes inside pyElli.
     """
-    mueller_matrix = pd.read_csv(fname, sep=sep, decimal=decimal, index_col=0).iloc[
-        :, -17:-1
-    ]
+    encoding = detect_encoding(fname)
+
+    mueller_matrix = pd.read_csv(
+        fname, encoding=encoding, sep=sep, decimal=decimal, index_col=0
+    ).iloc[:, -17:-1]
     mueller_matrix.index.name = "Wavelength"
     mueller_matrix.columns = [
         "M11",

diff --git a/src/elli/importer/woollam.py b/src/elli/importer/woollam.py
@@ -12,6 +12,7 @@
 
 from ..units import ureg
 from ..utils import calc_rho
+from .encoding_detection import detect_encoding
 
 logger = logging.getLogger(__name__)
 
@@ -167,7 +168,9 @@ def read_woollam_psi_delta(fname: str) -> pd.DataFrame:
         the format to be further processes inside pyElli.
     """
 
-    with open(fname, encoding="utf-8") as fobj:
+    encoding = detect_encoding(fname)
+
+    with open(fname, encoding=encoding) as fobj:
         line_number = fobj.tell()
         metadata = []
         file_format = ""