Updating MirParser to enable reading in pre-v3 MIR data

RadioAstronomySoftwareGroup · Aug 11, 2023 · 3f7738b · 3f7738b
1 parent 425ce89
commit 3f7738b
Show file tree

Hide file tree

Showing 3 changed files with 163 additions and 14 deletions.
diff --git a/pyuvdata/uvdata/mir.py b/pyuvdata/uvdata/mir.py
@@ -151,6 +151,10 @@ def read_mir(
         # Use the mir_parser to read in metadata, which can be used to select data.
         mir_data = mir_parser.MirParser(filepath)
 
+        # Make sure that the mir file is v3 compliant, since correctly filling values
+        # into a UVData object depends on that.
+        mir_data._make_v3_compliant()
+
         if select_where is None:
             select_where = []
 

diff --git a/pyuvdata/uvdata/mir_meta_data.py b/pyuvdata/uvdata/mir_meta_data.py
@@ -2068,14 +2068,30 @@ def read(self, filepath):
         filepath : str
             Path of the folder containing the metadata in question.
         """
-        if self._binary_dtype is None:
-            self._data = np.fromfile(
-                os.path.join(filepath, self._filetype), dtype=self.dtype
-            )
-        else:
-            self._data = np.fromfile(
-                os.path.join(filepath, self._filetype), dtype=self._binary_dtype
-            ).astype(self.dtype)
+        self._data = np.fromfile(
+            os.path.join(filepath, self._filetype),
+            dtype=self.dtype if self._binary_dtype is None else self._binary_dtype,
+        )
+
+        if self._binary_dtype is not None:
+            try:
+                self._data = self._data.astype(self.dtype)
+            except UnicodeDecodeError:
+                # If we get a unicode error, that means that we have chars that are not
+                # in the 'normal' ascii range. This is a not uncommon occurrence in the
+                # codes_read, where char arrays contained uninitialized memory values.
+                # We only need to do this check for string-like fields.
+                check_fields = [
+                    idx
+                    for idx in range(len(codes_dtype))
+                    if np.issubdtype(codes_dtype[idx], np.str_)
+                ]
+                for idx in range(len(self._data)):
+                    for jdx in check_fields:
+                        marker = self._data[idx][jdx].find(b"\x00")
+                        if marker > 0:
+                            self._data[idx][jdx] = self._data[idx][jdx][:marker]
+                self._data = self._data.astype(self.dtype)
 
         self._mask = np.ones(self._size, dtype=bool)
         self._set_header_key_index_dict()
@@ -2666,12 +2682,12 @@ def __init__(self, filepath=None):
             Optional argument specifying the path to the Mir data folder.
         """
         super().__init__(
-            "codes_read",
-            codes_dtype,
-            None,
-            codes_binary_dtype,
-            ("icode", "v_name"),
-            filepath,
+            filetype="codes_read",
+            dtype=codes_dtype,
+            header_key_name=None,
+            binary_dtype=codes_binary_dtype,
+            pseudo_header_key_names=("icode", "v_name"),
+            filepath=filepath,
         )
 
         self._mutable_codes = [

diff --git a/pyuvdata/uvdata/mir_parser.py b/pyuvdata/uvdata/mir_parser.py
@@ -3505,3 +3505,132 @@ def redoppler_data(
             self.vis_data = self._chanshift_vis(
                 self.vis_data, shift_tuple_list, inplace=True, flag_adj=flag_adj
             )
+
+    def _make_v3_compliant(self):
+        """
+        Update MIR metadata for export to UVData.
+
+        This is an internal helper function, not meant to be called by users. This
+        function will modify or otherwise fill metadata in fields that were not
+        populated in MIR file versions < 3, in order to make in minimally compliant
+        with what the Mir.read method needs for populating a UVData object. Only data
+        sets recorded prior to 2020 need these modifications.
+        """
+        if "filever" in self.codes_data.get_code_names():
+            if self.codes_data["filever"][0] != "2":
+                # If the file version is already >= 3.0, then this is basically a no-op
+                return
+
+        warnings.warn(
+            "Pre v.3 MIR file format detected, modifying metadata to make in minimally "
+            "compliant for reading in with pyuvdata."
+        )
+
+        from datetime import datetime
+
+        from astropy.time import Time
+
+        from pyuvdata import get_telescope
+
+        from .. import utils as uvutils
+
+        sma_lat, sma_lon, sma_alt = get_telescope("SMA").telescope_location_lat_lon_alt
+
+        # in_data updates: mjd, lst, ara, adec
+        # First sort out the time stamps using the day reference inside codes_data, and
+        # then adding the delta from midnight (dhrs)
+        mjd_day_dict = self.codes_data["ref_time"]
+        for key, value in mjd_day_dict.items():
+            if isinstance(value, str):
+                mjd_day_dict[key] = Time(
+                    datetime.strptime(value, "%b %d, %Y"), scale="tt"
+                ).tt.mjd
+
+        mjd_arr = (self.in_data["dhrs"] / 24.0) + np.array(
+            [mjd_day_dict[idx] for idx in self.in_data["iref_time"]]
+        )
+
+        # Tally the JD dates, since that's used for various helper functions
+        jd_arr = Time(mjd_arr, format="mjd", scale="tt").utc.jd
+
+        # Calculate the LST at the time of obs
+        lst_arr = (12.0 / np.pi) * uvutils.get_lst_for_time(
+            jd_array=jd_arr,
+            latitude=np.rad2deg(sma_lat),
+            longitude=np.rad2deg(sma_lon),
+            altitude=sma_alt,
+        )
+
+        # Finally, calculate the apparent coordinates based on what we have in the data
+        app_ra, app_dec = uvutils.calc_app_coords(
+            lon_coord=self.in_data["rar"],
+            lat_coord=self.in_data["decr"],
+            time_array=jd_arr,
+            telescope_loc=(sma_lat, sma_lon, sma_alt),
+        )
+
+        # Update the fields accordingly
+        self.in_data["mjd"] = mjd_arr
+        self.in_data["lst"] = lst_arr
+        self.in_data["ara"] = app_ra
+        self.in_data["adec"] = app_dec
+
+        # bl_data updates: ant1rx, ant2rx, u, v, w
+        # First, update the antenna receiver fields if this is a non-polarization track,
+        # since that's how we tell X/Y polarization data apart.
+        if np.all(self.bl_data["ipol"] == 0):
+            irec = self.bl_data["irec"]
+
+            # Make sure we recognized all the rx code values, otherwise we may
+            # misidentify RxA vs RxB data.
+            assert np.all(np.isin(irec, [0, 1, 2, 3])), "Forbidden RX code detected."
+            antrx = np.isin(irec, [2, 3]).astype("<i2")
+            self.bl_data["ant1rx"] = antrx
+            self.bl_data["ant2rx"] = antrx
+
+        # Next, the old data had uvws calculated in wavelengths by _sideband_, so we
+        # need to update those.
+        rx_idx = self.bl_data["ant1rx"]
+        sb_idx = self.bl_data["isb"]
+
+        # These isb and ant1rx should _only_ be either 0 or 1
+        assert np.all(np.isin(sb_idx, [0, 1])), "Forbidden SB index values detected."
+        assert np.all(np.isin(rx_idx, [0, 1])), "Forbidden RX index values detected."
+
+        u_vals = self.bl_data["u"]
+        v_vals = self.bl_data["v"]
+        w_vals = self.bl_data["w"]
+
+        # Antenna positions are always stored in meters, so we can use this to get the
+        # total distance between antennas. Note that we have to do this b/c of some
+        # ambiguity of which frequency exactly the uvws are calculated at.
+        ant_dict = {}
+        for ant1, pos1 in zip(*self.antpos_data[("antenna", "xyz_pos")]):
+            temp_dict = {}
+            for ant2, pos2 in zip(*self.antpos_data[("antenna", "xyz_pos")]):
+                temp_dict[ant2] = ((pos1 - pos2) ** 2.0).sum() ** 0.5
+            ant_dict[ant1] = temp_dict
+
+        exp_bl_length = np.array(
+            [ant_dict[idx][jdx] for idx, jdx in zip(*self.bl_data[["iant1", "iant2"]])]
+        )
+        meas_bl_length = ((u_vals**2) + (v_vals**2) + (w_vals**2)) ** 0.5
+
+        # Update the uvws by a scale-factor determined by the ratio of the known total
+        # lengths between antennas versus their calculated uvws. Note that the scale
+        # factor should be the same for all baselines in each receiver-sideband
+        # combination, so for the sake of robustness we use the median across all
+        # baselines to calculate said scale factor.
+        for idx in range(2):
+            for jdx in range(2):
+                mask = (rx_idx == idx) & (sb_idx == jdx)
+                if not np.any(mask):
+                    continue
+                scale_fac = np.median(exp_bl_length[mask] / meas_bl_length[mask])
+                u_vals[mask] *= scale_fac
+                v_vals[mask] *= scale_fac
+                w_vals[mask] *= scale_fac
+
+        self.bl_data["u"] = u_vals
+        self.bl_data["v"] = v_vals
+        self.bl_data["w"] = w_vals