From a2030238bcb7bd5d9ca534e70e5ac8558d12dd1d Mon Sep 17 00:00:00 2001 From: Yuan-Ming Hsu <48866415+technic960183@users.noreply.github.com> Date: Fri, 2 May 2025 23:27:43 +0800 Subject: [PATCH 01/10] Tune linters config --- .github/linters/.cspell.json | 49 ++++++++++++++++++++++++++++++++++ .github/linters/.isort.cfg | 8 ++++++ .github/linters/pyproject.toml | 2 -- .github/workflows/ci.yml | 4 +-- .mega-linter.yml | 34 +++++++++++++++++++++++ 5 files changed, 93 insertions(+), 4 deletions(-) create mode 100644 .github/linters/.cspell.json create mode 100644 .github/linters/.isort.cfg delete mode 100644 .github/linters/pyproject.toml create mode 100644 .mega-linter.yml diff --git a/.github/linters/.cspell.json b/.github/linters/.cspell.json new file mode 100644 index 0000000..a2025e6 --- /dev/null +++ b/.github/linters/.cspell.json @@ -0,0 +1,49 @@ +{ + "version": "0.2", + "language": "en", + "ignorePaths": [ + "**/.git/**", + "**/.gitignore", + "**/docs/Makefile", + "**/docs/make.bat", + "**/docs/source/conf.py", + "**/.mega-linter.yml" + ], + "words": [ + "spherimatch", + "xmatch", + "quadtree", + "coor", + "radec", + "idxes", + "Rodrigues", + "numpy", + "allclose", + "arcsin", + "arctan", + "isscalar", + "linalg", + "ndarray", + "randn", + "rtol", + "setdiff", + "scipy", + "dataframe", + "groupby", + "inplace", + "multiindex", + "ipynb", + "pypa", + "pypi", + "MAINT", + "bibtex", + "howpublished", + "autoclass", + "autofunction", + "automodule", + "genindex", + "modindex", + "toctree", + "undoc" + ] +} \ No newline at end of file diff --git a/.github/linters/.isort.cfg b/.github/linters/.isort.cfg new file mode 100644 index 0000000..1e14fd3 --- /dev/null +++ b/.github/linters/.isort.cfg @@ -0,0 +1,8 @@ +[settings] +profile= +line_length=120 +wrap_length=100 +lines_between_sections=0 +multi_line_output=3 +known_first_party=spherimatch +treat_all_comments_as_code=true \ No newline at end of file diff --git a/.github/linters/pyproject.toml 
b/.github/linters/pyproject.toml deleted file mode 100644 index 956e3c7..0000000 --- a/.github/linters/pyproject.toml +++ /dev/null @@ -1,2 +0,0 @@ -[tool.black] -# line-length = 120 \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9facc73..3ec9577 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,6 +2,7 @@ name: CI on: push: + branches: ["main"] pull_request: branches: ["main"] workflow_dispatch: @@ -30,11 +31,10 @@ jobs: - name: MegaLinter id: ml - uses: oxsecurity/megalinter/flavors/python@v8.6.0 + uses: oxsecurity/megalinter/flavors/python@v8 env: VALIDATE_ALL_CODEBASE: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/main') || github.event_name == 'workflow_dispatch' }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - DISABLE_ERRORS_LINTERS: COPYPASTE_JSCPD - name: Archive production artifacts if: success() || failure() diff --git a/.mega-linter.yml b/.mega-linter.yml new file mode 100644 index 0000000..de0e9c7 --- /dev/null +++ b/.mega-linter.yml @@ -0,0 +1,34 @@ +DISABLE: + - RST + - REPOSITORY +DISABLE_LINTERS: + - JSON_PRETTIER + - YAML_PRETTIER + - PYTHON_PYRIGHT +DISABLE_ERRORS_LINTERS: + - COPYPASTE_JSCPD + - PYTHON_MYPY + - REPOSITORY_CHECKOV + - REPOSITORY_GRYPE + - REPOSITORY_SECRETLINT + - REPOSITORY_SYFT + - REPOSITORY_TRIVY_SBOM + - REPOSITORY_TRUFFLEHOG +ENABLE_ERRORS_LINTERS: + - PYTHON_ISORT + +MARKDOWN_MARKDOWNLINT_ARGUMENTS: --disable MD041 +PYTHON_BLACK_ARGUMENTS: --skip-string-normalization --line-length 120 + +PRE_COMMANDS: + + - command: cp requirements.txt /venvs/requirements.txt + cwd: workspace + continue_if_failed: false + + # Install dependencies for `pylint` + - command: python3 -m pip install --no-cache-dir -r /venvs/requirements.txt + venv: pylint + continue_if_failed: false + +REPORTERS_MARKDOWN_TYPE: simple From 78e6d7c2b22d9c0ab2e8d6e363593809edff4203 Mon Sep 17 00:00:00 2001 From: Yuan-Ming Hsu <48866415+technic960183@users.noreply.github.com> 
Date: Fri, 2 May 2025 22:53:46 +0800 Subject: [PATCH 02/10] Fix style: whitespace, blank lines, line-length Thanks: flake8 --- .github/workflows/deploy-docs.yml | 2 +- .github/workflows/publish-pypi.yml | 2 +- docs/source/dev/index.rst | 2 +- docs/source/tutorial/duplicates_removal.rst | 8 +-- docs/source/tutorial/fof.rst | 16 +++--- docs/source/tutorial/xmatch.rst | 4 +- spherimatch/catalog.py | 32 +++++------ spherimatch/chunk_generator.py | 8 +-- spherimatch/chunk_generator_grid.py | 8 +-- .../euclidean_vs_angular_distance_local.py | 53 ++++++++++--------- spherimatch/fof.py | 4 +- spherimatch/result_fof.py | 11 ++-- spherimatch/result_xmatch.py | 26 ++++----- spherimatch/utilities_spherical.py | 13 ++--- spherimatch/xmatch.py | 14 +++-- tests/test_catalog.py | 9 ++-- tests/test_chunk.py | 5 +- tests/test_fof.py | 18 +++---- tests/test_result_xmatch.py | 14 ++--- tests/test_xmatch.py | 22 ++++---- 20 files changed, 145 insertions(+), 126 deletions(-) diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml index 245dcfe..d3fd557 100644 --- a/.github/workflows/deploy-docs.yml +++ b/.github/workflows/deploy-docs.yml @@ -53,7 +53,7 @@ jobs: environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} - + runs-on: ubuntu-latest steps: - name: Deploy to GitHub Pages diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 0927b1c..bae1ad3 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -28,7 +28,7 @@ jobs: run: | python -m pip install -r requirements.txt python -m pip install build - + - name: Build release distributions run: python -m build diff --git a/docs/source/dev/index.rst b/docs/source/dev/index.rst index d556da3..140123c 100644 --- a/docs/source/dev/index.rst +++ b/docs/source/dev/index.rst @@ -17,7 +17,7 @@ please refer to the `API Reference <../ref/index.html>`_ or the `Tutorials <../t To develop the project, clone the repository and 
install the project in editable mode. .. code-block:: console - + $ git clone https://github.com/technic960183/spherimatch.git $ cd spherimatch $ pip install -e .[dev] diff --git a/docs/source/tutorial/duplicates_removal.rst b/docs/source/tutorial/duplicates_removal.rst index 443b87f..0955189 100644 --- a/docs/source/tutorial/duplicates_removal.rst +++ b/docs/source/tutorial/duplicates_removal.rst @@ -12,10 +12,10 @@ First, let's create a mock catalog with duplicates: import pandas as pd # Create a mock catalog as a pandas DataFrame - catalog = pd.DataFrame([[80.894, 41.269, 1200], [120.689, -41.269, 1500], - [10.689, -41.269, 3600], [10.688, -41.270, 300], - [10.689, -41.270, 1800], [10.690, -41.269, 2400], - [120.690, -41.270, 900], [10.689, -41.269, 2700]], + catalog = pd.DataFrame([[80.894, 41.269, 1200], [120.689, -41.269, 1500], + [10.689, -41.269, 3600], [10.688, -41.270, 300], + [10.689, -41.270, 1800], [10.690, -41.269, 2400], + [120.690, -41.270, 900], [10.689, -41.269, 2700]], columns=['ra', 'dec', 'exp_time']) Here, we actually only have 3 unique objects, but the catalog contains 8 entries and 5 of them are duplicates. diff --git a/docs/source/tutorial/fof.rst b/docs/source/tutorial/fof.rst index ea47a20..5be83c5 100644 --- a/docs/source/tutorial/fof.rst +++ b/docs/source/tutorial/fof.rst @@ -11,9 +11,9 @@ First, let's create a mock catalog: import pandas as pd # Create a mock catalog as a pandas DataFrame - catalog = pd.DataFrame([[80.894, 41.269, 15.5], [120.689, -41.269, 12.3], - [10.689, -41.269, 18.7], [10.688, -41.270, 14.1], - [10.689, -41.270, 16.4], [10.690, -41.269, 13.2], + catalog = pd.DataFrame([[80.894, 41.269, 15.5], [120.689, -41.269, 12.3], + [10.689, -41.269, 18.7], [10.688, -41.270, 14.1], + [10.689, -41.270, 16.4], [10.690, -41.269, 13.2], [120.690, -41.270, 17.8]], columns=['ra', 'dec', 'mag']) .. 
note:: @@ -47,7 +47,7 @@ To get the clustering results with the appendind data (``'mag'`` in this case), Expected output:: Ra Dec mag - Group Object + Group Object 0 0 80.894 41.269 15.5 1 1 120.689 -41.269 12.3 6 120.690 -41.270 17.8 @@ -72,20 +72,20 @@ Expected output:: Print group 0: The type of group is . Ra Dec mag - Group Object + Group Object 0 0 80.894 41.269 15.5 Print group 1: The type of group is . Ra Dec mag - Group Object + Group Object 1 1 120.689 -41.269 12.3 6 120.690 -41.270 17.8 Print group 2: The type of group is . Ra Dec mag - Group Object + Group Object 2 2 10.689 -41.269 18.7 3 10.688 -41.270 14.1 4 10.689 -41.270 16.4 @@ -108,7 +108,7 @@ If you want DataFrame with a single layer of index and the size of each group as Expected output:: Group Ra Dec mag group_size - Object + Object 0 0 80.894 41.269 15.5 1 1 1 120.689 -41.269 12.3 2 6 1 120.690 -41.270 17.8 2 diff --git a/docs/source/tutorial/xmatch.rst b/docs/source/tutorial/xmatch.rst index 777d209..2280ed3 100644 --- a/docs/source/tutorial/xmatch.rst +++ b/docs/source/tutorial/xmatch.rst @@ -23,7 +23,7 @@ xmatch() Then, we can perform the cross-matching with the tolerance of 0.01 degree using the :func:`spherimatch.xmatch` function. .. code-block:: python - + from spherimatch import xmatch result_object = xmatch(catalogA, catalogB, tolerance=0.01) @@ -39,7 +39,7 @@ To get the matching results of catalog A, use the :func:`spherimatch.XMatchResul print(result_object.get_dataframe1()) Expected output:: - + Ra Dec N_match 0 80.894 41.269 0 1 120.689 -41.269 1 diff --git a/spherimatch/catalog.py b/spherimatch/catalog.py index 1e46e97..b531e21 100644 --- a/spherimatch/catalog.py +++ b/spherimatch/catalog.py @@ -8,7 +8,7 @@ class Catalog: '''This class is used to store and manipulate the catalog data for xmatch and fof. Parameters - ---------- + ---------- data : array-like The input data can be either a numpy array or a pandas dataframe. 
@@ -16,14 +16,13 @@ class Catalog: two values: [ra (azimuth, longitude), dec (alltitude, latitude)]. * pd.DataFrame: The dataframe must have two columns named 'Ra' and 'Dec' (or all the possible combinations with 'ra', 'dec'; 'RA', 'DEC'). - ''' def __init__(self, data): self.datatype = type(data) self.input_data = data - self.ra = None # ra, longitude, azimuth - self.dec = None # dec, latitude, alltitude + self.ra = None # ra, longitude, azimuth + self.dec = None # dec, latitude, alltitude self.ra_column: Optional[str] = None self.dec_column: Optional[str] = None if self.datatype == np.ndarray: @@ -31,15 +30,15 @@ def __init__(self, data): elif self.datatype == pd.DataFrame: self.__type_pd_dataframe() elif self.datatype == tuple: - raise NotImplementedError() # [TODO] Support tuple input for Catalog + raise NotImplementedError() # [TODO] Support tuple input for Catalog elif self.datatype == list: - raise NotImplementedError() # [TODO] Support list input for Catalog + raise NotImplementedError() # [TODO] Support list input for Catalog elif self.datatype == dict: - raise NotImplementedError() # [TODO] Support dict input for Catalog + raise NotImplementedError() # [TODO] Support dict input for Catalog else: raise TypeError("The input data must be either a numpy array or a pandas dataframe!") self._check_validity_range() - + def _check_validity_range(self): '''Check the validity of the input data. Warning if the data is out of range. ''' @@ -63,7 +62,7 @@ def get_coordiantes(self) -> NDArray[np.float64]: The array of shape (N, 2) with [Ra, Dec]. ''' return np.vstack([self.ra, self.dec], dtype=np.float64).T - + def get_indexes(self) -> NDArray[np.int64]: '''Get the indexes of the points in the catalog for xmatch and fof. @@ -73,7 +72,7 @@ def get_indexes(self) -> NDArray[np.int64]: The array of indexes of shape (N,). 
''' return np.arange(len(self.ra), dtype=np.int64) - + def get_appending_data(self, retain_all_columns=True, retain_columns=None, invalid_key_error=True) -> pd.DataFrame: '''Get the appending data of the points in the catalog for xmatch and fof. @@ -86,7 +85,7 @@ def get_appending_data(self, retain_all_columns=True, retain_columns=None, The list of columns to retain in the input dataframe. Overrides retain_all_columns if not empty. invalid_key_error : bool, optional Whether to raise an error when the columns are not in the input dataframe. Default is True. - + Returns ------- pandas.DataFrame @@ -104,14 +103,15 @@ def get_appending_data(self, retain_all_columns=True, retain_columns=None, else: raise TypeError("The elements in retain_columns must be string of column names!") elif isinstance(retain_columns, str): - raise TypeError(f"Cannot accept a string for retain_columns. Please provide it as a list: ['{retain_columns}']") + raise TypeError("Cannot accept a string for retain_columns. " + f"Please provide it as a list: ['{retain_columns}']") else: raise TypeError(f"Invalid type for retain_columns: {type(retain_columns)}") # Check if the columns are in the input DataFrame - non_existent_columns = [col for col in columns if col not in self.input_data.columns] + non_existent_columns = [col for col in columns if col not in self.input_data.columns] if non_existent_columns and invalid_key_error: raise KeyError(f"Columns {non_existent_columns} are not in the input DataFrame") - if not invalid_key_error: # Need to remove the non-existent columns only when invalid_key_error is False + if not invalid_key_error: # Need to remove the non-existent columns only when invalid_key_error is False columns = [col for col in columns if col in self.input_data.columns] # Drop the ra and dec columns if self.ra_column is not None and self.ra_column in columns: @@ -119,7 +119,7 @@ def get_appending_data(self, retain_all_columns=True, retain_columns=None, if self.dec_column is not None and 
self.dec_column in columns: columns.remove(self.dec_column) return pd.DataFrame(self.input_data[columns].values, index=self.get_indexes(), columns=columns) - + def __type_np_array(self): if self.input_data.ndim != 2: raise ValueError("The input array must be two-dimensional!") @@ -127,7 +127,7 @@ def __type_np_array(self): raise ValueError("The input array must have two columns!") self.ra = self.input_data[:, 0] self.dec = self.input_data[:, 1] - + def __type_pd_dataframe(self): RAS = ['ra', 'Ra', 'RA'] DECS = ['dec', 'Dec', 'DEC'] diff --git a/spherimatch/chunk_generator.py b/spherimatch/chunk_generator.py index f8b01d1..ba3b700 100644 --- a/spherimatch/chunk_generator.py +++ b/spherimatch/chunk_generator.py @@ -21,7 +21,7 @@ def distribute(self, catalog: Catalog) -> list[Chunk]: ---------- catalog : Catalog The catalog to be distributed. - + Returns ------- chunks : list[Chunk] @@ -55,14 +55,14 @@ def coor2id_central(self, ra: NDArray, dec: NDArray): '''Tell which chunk the given coordinate belongs to. (How to divide the sky.) --- SHOULD BE overridden by subclass --- - + Parameters ---------- ra : numpy.ndarray The array of RA. Shape: (N,). dec : numpy.ndarray The array of Dec. Shape: (N,). - + Returns ------- chink_id : numpy.ndarray @@ -84,7 +84,7 @@ def coor2id_boundary(self, ra: NDArray, dec: NDArray): The array of RA. Shape: (N,). dec : numpy.ndarray The array of Dec. Shape: (N,). - + Returns ------- list_of_chunk_of_list_of_object_index : list diff --git a/spherimatch/chunk_generator_grid.py b/spherimatch/chunk_generator_grid.py index cb1d773..0c0962b 100644 --- a/spherimatch/chunk_generator_grid.py +++ b/spherimatch/chunk_generator_grid.py @@ -23,7 +23,7 @@ def __init__(self, center, margin, width: Optional[tuple] = None, dec_bound: Opt Note ---- - Specify either the width for ring chunks or the dec_bound for polar chunks. + Specify either the width for ring chunks or the dec_bound for polar chunks. 
''' self.margin = margin if dec_bound is not None and width is not None: @@ -116,7 +116,8 @@ def coor2id_central(self, ra, dec): # Ring chunks for i, config in enumerate(self.config_ring): ra_diff = np.abs(ra - config['center_ra']) - ra_diff = np.minimum(ra_diff, 360 - ra_diff) # Not necessary. The central parts don't cross the 0-360 boundary. + # The line below should make no difference, because the central parts don't cross the 0-360 boundary. + ra_diff = np.minimum(ra_diff, 360 - ra_diff) dec_diff = np.abs(dec - config['center_dec']) mask_ra = (ra_diff <= config['delta_ra']) mask_dec = (dec_diff <= config['delta_dec']) @@ -142,7 +143,8 @@ def coor2id_boundary(self, ra, dec): # Middle chunks for config in self.config_ring: ra_diff = np.abs(ra - config['center_ra']) - ra_diff = np.minimum(ra_diff, 360 - ra_diff) # Necessary. The boundary parts DO cross the 0-360 boundary. + # Necessary. The boundary parts DO cross the 0-360 boundary. + ra_diff = np.minimum(ra_diff, 360 - ra_diff) dec_diff = np.abs(dec - config['center_dec']) mask_ra = (ra_diff >= config['delta_ra']) & (ra_diff <= config['delta_ra'] + margin) & ( dec_diff <= config['delta_dec'] + margin) diff --git a/spherimatch/euclidean_vs_angular_distance_local.py b/spherimatch/euclidean_vs_angular_distance_local.py index 202d79c..4604504 100644 --- a/spherimatch/euclidean_vs_angular_distance_local.py +++ b/spherimatch/euclidean_vs_angular_distance_local.py @@ -4,57 +4,57 @@ ''' -This script provides a detailed analysis of the discrepancies between Euclidean -and angular distances on a spherical coordinate system, particularly examining +This script provides a detailed analysis of the discrepancies between Euclidean +and angular distances on a spherical coordinate system, particularly examining how these discrepancies occur for various declinations and angular distances. The script has two primary functionalities: 1. 
Compute Relative Error: - For a given declination and angular distance, the script calculates the maximum + For a given declination and angular distance, the script calculates the maximum relative error between the Euclidean and angular distances using the function: compute_error(declination, distance) - This relative error highlights the deviation of the Euclidean approximation - from the actual angular distance. Though primarily intended for error - visualization, this function can also be utilized in other scripts, serving as - an API for determining relative errors based on specific declination and + This relative error highlights the deviation of the Euclidean approximation + from the actual angular distance. Though primarily intended for error + visualization, this function can also be utilized in other scripts, serving as + an API for determining relative errors based on specific declination and angular distance inputs. 2. Visualize Errors: (Removed for brevity, see the original script in the backup branch) The script generates three main plots to enhance the understanding of these errors: a. Relative Error in Euclidean Distance vs. Declination (Top-left subplot): - This plot shows how the relative error between Euclidean and angular distances - varies with declination for different angular distances. Initially, the relative - error grows slowly nearly as a constant. As declination increases, the error + This plot shows how the relative error between Euclidean and angular distances + varies with declination for different angular distances. Initially, the relative + error grows slowly nearly as a constant. As declination increases, the error follows the trend of the transformation: transformed value = (1 - cos(Dec)) / cos(Dec) - A reference line representing this transformation is plotted to offer a baseline - comparison. 
This curve helps in illustrating how the error aligns with this + A reference line representing this transformation is plotted to offer a baseline + comparison. This curve helps in illustrating how the error aligns with this cosine transformation. Limits: ------- - For minimal angular separations (close to 0°), the relative error follows a power-law: - error ≈ 1.2694 * 10^(-5) * d^2 + error ≈ 1.2694 * 10^(-5) * d^2 where d represents the angular distance in degrees. - + - Approaching an angular separation of 75°, the relative error aligns with: error ≈ (1 - cos(θ)) / cos(θ) - - These limiting behaviors illuminate the approximation error extremes across + + These limiting behaviors illuminate the approximation error extremes across angular separations, guiding modeling efforts for errors across all angles. b. Angle Corresponding to Max Relative Error vs. Declination (Bottom-left subplot): - This plot identifies the direction, represented by θ (theta), where the maximum - discrepancy or relative error occurs for different declinations. θ is defined - as the direction in degrees counter-clockwise from the positive DEC axis when + This plot identifies the direction, represented by θ (theta), where the maximum + discrepancy or relative error occurs for different declinations. θ is defined + as the direction in degrees counter-clockwise from the positive DEC axis when viewed from the center of the celestial sphere. c. Fit of Relative Error vs. Angular Distance (Right subplot): - Focusing on a near-zero declination, this subplot visualizes how the relative - error varies with different angular distances. It contrasts observed data - with a fitted curve, revealing the inherent relationship between relative + Focusing on a near-zero declination, this subplot visualizes how the relative + error varies with different angular distances. 
It contrasts observed data + with a fitted curve, revealing the inherent relationship between relative error and angular distance at this specific declination. @@ -64,10 +64,10 @@ and angular distance. Note: -At low declinations, the maximum relative error typically arises when moving -in a diagonal direction around 45° from the DEC axis. This direction captures -more of the sphere's curvature compared to strictly horizontal or vertical -movements. As declination increases, the direction corresponding to the most +At low declinations, the maximum relative error typically arises when moving +in a diagonal direction around 45° from the DEC axis. This direction captures +more of the sphere's curvature compared to strictly horizontal or vertical +movements. As declination increases, the direction corresponding to the most pronounced error shifts, becoming predominantly horizontal (θ = 90°). ''' @@ -89,6 +89,7 @@ def compute_error(declination, distance): return max_error + def compute_max_relative_error(dec, distances, theta_values): origin = (180, dec) offset_points_theta = np.array(point_offset(origin, distances, theta_values)) diff --git a/spherimatch/fof.py b/spherimatch/fof.py index 94adee4..531c458 100644 --- a/spherimatch/fof.py +++ b/spherimatch/fof.py @@ -10,6 +10,7 @@ from .utilities_spherical import radec_to_cartesian, cartesian_to_radec from .utilities_spherical import great_circle_distance, rotate_radec_about_axis + def group_by_quadtree(catalog, tolerance, dec_bound=None, ring_chunk=None) -> FoFResult: warnings.warn("This function will be deprecated. Use fof() instead.", FutureWarning) if dec_bound is not None: @@ -18,6 +19,7 @@ def group_by_quadtree(catalog, tolerance, dec_bound=None, ring_chunk=None) -> Fo raise ValueError("The ring_chunk parameter is no longer supported.") return fof(catalog, tolerance) + def fof(catalog, tolerance) -> FoFResult: """Perform the Friends-of-Friends (FoF) grouping algorithm on a catalog. 
@@ -45,7 +47,7 @@ def fof(catalog, tolerance) -> FoFResult: cg = GridChunkGenerator(margin=2*tolerance) cg.set_symmetric_ring_chunk(dec_bound, ring_chunk) cg.distribute(_catalog) - + # print(f"Using a single process to group {len(cg.chunks)} chunks.") ds = DisjointSet(len(_catalog)) for chunk in cg.chunks: diff --git a/spherimatch/result_fof.py b/spherimatch/result_fof.py index 28b807c..e91c631 100644 --- a/spherimatch/result_fof.py +++ b/spherimatch/result_fof.py @@ -2,8 +2,9 @@ import numpy as np from .catalog import Catalog + class FoFResult: - + def __init__(self, catalog: Catalog, tolerance: float, result_list: list): self.catalog = catalog self.tolerance = tolerance @@ -19,7 +20,7 @@ def get_coordinates(self) -> list[list[tuple]]: """ objects_coordinates = self.catalog.get_coordiantes() return [[tuple(objects_coordinates[i, :]) for i in g] for g in self.result_list] - + def get_group_coordinates(self) -> list[tuple]: """Returns the center coordinates of the groups. @@ -31,7 +32,7 @@ def get_group_coordinates(self) -> list[tuple]: objects_coordinates = self.catalog.get_coordiantes() # [FIXME] This return a list of NDArrays, not a list of tuples. return [np.average(objects_coordinates[g, :], axis=0) for g in self.result_list] - + def get_group_sizes(self) -> list[int]: """Returns the object counts in each group. @@ -41,7 +42,7 @@ def get_group_sizes(self) -> list[int]: A list of integers representing the number of objects in each group. """ return [len(g) for g in self.result_list] - + def get_group_dataframe(self, min_group_size=1, coord_columns=['Ra', 'Dec'], retain_all_columns=True, retain_columns=None) -> pd.DataFrame: """Get the grouped data as a two-level indexed pandas DataFrame. 
@@ -66,7 +67,7 @@ def get_group_dataframe(self, min_group_size=1, coord_columns=['Ra', 'Dec'], new_index_tuples = [] original_indices = [] for group_index, group_indices in enumerate(self.result_list): - if len(group_indices) < min_group_size: # Skip groups with the size less than min_group_size + if len(group_indices) < min_group_size: # Skip groups with the size less than min_group_size continue for object_index in group_indices: new_index_tuples.append((group_index, object_index)) diff --git a/spherimatch/result_xmatch.py b/spherimatch/result_xmatch.py index aa97456..7d34663 100644 --- a/spherimatch/result_xmatch.py +++ b/spherimatch/result_xmatch.py @@ -3,6 +3,7 @@ import pandas as pd from .catalog import Catalog + class XMatchResult: def __init__(self, cat1: Catalog, cat2: Catalog, tolerance, result_dict: defaultdict): @@ -11,7 +12,7 @@ def __init__(self, cat1: Catalog, cat2: Catalog, tolerance, result_dict: default self.tolerance = tolerance self.result_dict = result_dict self.result_dict_reserve = None - + def __str__(self): return f"XMatchResult of cat1 with {len(self.cat1)} objects and cat2 with {len(self.cat2)} objects." 
@@ -25,7 +26,7 @@ def get_result_dict(self) -> defaultdict: def get_result_dict_reserve(self) -> defaultdict: # if self.result_dict_reserve is None: # [TODO] Save the result_dict_reserve to improve performance - temp_dd = defaultdict(list) # Improve the performance after fixing the issue of unsorted dictionary + temp_dd = defaultdict(list) # Improve the performance after fixing the issue of unsorted dictionary for k, v in self.result_dict.items(): for vv in v: temp_dd[vv].append(k) @@ -33,11 +34,11 @@ def get_result_dict_reserve(self) -> defaultdict: for idx in self.cat2.get_indexes(): self.result_dict_reserve[idx] = temp_dd[idx] return self.result_dict_reserve - + def get_dataframe1(self, min_match=0, coord_columns=['Ra', 'Dec'], retain_all_columns=True, retain_columns=None) -> pd.DataFrame: '''Get the first catalog with the number of matches as a pandas dataframe. - + Parameters ---------- min_match : int, optional @@ -64,7 +65,7 @@ def get_dataframe1(self, min_match=0, coord_columns=['Ra', 'Dec'], data_df = pd.concat([data_df, append_df], axis=1) data_df = data_df[data_df['N_match'] >= min_match] return data_df - + def get_dataframe2(self, min_match=0, coord_columns=['Ra', 'Dec'], retain_all_columns=True, retain_columns=None) -> pd.DataFrame: '''Get the second catalog with the number of matches as a pandas dataframe. @@ -91,9 +92,11 @@ def get_serial_dataframe(self, min_match=1, reverse=False, coord_columns=['Ra', Parameters ---------- min_match : int, optional - The minimum number of matches for an object from the first catalog to be included in the dataframe. Default is 1. + The minimum number of matches for an object from the first catalog to be included in the dataframe. + Default is 1. reverse : bool, optional - Whether to reverse the order of catalogs (i.e., make the second catalog as the first and vice versa). Default is False. + Whether to reverse the order of catalogs (i.e., make the second catalog as the first and vice versa). + Default is False. 
coord_columns : list[str], optional The names of the columns for the coordinates. Default is ['Ra', 'Dec']. retain_all_columns : bool, optional @@ -101,13 +104,13 @@ def get_serial_dataframe(self, min_match=1, reverse=False, coord_columns=['Ra', retain_columns : list[str], optional The names of the columns to retain in the output dataframe. Will override retain_all_columns if not empty. Default is None. - + Returns ------- pandas.DataFrame The serial dataframe of the two catalogs with the number of matches. ''' - if reverse: # Create a new XMatchResult object with the reversed result_dict + if reverse: # Create a new XMatchResult object with the reversed result_dict reserve_result = self.__class__(self.cat2, self.cat1, self.tolerance, self.get_result_dict_reserve()) df = reserve_result.get_serial_dataframe(min_match, reverse=False, coord_columns=coord_columns, retain_all_columns=retain_all_columns, @@ -151,13 +154,13 @@ def get_serial_dataframe(self, min_match=1, reverse=False, coord_columns=['Ra', combined_df = pd.concat([df1, df2], ignore_index=False) data_df = combined_df.iloc[idx_combine] if retain_columns is not None: - non_existent_columns = [col for col in retain_columns if col not in data_df.columns] + non_existent_columns = [col for col in retain_columns if col not in data_df.columns] if non_existent_columns: raise KeyError(f"Columns {non_existent_columns} are not in the input DataFrame") data_df.insert(2, 'N_match', n_match) data_df.insert(3, 'is_cat1', is_df1) return data_df - + def number_distribution(self) -> Counter: """Get the distribution of the number of matches for each object in the first catalog. 
@@ -169,4 +172,3 @@ def number_distribution(self) -> Counter: Ns = [len(v) for v in self.get_result_dict().values()] unique_counts = Counter(Ns) return unique_counts - \ No newline at end of file diff --git a/spherimatch/utilities_spherical.py b/spherimatch/utilities_spherical.py index 315b624..5d1db2f 100644 --- a/spherimatch/utilities_spherical.py +++ b/spherimatch/utilities_spherical.py @@ -72,17 +72,18 @@ def point_offset(ra_dec, angular_distance, theta): angular_distance : float Distance in degrees to move from the initial point. theta : float - Direction in degrees counter-clockwise from the positive DEC axis when viewed from the center of the celestial sphere. + Direction in degrees counter-clockwise from the positive DEC axis when viewed from the center of the + celestial sphere. Returns ------- new_point : tuple (RA, DEC) in degrees for the point after offset. - + Note ---- - The direction specified by theta is counter-clockwise when viewed from the center of the celestial sphere, looking outwards. - If visualizing from a point above the North Celestial Pole, the direction will appear clockwise. + The direction specified by theta is counter-clockwise when viewed from the center of the celestial sphere, + looking outwards. If visualizing from a point above the North Celestial Pole, the direction will appear clockwise. """ # Convert all angles to radians @@ -173,8 +174,8 @@ def rodrigues_rotation(v, k, theta): def rotate_radec_about_axis(ra, dec, axis_ra, axis_dec, theta): """Rotate a point (or points) in celestial coordinates about a specified axis. - Given a point (or an array of points) defined by its Right Ascension and Declination, - this function rotates it about an arbitrary axis (defined by its own RA and Dec) by a + Given a point (or an array of points) defined by its Right Ascension and Declination, + this function rotates it about an arbitrary axis (defined by its own RA and Dec) by a specified angle. 
Parameters diff --git a/spherimatch/xmatch.py b/spherimatch/xmatch.py index 5da1062..28e6ad8 100644 --- a/spherimatch/xmatch.py +++ b/spherimatch/xmatch.py @@ -14,8 +14,8 @@ def unique_merge_defaultdicts(d1: defaultdict, d2: defaultdict): """Joins two dictionaries, merging values for shared keys and preserving others. - When both dictionaries have the same key, this function makes a new list - with every distinct value from either dictionary. If a key is only in one + When both dictionaries have the same key, this function makes a new list + with every distinct value from either dictionary. If a key is only in one dictionary, it adds that key and its values directly to the result. Parameters @@ -50,6 +50,7 @@ def unique_merge_defaultdicts(d1: defaultdict, d2: defaultdict): result = defaultdict(list, {k: list(v) for k, v in zip(all_keys, all_values)}) return result + def xmatch(catalog1, catalog2, tolerance, verbose=False) -> XMatchResult: """Performs a cross-match between two catalogs. @@ -83,7 +84,7 @@ def xmatch(catalog1, catalog2, tolerance, verbose=False) -> XMatchResult: cg2.distribute(_catalog2) if len(cg1.chunks) != len(cg2.chunks): raise BrokenPipeError("The two catalogs have different number of chunks! Please contact the developer.") - merged_dict = defaultdict(list) # [FIXME] Change to dict or sorted dict, or don't assume the order of the keys. + merged_dict = defaultdict(list) # [FIXME] Change to dict or sorted dict, or don't assume the order of the keys. 
for i in range(len(cg1.chunks)): if verbose: print(f"Started Chunk {i}") @@ -94,6 +95,7 @@ def xmatch(catalog1, catalog2, tolerance, verbose=False) -> XMatchResult: merged_dict = unique_merge_defaultdicts(merged_dict, dd) return XMatchResult(_catalog1, _catalog2, tolerance, merged_dict) + def rotate_to_center(object_coor, chunk_ra, chunk_dec): # Rotate the center of the chunk to (180, 0) of the celestial sphere center_car = radec_to_cartesian(chunk_ra, chunk_dec) @@ -101,9 +103,10 @@ def rotate_to_center(object_coor, chunk_ra, chunk_dec): normal_car /= np.linalg.norm(normal_car) normal_ra, normal_dec = cartesian_to_radec(normal_car) angle = great_circle_distance(chunk_ra, chunk_dec, 180, 0) - rot_ra, rot_dec = rotate_radec_about_axis(object_coor[:,0], object_coor[:,1], normal_ra, normal_dec, angle) + rot_ra, rot_dec = rotate_radec_about_axis(object_coor[:, 0], object_coor[:, 1], normal_ra, normal_dec, angle) return rot_ra, rot_dec + def xmatch_chunk(args: tuple[Chunk, Chunk, float]): chunk1, chunk2, tolerance = args objects1, objects2 = chunk1.get_data(), chunk2.get_data() @@ -123,10 +126,11 @@ def xmatch_chunk(args: tuple[Chunk, Chunk, float]): dd[key] = value return dd + def spherical_xmatching(idx1: np.array, coor1: np.array, idx2: np.array, coor2: np.array, tolerance, A2E_factor): qt1 = KDTree(coor1) qt2 = KDTree(coor2) - list_of_indexes = qt1.query_ball_tree(qt2, tolerance * A2E_factor) # list of elements in idx2 + list_of_indexes = qt1.query_ball_tree(qt2, tolerance * A2E_factor) # list of elements in idx2 keys, vals = [], [] for i, indexes in enumerate(list_of_indexes): distance = distances_to_target(coor1[i, :], coor2[indexes, :]) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index f4efb34..871980c 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -4,6 +4,7 @@ from spherimatch.catalog import Catalog from spherimatch.utilities_spherical import generate_random_point + class TestCatalog_RandomCheckInputOutput(unittest.TestCase): 
@staticmethod @@ -18,7 +19,7 @@ def test_random_np(self): catalog = Catalog(np.vstack([ra, dec]).T) code_output = catalog.get_coordiantes() self.assertEqual(code_output.tolist(), expected_output.tolist()) - + def test_random_pd(self): for i in range(10): (ra, dec), expected_output = self.get_input_output_pair(N=1000) @@ -26,6 +27,7 @@ def test_random_pd(self): code_output = catalog.get_coordiantes() self.assertEqual(code_output.tolist(), expected_output.tolist()) + class TestCatalog_ValidInput(unittest.TestCase): def setUp(self): @@ -55,6 +57,7 @@ def tearDown(self): code_output = catalog.get_coordiantes() self.assertEqual(code_output.tolist(), self.expected_output.tolist()) + class TestCatalog_InvalidInput(unittest.TestCase): def test_invalid_type(self): @@ -65,7 +68,7 @@ def test_np_shape_1d(self): with self.assertRaises(ValueError): Catalog(np.array([1, 2, 3])) # 1D array - def test_np_shape_2x3(self): + def test_np_shape_2x3(self): with self.assertRaises(ValueError): Catalog(np.array([[1, 2, 3], [4, 5, 6]])) @@ -87,4 +90,4 @@ def test_pd_contains_nan(self): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_chunk.py b/tests/test_chunk.py index f8f35be..8c27400 100644 --- a/tests/test_chunk.py +++ b/tests/test_chunk.py @@ -6,7 +6,7 @@ from spherimatch import GridChunkGenerator from spherimatch import DisjointSet from spherimatch.catalog import Catalog -from spherimatch.result_fof import FoFResult +from spherimatch.result_fof import FoFResult from spherimatch.fof import group_by_quadtree_chunk @@ -68,6 +68,7 @@ def test_objects_outside_tolerance_boundary(self): result = chunk_gen.coor2id_boundary(ra, dec) self.assertEqual(result, expected_result) + class TestChunkIntegratingFoF(unittest.TestCase): def group_by_quadtree_scipy(self, objects_df: pd.DataFrame, tolerance, chunk_gen): @@ -100,7 +101,7 @@ def test_different_chunk(self): print(len(result_a.get_coordinates())) 
print(len(result_b.get_coordinates())) self.assertEqual(len(result_a.get_coordinates()), len(result_b.get_coordinates())) - + # Running the tests if __name__ == '__main__': diff --git a/tests/test_fof.py b/tests/test_fof.py index d3e7cbb..828cc18 100644 --- a/tests/test_fof.py +++ b/tests/test_fof.py @@ -54,12 +54,12 @@ def generate_celestial_grid(**kwargs) -> list[tuple[float, float]]: return grid -def create_groups_from_grid(grid: list[tuple[float, float]], - tolerance=1, seed=None, fraction=0.5, +def create_groups_from_grid(grid: list[tuple[float, float]], + tolerance=1, seed=None, fraction=0.5, ring_radius=(0, 1)) -> tuple[list[list[tuple[float, float]]], NDArray]: """ Randomly pick half of the grid points and create groups around them. - For each selected grid point, use the point_offset() function to create several points + For each selected grid point, use the point_offset() function to create several points within a tolerance (default 1 degree) circle around the central point. Returns a list of groups, where each group is a list of (RA, Dec) coordinates. """ @@ -92,7 +92,7 @@ def check_group_match(expected_groups: list[list[tuple[float, float]]], output_g List of groups that are expected to be grouped correctly. output_groups : list[list[tuple[float, float]]] List of groups that are output by the tested function. - + Returns ------- problematic_groups : list[list[tuple[float, float]]] @@ -116,11 +116,11 @@ class TestCelestialGrouping_RandomGrid(unittest.TestCase): """ Unit test for a celestial objects grouping method. - The purpose of this unit test is to ensure that a celestial objects grouping method works correctly. - A grid on the celestial sphere is created with a size of 10 deg. Half of the grid points are randomly - selected, and for each selected point, several points are created within a 1 deg circle around it using - the point_offset() function. These points, with the central point, become a 'group'. 
The tested function - is then called, and its output is checked against the expected groups. If there's a discrepancy, the + The purpose of this unit test is to ensure that a celestial objects grouping method works correctly. + A grid on the celestial sphere is created with a size of 10 deg. Half of the grid points are randomly + selected, and for each selected point, several points are created within a 1 deg circle around it using + the point_offset() function. These points, with the central point, become a 'group'. The tested function + is then called, and its output is checked against the expected groups. If there's a discrepancy, the problematic group is printed. """ diff --git a/tests/test_result_xmatch.py b/tests/test_result_xmatch.py index 2d83c00..d069be8 100644 --- a/tests/test_result_xmatch.py +++ b/tests/test_result_xmatch.py @@ -5,6 +5,7 @@ from spherimatch import xmatch from spherimatch.catalog import Catalog + class TestXMatchResult_Methods(unittest.TestCase): def setUp(self): @@ -16,11 +17,12 @@ def generate_offset_groups(base_ra: NDArray, base_dec: NDArray): base_coords = np.vstack([base_ra, base_dec]).T unit_ra = np.vstack([np.ones(l), np.zeros(l)]).T unit_dec = np.vstack([np.zeros(l), np.ones(l)]).T - coords = np.array([base_coords + 120 * i * unit_ra + 20 * j * unit_dec for i in range(3) for j in range(-1, 2)]) + coords = np.array([base_coords + 120 * i * unit_ra + 20 * j * unit_dec for i in range(3) + for j in range(-1, 2)]) coords = coords.reshape(-1, coords.shape[-1]) - coords[:,0] = coords[:,0] % 360 + coords[:, 0] = coords[:, 0] % 360 return coords - + base1 = np.array([-0.5, 0.1, 0.3]), np.array([0.2, -0.4, 0]) base2 = np.array([0.5, -0.4]), np.array([-0.1, 0.6]) self.coords1 = generate_offset_groups(*base1) @@ -192,7 +194,7 @@ def test_get_serial_dataframe_retain_columns(self): idx = i * (self.n2 + 1) + j + 1 self.assertAlmostEqual(df.iloc[idx]['A'], df2.loc[i // self.n1 * self.n2 + j, 'A']) self.assertAlmostEqual(df.iloc[idx]['C'], 
df2.loc[i // self.n1 * self.n2 + j, 'C']) - + @unittest.skip("Future functionality") def test_get_multiindex_dataframe(self): result = xmatch(self.coords1, self.coords2, 2) @@ -206,9 +208,7 @@ def test_get_multiindex_dataframe(self): self.assertTrue(all(sizes == self.n2)) for i in range(self.coords1.shape[0]): group_df = df.loc[i] - continue # [TODO] Check the content of the group dataframe + continue # [TODO] Check the content of the group dataframe # [FIXME] Write a test to check that if itterating over the deaultdict, the keys won't be in the correct order. # Thus yielding an incorrect result of N_match. - - \ No newline at end of file diff --git a/tests/test_xmatch.py b/tests/test_xmatch.py index 639c719..13aa646 100644 --- a/tests/test_xmatch.py +++ b/tests/test_xmatch.py @@ -46,10 +46,10 @@ def create_catalogs_from_grid(grid, tolerance=1, seed=None, fraction=0.5, ring_r i_cat2 = 0 for i, point in enumerate(selected_points): centrals.append(point) - idxes = [] # List of indexes of the surrounding points + idxes = [] # List of indexes of the surrounding points for _ in range(np.random.randint(5, 10)): # Randomly create 1 to 4 additional points theta = np.random.uniform(0, 360) # Random direction - distance = np.random.uniform(tolerance*ring_radius[0], tolerance*ring_radius[1]) # Random distance + distance = np.random.uniform(tolerance*ring_radius[0], tolerance*ring_radius[1]) # Random distance offset_point = point_offset(point, distance, theta) neighbors.append(offset_point) idxes.append(i_cat2) @@ -66,15 +66,15 @@ def check_Xmatching(expected_matches: dict, output_matches: defaultdict): if the matching process has been conducted correctly. Parameters: - - expected_matches (dict): A dictionary where keys are central points and values are lists of expected + - expected_matches (dict): A dictionary where keys are central points and values are lists of expected neighboring points that should match with the central point. 
- - output_matches (defaultdict): A defaultdict similar in structure to expected_matches, but contains - the actual neighboring points matched with each central point by the + - output_matches (defaultdict): A defaultdict similar in structure to expected_matches, but contains + the actual neighboring points matched with each central point by the matching algorithm being tested. Returns: - - problematic_matches (list): A list of tuples, where each tuple contains a central point and its expected - neighboring points that were not matched correctly by the matching algorithm. + - problematic_matches (list): A list of tuples, where each tuple contains a central point and its expected + neighboring points that were not matched correctly by the matching algorithm. If the algorithm works correctly, this list will be empty. """ problematic_matches = [] @@ -94,6 +94,7 @@ def check_Xmatching(expected_matches: dict, output_matches: defaultdict): print(f"Group {central} does not match!") return problematic_matches + def print_format_match(problematic_matches, central_point, surrounding_points): for match in problematic_matches: p = central_point[match[0]] @@ -122,6 +123,7 @@ class TestCelestialXMatching_RandomGrid(unittest.TestCase): the central points, and the second contains points surrounding the central points. 
""" + def setUp(self): # This method will be called before each test, setting up the common resources seed = np.random.randint(0, 1e5) @@ -133,7 +135,7 @@ def setUp(self): grid, self.tolerance, seed=seed, ring_radius=(0.999, 1.0), fraction=0.8) if panda: self.two_catalogs = (pd.DataFrame(self.two_catalogs[0], columns=['Ra', 'Dec']), - pd.DataFrame(self.two_catalogs[1], columns=['Ra', 'Dec'])) + pd.DataFrame(self.two_catalogs[1], columns=['Ra', 'Dec'])) def test_match_by_quadtree(self): output_matches = xmatch(self.two_catalogs[0], self.two_catalogs[1], self.tolerance).get_result_dict() @@ -204,8 +206,8 @@ def test_unsorted_data(self): def test_with_numpy(self): # Test with numpy arrays - df1 = np.array([[1, 3], [2, 4], [8, 6]]) # shape (3, 2) - df2 = np.array([[5, 7], [6, 8], [7, 9]]) # shape (3, 2) + df1 = np.array([[1, 3], [2, 4], [8, 6]]) # shape (3, 2) + df2 = np.array([[5, 7], [6, 8], [7, 9]]) # shape (3, 2) self.result = xmatch(df1, df2, self.tolerance) self.assertIsNotNone(self.result) # Assert result is not None From 07c0abf8b4eb4698a7b82274041f1546b384d0a4 Mon Sep 17 00:00:00 2001 From: Yuan-Ming Hsu <48866415+technic960183@users.noreply.github.com> Date: Fri, 2 May 2025 23:28:03 +0800 Subject: [PATCH 03/10] Fix style: Best practice for Python code Thanks: flake8, ruff --- spherimatch/catalog.py | 12 ++++++------ spherimatch/chunk.py | 4 ++-- tests/test_result_xmatch.py | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/spherimatch/catalog.py b/spherimatch/catalog.py index b531e21..5e138f5 100644 --- a/spherimatch/catalog.py +++ b/spherimatch/catalog.py @@ -25,15 +25,15 @@ def __init__(self, data): self.dec = None # dec, latitude, alltitude self.ra_column: Optional[str] = None self.dec_column: Optional[str] = None - if self.datatype == np.ndarray: + if isinstance(self.input_data, np.ndarray): self.__type_np_array() - elif self.datatype == pd.DataFrame: + elif isinstance(self.input_data, pd.DataFrame): self.__type_pd_dataframe() - 
elif self.datatype == tuple: + elif isinstance(self.input_data, tuple): raise NotImplementedError() # [TODO] Support tuple input for Catalog - elif self.datatype == list: + elif isinstance(self.input_data, list): raise NotImplementedError() # [TODO] Support list input for Catalog - elif self.datatype == dict: + elif isinstance(self.input_data, dict): raise NotImplementedError() # [TODO] Support dict input for Catalog else: raise TypeError("The input data must be either a numpy array or a pandas dataframe!") @@ -91,7 +91,7 @@ def get_appending_data(self, retain_all_columns=True, retain_columns=None, pandas.DataFrame The dataframe of the appending data. ''' - if self.datatype != pd.DataFrame: + if not isinstance(self.input_data, pd.DataFrame): return pd.DataFrame(index=self.get_indexes()) columns = [] if retain_all_columns: diff --git a/spherimatch/chunk.py b/spherimatch/chunk.py index 73d5dce..ce88f4a 100644 --- a/spherimatch/chunk.py +++ b/spherimatch/chunk.py @@ -6,7 +6,7 @@ class Chunk: def __init__(self, chunk_id, ra, dec, discription=None): self.chunk_id = chunk_id - self.discription = discription if discription != None else f"Chunk {chunk_id} ({ra:3f}, {dec:3f})" + self.discription = discription if discription is not None else f"Chunk {chunk_id} ({ra:3f}, {dec:3f})" self.central_data = np.empty((0, 2), dtype=np.float64) self.boundary_data = np.empty((0, 2), dtype=np.float64) self.central_index = np.empty((0), dtype=np.int64) @@ -33,7 +33,7 @@ def get_center(self): return self.chunk_ra, self.chunk_dec def farest_distance(self, distance=None): - if distance == None: + if distance is None: return self.max_size self.max_size = distance diff --git a/tests/test_result_xmatch.py b/tests/test_result_xmatch.py index d069be8..6bae61c 100644 --- a/tests/test_result_xmatch.py +++ b/tests/test_result_xmatch.py @@ -13,10 +13,10 @@ def setUp(self): def generate_offset_groups(base_ra: NDArray, base_dec: NDArray): if base_ra.shape[0] != base_dec.shape[0]: raise ValueError("The 
two arrays must have the same length.") - l = base_ra.shape[0] + length = base_ra.shape[0] base_coords = np.vstack([base_ra, base_dec]).T - unit_ra = np.vstack([np.ones(l), np.zeros(l)]).T - unit_dec = np.vstack([np.zeros(l), np.ones(l)]).T + unit_ra = np.vstack([np.ones(length), np.zeros(length)]).T + unit_dec = np.vstack([np.zeros(length), np.ones(length)]).T coords = np.array([base_coords + 120 * i * unit_ra + 20 * j * unit_dec for i in range(3) for j in range(-1, 2)]) coords = coords.reshape(-1, coords.shape[-1]) From 6bd4b2a2955ecd6074001d1dbdbe6da7d50a6bdb Mon Sep 17 00:00:00 2001 From: Yuan-Ming Hsu <48866415+technic960183@users.noreply.github.com> Date: Sat, 3 May 2025 00:26:00 +0800 Subject: [PATCH 04/10] Fix static types Thanks: mypy, pyright --- spherimatch/catalog.py | 4 ++-- spherimatch/chunk_generator.py | 2 +- spherimatch/result_xmatch.py | 9 +++++---- tests/test_fof.py | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/spherimatch/catalog.py b/spherimatch/catalog.py index 5e138f5..5dd63fe 100644 --- a/spherimatch/catalog.py +++ b/spherimatch/catalog.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Any, Optional import numpy as np import pandas as pd from numpy.typing import NDArray @@ -18,7 +18,7 @@ class Catalog: possible combinations with 'ra', 'dec'; 'RA', 'DEC'). ''' - def __init__(self, data): + def __init__(self, data: Any) -> None: self.datatype = type(data) self.input_data = data self.ra = None # ra, longitude, azimuth diff --git a/spherimatch/chunk_generator.py b/spherimatch/chunk_generator.py index ba3b700..f99221f 100644 --- a/spherimatch/chunk_generator.py +++ b/spherimatch/chunk_generator.py @@ -5,7 +5,7 @@ class ChunkGenerator: - def __init__(self, margin): + def __init__(self, margin: float) -> None: '''Initialize the chunk generator. 
''' self.chunks: list[Chunk] = [] diff --git a/spherimatch/result_xmatch.py b/spherimatch/result_xmatch.py index 7d34663..994fa5e 100644 --- a/spherimatch/result_xmatch.py +++ b/spherimatch/result_xmatch.py @@ -1,4 +1,5 @@ from collections import Counter, defaultdict +from typing import Optional import numpy as np import pandas as pd from .catalog import Catalog @@ -11,7 +12,7 @@ def __init__(self, cat1: Catalog, cat2: Catalog, tolerance, result_dict: default self.cat2 = cat2 self.tolerance = tolerance self.result_dict = result_dict - self.result_dict_reserve = None + self.result_dict_reserve: Optional[defaultdict] = None def __str__(self): return f"XMatchResult of cat1 with {len(self.cat1)} objects and cat2 with {len(self.cat2)} objects." @@ -136,9 +137,9 @@ def get_serial_dataframe(self, min_match=1, reverse=False, coord_columns=['Ra', if len(idx_combine) == 0: return pd.DataFrame(columns=coord_columns) idx_combine = np.array(idx_combine, dtype=np.int64) - is_df1 = np.array(is_df1) + is_df1_np = np.array(is_df1) n1 = len(self.cat1) - idx_combine[~is_df1] += n1 + idx_combine[~is_df1_np] += n1 idxes_array1 = self.cat1.get_indexes() idxes_array2 = self.cat2.get_indexes() df1 = pd.DataFrame(self.cat1.get_coordiantes(), columns=coord_columns, index=idxes_array1) @@ -158,7 +159,7 @@ def get_serial_dataframe(self, min_match=1, reverse=False, coord_columns=['Ra', if non_existent_columns: raise KeyError(f"Columns {non_existent_columns} are not in the input DataFrame") data_df.insert(2, 'N_match', n_match) - data_df.insert(3, 'is_cat1', is_df1) + data_df.insert(3, 'is_cat1', is_df1_np) return data_df def number_distribution(self) -> Counter: diff --git a/tests/test_fof.py b/tests/test_fof.py index 828cc18..ffba43d 100644 --- a/tests/test_fof.py +++ b/tests/test_fof.py @@ -55,7 +55,7 @@ def generate_celestial_grid(**kwargs) -> list[tuple[float, float]]: def create_groups_from_grid(grid: list[tuple[float, float]], - tolerance=1, seed=None, fraction=0.5, + tolerance=1., 
seed=None, fraction=0.5, ring_radius=(0, 1)) -> tuple[list[list[tuple[float, float]]], NDArray]: """ Randomly pick half of the grid points and create groups around them. From fc2ab9e29a9b3fb37619d0095380b0cdc362d8a4 Mon Sep 17 00:00:00 2001 From: Yuan-Ming Hsu <48866415+technic960183@users.noreply.github.com> Date: Sat, 3 May 2025 16:39:59 +0800 Subject: [PATCH 05/10] Clean up and sort `import` Thanks: isort, ruff Also make the relative import to absolute import for unittest files. So the unittest loader can load them correctly. - `from .test_fof import generate_celestial_grid` + `from tests.test_fof import generate_celestial_grid` --- docs/source/conf.py | 1 + spherimatch/__init__.py | 6 ------ spherimatch/chunk_generator.py | 2 +- spherimatch/euclidean_vs_angular_distance_local.py | 1 - spherimatch/fof.py | 8 ++++++-- spherimatch/result_fof.py | 2 +- spherimatch/xmatch.py | 10 +++++++--- tests/test_chunk.py | 10 +++++----- tests/test_fof.py | 6 ++++-- tests/test_result_xmatch.py | 1 - tests/test_toolbox_spherical.py | 8 ++++++-- tests/test_xmatch.py | 6 +++--- 12 files changed, 34 insertions(+), 27 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 6a342c7..f33cdd3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,5 +1,6 @@ import os import sys + sys.path.insert(0, os.path.abspath('../../')) # Configuration file for the Sphinx documentation builder. 
diff --git a/spherimatch/__init__.py b/spherimatch/__init__.py index 2cf0d67..0c6b050 100644 --- a/spherimatch/__init__.py +++ b/spherimatch/__init__.py @@ -1,10 +1,4 @@ -from .chunk_generator_grid import GridChunkGenerator, GridChunkConfig -from .chunk_generator_grid import ChunkGeneratorByGrid, ChunkGeneratorByDenseGrid, ChunkGeneratorBySuperDenseGrid -from .disjoint_set import DisjointSet from .fof import fof, group_by_quadtree -from .result_fof import FoFResult -from .result_xmatch import XMatchResult -from .utilities_spherical import * from .xmatch import xmatch __all__ = ['fof', 'group_by_quadtree', 'xmatch'] diff --git a/spherimatch/chunk_generator.py b/spherimatch/chunk_generator.py index f99221f..b905982 100644 --- a/spherimatch/chunk_generator.py +++ b/spherimatch/chunk_generator.py @@ -1,6 +1,6 @@ +from numpy.typing import NDArray from .catalog import Catalog from .chunk import Chunk -from numpy.typing import NDArray class ChunkGenerator: diff --git a/spherimatch/euclidean_vs_angular_distance_local.py b/spherimatch/euclidean_vs_angular_distance_local.py index 4604504..5dce28d 100644 --- a/spherimatch/euclidean_vs_angular_distance_local.py +++ b/spherimatch/euclidean_vs_angular_distance_local.py @@ -2,7 +2,6 @@ from scipy.spatial.distance import euclidean from .utilities_spherical import point_offset - ''' This script provides a detailed analysis of the discrepancies between Euclidean and angular distances on a spherical coordinate system, particularly examining diff --git a/spherimatch/fof.py b/spherimatch/fof.py index 531c458..2a105b6 100644 --- a/spherimatch/fof.py +++ b/spherimatch/fof.py @@ -7,8 +7,12 @@ from .disjoint_set import DisjointSet from .euclidean_vs_angular_distance_local import compute_error from .result_fof import FoFResult -from .utilities_spherical import radec_to_cartesian, cartesian_to_radec -from .utilities_spherical import great_circle_distance, rotate_radec_about_axis +from .utilities_spherical import ( + cartesian_to_radec, + 
great_circle_distance, + radec_to_cartesian, + rotate_radec_about_axis, +) def group_by_quadtree(catalog, tolerance, dec_bound=None, ring_chunk=None) -> FoFResult: diff --git a/spherimatch/result_fof.py b/spherimatch/result_fof.py index e91c631..f9e614a 100644 --- a/spherimatch/result_fof.py +++ b/spherimatch/result_fof.py @@ -1,5 +1,5 @@ -import pandas as pd import numpy as np +import pandas as pd from .catalog import Catalog diff --git a/spherimatch/xmatch.py b/spherimatch/xmatch.py index 28e6ad8..8a1ca86 100644 --- a/spherimatch/xmatch.py +++ b/spherimatch/xmatch.py @@ -6,9 +6,13 @@ from .chunk_generator_grid import GridChunkGenerator from .euclidean_vs_angular_distance_local import compute_error from .result_xmatch import XMatchResult -from .utilities_spherical import radec_to_cartesian, cartesian_to_radec -from .utilities_spherical import great_circle_distance, rotate_radec_about_axis -from .utilities_spherical import distances_to_target +from .utilities_spherical import ( + cartesian_to_radec, + distances_to_target, + great_circle_distance, + radec_to_cartesian, + rotate_radec_about_axis, +) def unique_merge_defaultdicts(d1: defaultdict, d2: defaultdict): diff --git a/tests/test_chunk.py b/tests/test_chunk.py index 8c27400..b19a144 100644 --- a/tests/test_chunk.py +++ b/tests/test_chunk.py @@ -1,13 +1,13 @@ import unittest import numpy as np import pandas as pd -from spherimatch import ChunkGeneratorByGrid -from spherimatch import ChunkGeneratorByDenseGrid, ChunkGeneratorBySuperDenseGrid -from spherimatch import GridChunkGenerator -from spherimatch import DisjointSet from spherimatch.catalog import Catalog -from spherimatch.result_fof import FoFResult +from spherimatch.chunk_generator_grid import ChunkGeneratorByGrid, GridChunkGenerator +from spherimatch.disjoint_set import DisjointSet from spherimatch.fof import group_by_quadtree_chunk +from spherimatch.result_fof import FoFResult + +# from spherimatch.chunk_generator_grid import ChunkGeneratorByDenseGrid, 
ChunkGeneratorBySuperDenseGrid class TestChunkGeneratorByGrid_coor2id_central(unittest.TestCase): diff --git a/tests/test_fof.py b/tests/test_fof.py index ffba43d..2f2272b 100644 --- a/tests/test_fof.py +++ b/tests/test_fof.py @@ -1,9 +1,11 @@ import unittest import numpy as np from numpy.typing import NDArray -from spherimatch import point_offset, generate_random_point -# from spherimatch import group_by_disjoint_set, group_by_DFS from spherimatch import fof +from spherimatch.utilities_spherical import point_offset + +# from spherimatch.utilities_spherical import generate_random_point +# from spherimatch import group_by_disjoint_set, group_by_DFS def generate_celestial_grid(**kwargs) -> list[tuple[float, float]]: diff --git a/tests/test_result_xmatch.py b/tests/test_result_xmatch.py index 6bae61c..0ca05d2 100644 --- a/tests/test_result_xmatch.py +++ b/tests/test_result_xmatch.py @@ -3,7 +3,6 @@ import pandas as pd from numpy.typing import NDArray from spherimatch import xmatch -from spherimatch.catalog import Catalog class TestXMatchResult_Methods(unittest.TestCase): diff --git a/tests/test_toolbox_spherical.py b/tests/test_toolbox_spherical.py index b7c15ce..97075b7 100644 --- a/tests/test_toolbox_spherical.py +++ b/tests/test_toolbox_spherical.py @@ -1,7 +1,11 @@ import unittest import numpy as np -from spherimatch import distances_to_target, point_offset, rotate_radec_about_axis -from spherimatch import great_circle_distance +from spherimatch.utilities_spherical import ( + distances_to_target, + great_circle_distance, + point_offset, + rotate_radec_about_axis, +) class TestAngularDistance(unittest.TestCase): diff --git a/tests/test_xmatch.py b/tests/test_xmatch.py index 13aa646..bbc7619 100644 --- a/tests/test_xmatch.py +++ b/tests/test_xmatch.py @@ -1,10 +1,10 @@ -from collections import defaultdict import unittest +from collections import defaultdict import numpy as np import pandas as pd -from spherimatch import point_offset, generate_random_point +from 
tests.test_fof import generate_celestial_grid from spherimatch import xmatch -from .test_fof import generate_celestial_grid +from spherimatch.utilities_spherical import point_offset def create_catalogs_from_grid(grid, tolerance=1, seed=None, fraction=0.5, ring_radius=(0, 1)): From 716e02a2b72d3e8d23018bfd137b668a5057ca20 Mon Sep 17 00:00:00 2001 From: Yuan-Ming Hsu <48866415+technic960183@users.noreply.github.com> Date: Sat, 3 May 2025 17:03:49 +0800 Subject: [PATCH 06/10] Comment out invalid / outdated unittests Thanks: ruff, pylint --- tests/test_fof.py | 53 +++++++++++++++++++------------------ tests/test_result_xmatch.py | 30 ++++++++++----------- 2 files changed, 41 insertions(+), 42 deletions(-) diff --git a/tests/test_fof.py b/tests/test_fof.py index 2f2272b..abc89d1 100644 --- a/tests/test_fof.py +++ b/tests/test_fof.py @@ -152,7 +152,7 @@ def test_group_by_quadtree(self): self.assertEqual(len(problematic_groups), 0, f"Failed groups: {problematic_groups}") -class TestCelestialGrouping_Random(unittest.TestCase): +# class TestCelestialGrouping_Random(unittest.TestCase): # def test_comparing_DFS_quadtree(self): # ra, dec = generate_random_point(10000) @@ -163,22 +163,23 @@ class TestCelestialGrouping_Random(unittest.TestCase): # problematic_groups = check_group_match(output_groups_dfs, output_groups_qt) # self.assertEqual(len(problematic_groups), 0, f"Failed groups: {problematic_groups}") - @unittest.skip("This test takes too long to run.") - def test_comparing_chunk_setting(self): - ra, dec = generate_random_point(10000, seed=0) - all_points = np.array([ra, dec]).T - tolerance = 1.5 - output_groups_base = fof(all_points, tolerance, dec_bound=60, ring_chunk=[6, 6]).get_coordinates() - for i in range(400): - print(f"Test {i+1} started!") - dec = np.random.uniform(50, 80) - N = np.random.randint(2, 6) - ring = [np.random.randint(6, 12) for _ in range(N)] - output_groups_test = fof(all_points, tolerance, dec_bound=dec, ring_chunk=ring).get_coordinates() - 
problematic_groups = check_group_match(output_groups_test, output_groups_base) - self.assertEqual( - len(problematic_groups), - 0, f"Failed groups: {problematic_groups} with dec_bound={dec}, ring_chunk={ring}") + # [TODO] This test has not been updated to the new API yet. + # @unittest.skip("This test takes too long to run.") + # def test_comparing_chunk_setting(self): + # ra, dec = generate_random_point(10000, seed=0) + # all_points = np.array([ra, dec]).T + # tolerance = 1.5 + # output_groups_base = fof(all_points, tolerance, dec_bound=60, ring_chunk=[6, 6]).get_coordinates() + # for i in range(400): + # print(f"Test {i+1} started!") + # dec = np.random.uniform(50, 80) + # N = np.random.randint(2, 6) + # ring = [np.random.randint(6, 12) for _ in range(N)] + # output_groups_test = fof(all_points, tolerance, dec_bound=dec, ring_chunk=ring).get_coordinates() + # problematic_groups = check_group_match(output_groups_test, output_groups_base) + # self.assertEqual( + # len(problematic_groups), + # 0, f"Failed groups: {problematic_groups} with dec_bound={dec}, ring_chunk={ring}") class TestCelestialGrouping(unittest.TestCase): @@ -250,12 +251,12 @@ def print_format_group(groups): print(f"[X] {central_point_str}: [{surrounding_points_str}]") -if __name__ == "__main__": - # unittest.main(verbosity=2) - suite = unittest.TestLoader().loadTestsFromTestCase(TestCelestialGrouping_Random) - unittest.TextTestRunner(verbosity=2).run(suite) - for i in range(0): - ut = TestCelestialGrouping_RandomGrid() - ut.setUp() - ut.test_group_by_quadtree() - print(f"Test {i+1} passed!") +# if __name__ == "__main__": +# # unittest.main(verbosity=2) +# suite = unittest.TestLoader().loadTestsFromTestCase(TestCelestialGrouping_Random) +# unittest.TextTestRunner(verbosity=2).run(suite) +# for i in range(0): +# ut = TestCelestialGrouping_RandomGrid() +# ut.setUp() +# ut.test_group_by_quadtree() +# print(f"Test {i+1} passed!") diff --git a/tests/test_result_xmatch.py b/tests/test_result_xmatch.py 
index 0ca05d2..00256eb 100644 --- a/tests/test_result_xmatch.py +++ b/tests/test_result_xmatch.py @@ -73,7 +73,6 @@ def test_get_dataframe1_retain_all_columns_True(self): def test_get_dataframe1_retain_all_columns_False(self): columns = ['RA', 'DEC'] - retain_columns = ['A', 'B'] df1 = pd.DataFrame(self.coords1, columns=columns) df1['A'] = np.cos(self.coords1[:, 0]) + np.arange(self.coords1.shape[0]) df1['B'] = np.sin(self.coords1[:, 1]) + np.arange(self.coords1.shape[0]) @@ -84,7 +83,6 @@ def test_get_dataframe1_retain_all_columns_False(self): def test_get_dataframe1_retain_columns(self): columns = ['RA', 'DEC'] - retain_columns = ['A', 'B'] df1 = pd.DataFrame(self.coords1, columns=columns) df1['A'] = np.cos(self.coords1[:, 0]) + np.arange(self.coords1.shape[0]) df1['B'] = np.sin(self.coords1[:, 1]) + np.arange(self.coords1.shape[0]) @@ -194,20 +192,20 @@ def test_get_serial_dataframe_retain_columns(self): self.assertAlmostEqual(df.iloc[idx]['A'], df2.loc[i // self.n1 * self.n2 + j, 'A']) self.assertAlmostEqual(df.iloc[idx]['C'], df2.loc[i // self.n1 * self.n2 + j, 'C']) - @unittest.skip("Future functionality") - def test_get_multiindex_dataframe(self): - result = xmatch(self.coords1, self.coords2, 2) - columns = ['Ra', 'Deccc'] - df = result.get_multiindex_dataframe(coord_columns=columns) - self.assertEqual(len(df), self.coords1.shape[0] + self.coords2.shape[0] * self.n1) - self.assertListEqual(list(df.columns), ['Catalog', 'Column']) - self.assertListEqual(list(df.index), ['Group', 'Object']) - sizes = df.groupby('Group').size() - self.assertEqual(len(sizes), self.coords1.shape[0]) - self.assertTrue(all(sizes == self.n2)) - for i in range(self.coords1.shape[0]): - group_df = df.loc[i] - continue # [TODO] Check the content of the group dataframe + # @unittest.skip("Future functionality") + # def test_get_multiindex_dataframe(self): + # result = xmatch(self.coords1, self.coords2, 2) + # columns = ['Ra', 'Deccc'] + # df = 
result.get_multiindex_dataframe(coord_columns=columns) + # self.assertEqual(len(df), self.coords1.shape[0] + self.coords2.shape[0] * self.n1) + # self.assertListEqual(list(df.columns), ['Catalog', 'Column']) + # self.assertListEqual(list(df.index), ['Group', 'Object']) + # sizes = df.groupby('Group').size() + # self.assertEqual(len(sizes), self.coords1.shape[0]) + # self.assertTrue(all(sizes == self.n2)) + # for i in range(self.coords1.shape[0]): + # group_df = df.loc[i] + # continue # [TODO] Check the content of the group dataframe # [FIXME] Write a test to check that if itterating over the deaultdict, the keys won't be in the correct order. # Thus yielding an incorrect result of N_match. From d0cdd3972f2986f45e069a41c608bdb31f7d537e Mon Sep 17 00:00:00 2001 From: Yuan-Ming Hsu <48866415+technic960183@users.noreply.github.com> Date: Sat, 3 May 2025 19:38:59 +0800 Subject: [PATCH 07/10] Suppress `pylint` false positive --- .mega-linter.yml | 1 + tests/test_fof.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mega-linter.yml b/.mega-linter.yml index de0e9c7..20242fc 100644 --- a/.mega-linter.yml +++ b/.mega-linter.yml @@ -19,6 +19,7 @@ ENABLE_ERRORS_LINTERS: MARKDOWN_MARKDOWNLINT_ARGUMENTS: --disable MD041 PYTHON_BLACK_ARGUMENTS: --skip-string-normalization --line-length 120 +PYTHON_PYLINT_ARGUMENTS: --enable I0021 PRE_COMMANDS: diff --git a/tests/test_fof.py b/tests/test_fof.py index abc89d1..2ab15a0 100644 --- a/tests/test_fof.py +++ b/tests/test_fof.py @@ -232,7 +232,7 @@ def test_qt_random_tree(self): point_now = (ra_now, dec_now) all_points = [point_now] for _ in range(1000): - node = all_points[np.random.randint(0, len(all_points))] + node = all_points[np.random.randint(0, len(all_points))] # pylint: disable=invalid-sequence-index point_now = point_offset(node, np.random.uniform(0, 1), np.random.uniform(0, 360)) all_points.append(point_now) all_points = np.array(all_points) From bc1d2a9f6cdd172a8283dcd488651071d8f72e91 Mon Sep 17 
00:00:00 2001 From: Yuan-Ming Hsu <48866415+technic960183@users.noreply.github.com> Date: Sun, 4 May 2025 17:48:51 +0800 Subject: [PATCH 08/10] Adapt some suggestions from `black` Thanks: black I have not decided yet if I want to use `black` style for the whole project. --- docs/source/conf.py | 6 +----- spherimatch/catalog.py | 6 ++---- spherimatch/chunk_generator.py | 5 ++--- spherimatch/chunk_generator_grid.py | 20 ++++++++++++------- .../euclidean_vs_angular_distance_local.py | 3 +-- spherimatch/fof.py | 4 ++-- spherimatch/result_xmatch.py | 10 +++++++--- spherimatch/utilities_spherical.py | 2 +- spherimatch/xmatch.py | 6 +++--- tests/test_chunk.py | 4 ++-- tests/test_fof.py | 16 ++++++++------- tests/test_result_xmatch.py | 5 +++-- tests/test_toolbox_spherical.py | 5 +++-- tests/test_xmatch.py | 4 ++-- 14 files changed, 51 insertions(+), 45 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index f33cdd3..b4a502e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -19,11 +19,7 @@ # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.viewcode', - 'sphinx.ext.napoleon' -] +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx.ext.napoleon'] templates_path = ['_templates'] exclude_patterns = ['build', 'Thumbs.db', '.DS_Store'] diff --git a/spherimatch/catalog.py b/spherimatch/catalog.py index 5dd63fe..26537d2 100644 --- a/spherimatch/catalog.py +++ b/spherimatch/catalog.py @@ -40,8 +40,7 @@ def __init__(self, data: Any) -> None: self._check_validity_range() def _check_validity_range(self): - '''Check the validity of the input data. Warning if the data is out of range. - ''' + '''Check the validity of the input data. 
Warning if the data is out of range.''' if np.any(self.ra < 0) or np.any(self.ra > 360): print("Warning: Ra values are out of range [0, 360]!") if np.any(self.dec < -90) or np.any(self.dec > 90): @@ -73,8 +72,7 @@ def get_indexes(self) -> NDArray[np.int64]: ''' return np.arange(len(self.ra), dtype=np.int64) - def get_appending_data(self, retain_all_columns=True, retain_columns=None, - invalid_key_error=True) -> pd.DataFrame: + def get_appending_data(self, retain_all_columns=True, retain_columns=None, invalid_key_error=True) -> pd.DataFrame: '''Get the appending data of the points in the catalog for xmatch and fof. Parameters diff --git a/spherimatch/chunk_generator.py b/spherimatch/chunk_generator.py index b905982..a4b01cc 100644 --- a/spherimatch/chunk_generator.py +++ b/spherimatch/chunk_generator.py @@ -6,8 +6,7 @@ class ChunkGenerator: def __init__(self, margin: float) -> None: - '''Initialize the chunk generator. - ''' + '''Initialize the chunk generator.''' self.chunks: list[Chunk] = [] self.margin = margin @@ -34,7 +33,7 @@ def distribute(self, catalog: Catalog) -> list[Chunk]: # Get chunk ids for central coordinates central_chunk_ids = self.coor2id_central(ra, dec) for i in range(len(self.chunks)): - mask = (central_chunk_ids == i) + mask = central_chunk_ids == i self.chunks[i].add_central_data(coordiantes[mask], indexes[mask]) # Get chunk ids for boundary coordinates diff --git a/spherimatch/chunk_generator_grid.py b/spherimatch/chunk_generator_grid.py index 0c0962b..9bb5190 100644 --- a/spherimatch/chunk_generator_grid.py +++ b/spherimatch/chunk_generator_grid.py @@ -119,8 +119,8 @@ def coor2id_central(self, ra, dec): # The line below should make no difference, because the central parts don't cross the 0-360 boundary. 
ra_diff = np.minimum(ra_diff, 360 - ra_diff) dec_diff = np.abs(dec - config['center_dec']) - mask_ra = (ra_diff <= config['delta_ra']) - mask_dec = (dec_diff <= config['delta_dec']) + mask_ra = ra_diff <= config['delta_ra'] + mask_dec = dec_diff <= config['delta_dec'] mask = mask_ra & mask_dec chunk_ids[mask] = i + 2 @@ -146,10 +146,16 @@ def coor2id_boundary(self, ra, dec): # Necessary. The boundary parts DO cross the 0-360 boundary. ra_diff = np.minimum(ra_diff, 360 - ra_diff) dec_diff = np.abs(dec - config['center_dec']) - mask_ra = (ra_diff >= config['delta_ra']) & (ra_diff <= config['delta_ra'] + margin) & ( - dec_diff <= config['delta_dec'] + margin) - mask_dec = (dec_diff >= config['delta_dec']) & (dec_diff <= config['delta_dec'] + margin) & ( - ra_diff <= config['delta_ra'] + margin) + mask_ra = ( + (ra_diff >= config['delta_ra']) + & (ra_diff <= config['delta_ra'] + margin) + & (dec_diff <= config['delta_dec'] + margin) + ) + mask_dec = ( + (dec_diff >= config['delta_dec']) + & (dec_diff <= config['delta_dec'] + margin) + & (ra_diff <= config['delta_ra'] + margin) + ) mask = mask_ra | mask_dec list_of_chunk_of_list_of_object_index.append(list(np.where(mask)[0])) @@ -171,4 +177,4 @@ def __init__(self, margin): class ChunkGeneratorBySuperDenseGrid(GridChunkGenerator): def __init__(self, margin): super().__init__(margin=margin) - self.set_symmetric_ring_chunk(polar_dec=80, Ns_horizontal_ring=[24]*10) + self.set_symmetric_ring_chunk(polar_dec=80, Ns_horizontal_ring=[24] * 10) diff --git a/spherimatch/euclidean_vs_angular_distance_local.py b/spherimatch/euclidean_vs_angular_distance_local.py index 5dce28d..273dc35 100644 --- a/spherimatch/euclidean_vs_angular_distance_local.py +++ b/spherimatch/euclidean_vs_angular_distance_local.py @@ -92,8 +92,7 @@ def compute_error(declination, distance): def compute_max_relative_error(dec, distances, theta_values): origin = (180, dec) offset_points_theta = np.array(point_offset(origin, distances, theta_values)) - 
euclidean_distances_theta = np.array([euclidean(origin, offset_point) - for offset_point in offset_points_theta.T]) + euclidean_distances_theta = np.array([euclidean(origin, offset_point) for offset_point in offset_points_theta.T]) relative_errors = np.abs((euclidean_distances_theta - distances) / distances) max_relative_error = np.max(relative_errors) angle_of_max_error = theta_values[np.argmax(relative_errors)] diff --git a/spherimatch/fof.py b/spherimatch/fof.py index 2a105b6..799d16f 100644 --- a/spherimatch/fof.py +++ b/spherimatch/fof.py @@ -48,7 +48,7 @@ def fof(catalog, tolerance) -> FoFResult: dec_bound, ring_chunk = DEC_BOUND, RING_CHUNK _catalog = Catalog(catalog) - cg = GridChunkGenerator(margin=2*tolerance) + cg = GridChunkGenerator(margin=2 * tolerance) cg.set_symmetric_ring_chunk(dec_bound, ring_chunk) cg.distribute(_catalog) @@ -68,7 +68,7 @@ def group_by_quadtree_chunk(args: tuple[Chunk, float]): # Rotate the center of the chunk to (180, 0) of the celestial sphere ra, dec = chunk.get_center() center_car = radec_to_cartesian(ra, dec) - normal_car = np.cross(center_car, np.array([-1., 0., 0.])) + normal_car = np.cross(center_car, np.array([-1.0, 0.0, 0.0])) normal_car /= np.linalg.norm(normal_car) normal_ra, normal_dec = cartesian_to_radec(normal_car) angle = great_circle_distance(ra, dec, 180, 0) diff --git a/spherimatch/result_xmatch.py b/spherimatch/result_xmatch.py index 994fa5e..bcab2a5 100644 --- a/spherimatch/result_xmatch.py +++ b/spherimatch/result_xmatch.py @@ -113,9 +113,13 @@ def get_serial_dataframe(self, min_match=1, reverse=False, coord_columns=['Ra', ''' if reverse: # Create a new XMatchResult object with the reversed result_dict reserve_result = self.__class__(self.cat2, self.cat1, self.tolerance, self.get_result_dict_reserve()) - df = reserve_result.get_serial_dataframe(min_match, reverse=False, coord_columns=coord_columns, - retain_all_columns=retain_all_columns, - retain_columns=retain_columns) + df = 
reserve_result.get_serial_dataframe( + min_match, + reverse=False, + coord_columns=coord_columns, + retain_all_columns=retain_all_columns, + retain_columns=retain_columns, + ) df['is_cat1'] = ~df['is_cat1'] return df idxes1 = self.cat1.get_indexes() diff --git a/spherimatch/utilities_spherical.py b/spherimatch/utilities_spherical.py index 5d1db2f..901da3e 100644 --- a/spherimatch/utilities_spherical.py +++ b/spherimatch/utilities_spherical.py @@ -31,7 +31,7 @@ def distances_to_target(target, points): # Haversine formula delta_ra = points_rad[:, 0] - target_rad[0] delta_dec = points_rad[:, 1] - target_rad[1] - a = np.sin(delta_dec/2.0)**2 + np.cos(target_rad[1]) * np.cos(points_rad[:, 1]) * np.sin(delta_ra/2.0)**2 + a = np.sin(delta_dec / 2.0) ** 2 + np.cos(target_rad[1]) * np.cos(points_rad[:, 1]) * np.sin(delta_ra / 2.0) ** 2 distances = 2 * np.arcsin(np.sqrt(a)) # Convert back to degrees diff --git a/spherimatch/xmatch.py b/spherimatch/xmatch.py index 8a1ca86..7189765 100644 --- a/spherimatch/xmatch.py +++ b/spherimatch/xmatch.py @@ -80,8 +80,8 @@ def xmatch(catalog1, catalog2, tolerance, verbose=False) -> XMatchResult: # [ENH]: Add an option for sorting the output _catalog1 = Catalog(catalog1) _catalog2 = Catalog(catalog2) - cg1 = GridChunkGenerator(margin=2*tolerance) - cg2 = GridChunkGenerator(margin=2*tolerance) + cg1 = GridChunkGenerator(margin=2 * tolerance) + cg2 = GridChunkGenerator(margin=2 * tolerance) cg1.set_symmetric_ring_chunk(60, [6, 6]) cg2.set_symmetric_ring_chunk(60, [6, 6]) cg1.distribute(_catalog1) @@ -103,7 +103,7 @@ def xmatch(catalog1, catalog2, tolerance, verbose=False) -> XMatchResult: def rotate_to_center(object_coor, chunk_ra, chunk_dec): # Rotate the center of the chunk to (180, 0) of the celestial sphere center_car = radec_to_cartesian(chunk_ra, chunk_dec) - normal_car = np.cross(center_car, np.array([-1., 0., 0.])) + normal_car = np.cross(center_car, np.array([-1.0, 0.0, 0.0])) normal_car /= np.linalg.norm(normal_car) normal_ra, 
normal_dec = cartesian_to_radec(normal_car) angle = great_circle_distance(chunk_ra, chunk_dec, 180, 0) diff --git a/tests/test_chunk.py b/tests/test_chunk.py index b19a144..9350ab1 100644 --- a/tests/test_chunk.py +++ b/tests/test_chunk.py @@ -93,9 +93,9 @@ def setUp(self): def test_different_chunk(self): tolerance = 0.01 - cg_a = GridChunkGenerator(margin=2*tolerance) + cg_a = GridChunkGenerator(margin=2 * tolerance) cg_a.set_symmetric_ring_chunk(60, [6, 6]) - cg_b = ChunkGeneratorByGrid(margin=2*tolerance) + cg_b = ChunkGeneratorByGrid(margin=2 * tolerance) result_a = self.group_by_quadtree_scipy(self.df_a, tolerance, cg_a) result_b = self.group_by_quadtree_scipy(self.df_b, tolerance, cg_b) print(len(result_a.get_coordinates())) diff --git a/tests/test_fof.py b/tests/test_fof.py index 2ab15a0..b4e4cc9 100644 --- a/tests/test_fof.py +++ b/tests/test_fof.py @@ -56,9 +56,9 @@ def generate_celestial_grid(**kwargs) -> list[tuple[float, float]]: return grid -def create_groups_from_grid(grid: list[tuple[float, float]], - tolerance=1., seed=None, fraction=0.5, - ring_radius=(0, 1)) -> tuple[list[list[tuple[float, float]]], NDArray]: +def create_groups_from_grid( + grid: list[tuple[float, float]], tolerance=1.0, seed=None, fraction=0.5, ring_radius=(0, 1) +) -> tuple[list[list[tuple[float, float]]], NDArray]: """ Randomly pick half of the grid points and create groups around them. 
For each selected grid point, use the point_offset() function to create several points @@ -69,7 +69,7 @@ def create_groups_from_grid(grid: list[tuple[float, float]], seed = np.random.randint(0, 1e6) np.random.seed(seed) np.random.shuffle(grid) - selected_points = grid[:np.floor(len(grid)*fraction).astype(int)] + selected_points = grid[: np.floor(len(grid) * fraction).astype(int)] groups = [] for point in selected_points: @@ -77,8 +77,8 @@ def create_groups_from_grid(grid: list[tuple[float, float]], for _ in range(np.random.randint(1, 5)): # Randomly create 1 to 4 additional points theta = np.random.uniform(0, 360) # Random direction offset_point = point_offset( - point, np.random.uniform(tolerance*ring_radius[0], tolerance*ring_radius[1]), - theta) # Random distance within 1 deg (tolerance) + point, np.random.uniform(tolerance * ring_radius[0], tolerance * ring_radius[1]), theta + ) # Random distance within 1 deg (tolerance) group.append(offset_point) groups.append(group) all_points = np.array([point for group in groups for point in group[0:]]) @@ -211,7 +211,9 @@ def test_qt_long_chain(self): tolerance = 2 all_points = np.array(grid) output_groups = fof(all_points, tolerance).get_coordinates() - self.assertEqual(len(output_groups), (dec_range//5)*2+1, f"Number of groups obtained: {len(output_groups)}") + self.assertEqual( + len(output_groups), (dec_range // 5) * 2 + 1, f"Number of groups obtained: {len(output_groups)}" + ) def test_qt_random_walk(self): ra_now = np.random.uniform(0, 360) diff --git a/tests/test_result_xmatch.py b/tests/test_result_xmatch.py index 00256eb..54aa310 100644 --- a/tests/test_result_xmatch.py +++ b/tests/test_result_xmatch.py @@ -16,8 +16,9 @@ def generate_offset_groups(base_ra: NDArray, base_dec: NDArray): base_coords = np.vstack([base_ra, base_dec]).T unit_ra = np.vstack([np.ones(length), np.zeros(length)]).T unit_dec = np.vstack([np.zeros(length), np.ones(length)]).T - coords = np.array([base_coords + 120 * i * unit_ra + 20 * j * 
unit_dec for i in range(3) - for j in range(-1, 2)]) + coords = np.array( + [base_coords + 120 * i * unit_ra + 20 * j * unit_dec for i in range(3) for j in range(-1, 2)] + ) coords = coords.reshape(-1, coords.shape[-1]) coords[:, 0] = coords[:, 0] % 360 return coords diff --git a/tests/test_toolbox_spherical.py b/tests/test_toolbox_spherical.py index 97075b7..fa567e5 100644 --- a/tests/test_toolbox_spherical.py +++ b/tests/test_toolbox_spherical.py @@ -173,8 +173,9 @@ def test_multiple_rotations(self): new_ra1, new_dec1 = rotate_radec_about_axis(ra, dec, axis_ra, axis_dec, theta1) new_ra2, new_dec2 = rotate_radec_about_axis(new_ra1, new_dec1, axis_ra, axis_dec, theta2) new_ra_combined, new_dec_combined = rotate_radec_about_axis(ra, dec, axis_ra, axis_dec, theta1 + theta2) - self.assertTrue(np.isclose(new_ra2, new_ra_combined, atol=1e-5) - and np.isclose(new_dec2, new_dec_combined, atol=1e-5)) + self.assertTrue( + np.isclose(new_ra2, new_ra_combined, atol=1e-5) and np.isclose(new_dec2, new_dec_combined, atol=1e-5) + ) if __name__ == '__main__': diff --git a/tests/test_xmatch.py b/tests/test_xmatch.py index bbc7619..97f78f5 100644 --- a/tests/test_xmatch.py +++ b/tests/test_xmatch.py @@ -38,7 +38,7 @@ def create_catalogs_from_grid(grid, tolerance=1, seed=None, fraction=0.5, ring_r seed = np.random.randint(0, 1e6) np.random.seed(seed) np.random.shuffle(grid) - selected_points = grid[:np.floor(len(grid)*fraction).astype(int)] + selected_points = grid[: np.floor(len(grid) * fraction).astype(int)] centrals = [] neighbors = [] @@ -49,7 +49,7 @@ def create_catalogs_from_grid(grid, tolerance=1, seed=None, fraction=0.5, ring_r idxes = [] # List of indexes of the surrounding points for _ in range(np.random.randint(5, 10)): # Randomly create 1 to 4 additional points theta = np.random.uniform(0, 360) # Random direction - distance = np.random.uniform(tolerance*ring_radius[0], tolerance*ring_radius[1]) # Random distance + distance = np.random.uniform(tolerance * ring_radius[0], 
tolerance * ring_radius[1]) # Random distance offset_point = point_offset(point, distance, theta) neighbors.append(offset_point) idxes.append(i_cat2) From eac44ce514f0c29bad827ad326f76cb5d6fae792 Mon Sep 17 00:00:00 2001 From: Yuan-Ming Hsu <48866415+technic960183@users.noreply.github.com> Date: Sun, 4 May 2025 22:36:35 +0800 Subject: [PATCH 09/10] Fix typos in variable names and docs Thanks: cspell --- .github/workflows/publish-pypi.yml | 4 ++-- README.md | 2 +- docs/source/tutorial/duplicates_removal.rst | 2 +- docs/source/tutorial/fof.rst | 6 +++--- docs/source/tutorial/input_validation.rst | 4 ++-- spherimatch/catalog.py | 6 +++--- spherimatch/chunk.py | 8 ++++---- spherimatch/chunk_generator.py | 8 ++++---- spherimatch/chunk_generator_grid.py | 2 +- spherimatch/fof.py | 8 ++++---- spherimatch/result_fof.py | 6 +++--- spherimatch/result_xmatch.py | 8 ++++---- spherimatch/xmatch.py | 18 +++++++++--------- tests/test_catalog.py | 6 +++--- tests/test_result_xmatch.py | 12 ++++++------ tests/test_xmatch.py | 10 +++++----- 16 files changed, 55 insertions(+), 55 deletions(-) diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index bae1ad3..d09a4be 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -35,7 +35,7 @@ jobs: - name: Upload distributions uses: actions/upload-artifact@v4 with: - name: release-dists + name: release-dist path: dist/ pypi-publish: @@ -55,7 +55,7 @@ jobs: - name: Retrieve release distributions uses: actions/download-artifact@v4 with: - name: release-dists + name: release-dist path: dist/ - name: Publish to PyPI diff --git a/README.md b/README.md index 923976a..2007842 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,6 @@ To cite spherimatch in your publication, please use the following BibTeX entry: note = {Accessed: YYYY-MM} } ``` -Addtionally, you may add a reference to `https://github.com/technic960183/spherimatch` in the footnote if suitable. 
+Additionally, you may add a reference to `https://github.com/technic960183/spherimatch` in the footnote if suitable. If you publish a paper that uses `spherimatch`, please let me know. I would be happy to know how this package has been used in research. diff --git a/docs/source/tutorial/duplicates_removal.rst b/docs/source/tutorial/duplicates_removal.rst index 0955189..2b4caca 100644 --- a/docs/source/tutorial/duplicates_removal.rst +++ b/docs/source/tutorial/duplicates_removal.rst @@ -57,7 +57,7 @@ properties of your catalog. The ``'dup_num'`` column shows the number of duplica .. note:: When there are two 'unique' objects that are very close to each other, it is possible that they will be grouped together. - In an exetrema case, it is possible that a chain of unique objects will be grouped together, linking by their duplicates. + In an extreme case, it is possible that a chain of unique objects will be grouped together, linking by their duplicates. But this is rare for most catalogs. To solve this problem, you can try to decrease the tolerance value. However, if decreasing the tolerance value separates objects that should be considered as duplicates, this package does not provide a solution for now. You may need to remove the duplicates manually for those close objects. diff --git a/docs/source/tutorial/fof.rst b/docs/source/tutorial/fof.rst index 5be83c5..3b46474 100644 --- a/docs/source/tutorial/fof.rst +++ b/docs/source/tutorial/fof.rst @@ -36,7 +36,7 @@ The result object contains the clustering results. Four methods are available to get_group_dataframe() --------------------- -To get the clustering results with the appendind data (``'mag'`` in this case), use the +To get the clustering results with the appending data (``'mag'`` in this case), use the :func:`spherimatch.FoFResult.get_group_dataframe` method: .. code-block:: python @@ -94,8 +94,8 @@ Expected output:: Each group is also a pandas DataFrame. .. 
note:: - The iterater from ``groupby()`` is extremely slow for large datasets. The current solution is to flatten the - DataFrame into a single layer of index and manupulate the index directly, or even turn the DataFrame into a numpy array. + The iterator from ``groupby()`` is extremely slow for large datasets. The current solution is to flatten the + DataFrame into a single layer of index and manipulate the index directly, or even turn the DataFrame into a numpy array. If you want DataFrame with a single layer of index and the size of each group as a column, you can use the following code: diff --git a/docs/source/tutorial/input_validation.rst b/docs/source/tutorial/input_validation.rst index b0c65da..20e9115 100644 --- a/docs/source/tutorial/input_validation.rst +++ b/docs/source/tutorial/input_validation.rst @@ -16,7 +16,7 @@ The input DataFrame should have the following columns: - One of the ``['ra', 'Ra', 'RA']`` (Right Ascension) in degrees. - One of the ``['dec', 'Dec', 'DEC']`` (Declination) in degrees. -Addtionally, the DataFrame can have any other columns as well. These columns will be preserved in the output. +Additionally, the DataFrame can have any other columns as well. These columns will be preserved in the output. And the index of the DataFrame has no restrictions and will be preserved in the output as well. (MultiIndex is not supported for now.) numpy.ndarray @@ -27,4 +27,4 @@ The input numpy array should be in the shape of (N, 2), where N is the number of - The first column (``data[:, 0]``) should be the Right Ascension in degrees. - The second column (``data[:, 1]``) should be the Declination in degrees. -Addtional data columns are not supported in the numpy array format for now. +Additional data columns are not supported in the numpy array format for now. 
diff --git a/spherimatch/catalog.py b/spherimatch/catalog.py index 26537d2..b7059ca 100644 --- a/spherimatch/catalog.py +++ b/spherimatch/catalog.py @@ -13,7 +13,7 @@ class Catalog: The input data can be either a numpy array or a pandas dataframe. * np.array: The array must have a shape of (N, 2), representing N points with - two values: [ra (azimuth, longitude), dec (alltitude, latitude)]. + two values: [ra (azimuth, longitude), dec (altitude, latitude)]. * pd.DataFrame: The dataframe must have two columns named 'Ra' and 'Dec' (or all the possible combinations with 'ra', 'dec'; 'RA', 'DEC'). ''' @@ -22,7 +22,7 @@ def __init__(self, data: Any) -> None: self.datatype = type(data) self.input_data = data self.ra = None # ra, longitude, azimuth - self.dec = None # dec, latitude, alltitude + self.dec = None # dec, latitude, altitude self.ra_column: Optional[str] = None self.dec_column: Optional[str] = None if isinstance(self.input_data, np.ndarray): @@ -52,7 +52,7 @@ def _check_validity_range(self): if len(self.ra) != len(self.dec): raise ValueError("The length of Ra and Dec must be the same!") - def get_coordiantes(self) -> NDArray[np.float64]: + def get_coordinates(self) -> NDArray[np.float64]: '''Get the coordinate of the points in the catalog for xmatch and fof. 
Returns diff --git a/spherimatch/chunk.py b/spherimatch/chunk.py index ce88f4a..23db898 100644 --- a/spherimatch/chunk.py +++ b/spherimatch/chunk.py @@ -4,9 +4,9 @@ class Chunk: - def __init__(self, chunk_id, ra, dec, discription=None): + def __init__(self, chunk_id, ra, dec, description=None): self.chunk_id = chunk_id - self.discription = discription if discription is not None else f"Chunk {chunk_id} ({ra:3f}, {dec:3f})" + self.description = description if description is not None else f"Chunk {chunk_id} ({ra:3f}, {dec:3f})" self.central_data = np.empty((0, 2), dtype=np.float64) self.boundary_data = np.empty((0, 2), dtype=np.float64) self.central_index = np.empty((0), dtype=np.int64) @@ -32,7 +32,7 @@ def get_index(self) -> NDArray[np.int64]: def get_center(self): return self.chunk_ra, self.chunk_dec - def farest_distance(self, distance=None): + def farthest_distance(self, distance=None): if distance is None: return self.max_size self.max_size = distance @@ -44,4 +44,4 @@ def __repr__(self): return f"Chunk {self.chunk_id} ({self.chunk_ra:.1f}, {self.chunk_dec:.1f}): {len(self)} objects" def __str__(self): - return self.discription + f" with {len(self)} objects" + return self.description + f" with {len(self)} objects" diff --git a/spherimatch/chunk_generator.py b/spherimatch/chunk_generator.py index a4b01cc..359dd4a 100644 --- a/spherimatch/chunk_generator.py +++ b/spherimatch/chunk_generator.py @@ -26,20 +26,20 @@ def distribute(self, catalog: Catalog) -> list[Chunk]: chunks : list[Chunk] List of chunks with data. 
''' - coordiantes = catalog.get_coordiantes() + coordinates = catalog.get_coordinates() indexes = catalog.get_indexes() - ra, dec = coordiantes[:, 0], coordiantes[:, 1] + ra, dec = coordinates[:, 0], coordinates[:, 1] # Get chunk ids for central coordinates central_chunk_ids = self.coor2id_central(ra, dec) for i in range(len(self.chunks)): mask = central_chunk_ids == i - self.chunks[i].add_central_data(coordiantes[mask], indexes[mask]) + self.chunks[i].add_central_data(coordinates[mask], indexes[mask]) # Get chunk ids for boundary coordinates boundary_chunk_indices = self.coor2id_boundary(ra, dec) for boundary_chunk_id, indices in enumerate(boundary_chunk_indices): # May be a bug here - self.chunks[boundary_chunk_id].add_boundary_data(coordiantes[indices], indexes[indices]) + self.chunks[boundary_chunk_id].add_boundary_data(coordinates[indices], indexes[indices]) return self.chunks diff --git a/spherimatch/chunk_generator_grid.py b/spherimatch/chunk_generator_grid.py index 9bb5190..507c5db 100644 --- a/spherimatch/chunk_generator_grid.py +++ b/spherimatch/chunk_generator_grid.py @@ -104,7 +104,7 @@ def generate(self): chunk_id = 0 for config in self.get_all_config(): chunk = Chunk(chunk_id, config['center_ra'], config['center_dec']) - chunk.farest_distance(distance=config.get_max_radius()) + chunk.farthest_distance(distance=config.get_max_radius()) self.chunks.append(chunk) chunk_id += 1 diff --git a/spherimatch/fof.py b/spherimatch/fof.py index 799d16f..a828f97 100644 --- a/spherimatch/fof.py +++ b/spherimatch/fof.py @@ -73,11 +73,11 @@ def group_by_quadtree_chunk(args: tuple[Chunk, float]): normal_ra, normal_dec = cartesian_to_radec(normal_car) angle = great_circle_distance(ra, dec, 180, 0) rot_ra, rot_dec = rotate_radec_about_axis(objects[:, 0], objects[:, 1], normal_ra, normal_dec, angle) - corrdinates_np = np.vstack((rot_ra, rot_dec)).T + coordinates_np = np.vstack((rot_ra, rot_dec)).T index_np = chunk.get_index() - SAFTY_FACTOR = 1.05 - A2E_factor = (1 + 
compute_error(chunk.farest_distance(), tolerance)) * SAFTY_FACTOR - groups_index = spherical_quadtree_grouping(index_np, corrdinates_np, tolerance, A2E_factor) + SAFETY_FACTOR = 1.05 + A2E_factor = (1 + compute_error(chunk.farthest_distance(), tolerance)) * SAFETY_FACTOR + groups_index = spherical_quadtree_grouping(index_np, coordinates_np, tolerance, A2E_factor) return groups_index diff --git a/spherimatch/result_fof.py b/spherimatch/result_fof.py index f9e614a..c4883ec 100644 --- a/spherimatch/result_fof.py +++ b/spherimatch/result_fof.py @@ -18,7 +18,7 @@ def get_coordinates(self) -> list[list[tuple]]: list[list[tuple]] A list of lists of tuples of coordinates of objects in each group. """ - objects_coordinates = self.catalog.get_coordiantes() + objects_coordinates = self.catalog.get_coordinates() return [[tuple(objects_coordinates[i, :]) for i in g] for g in self.result_list] def get_group_coordinates(self) -> list[tuple]: @@ -29,7 +29,7 @@ def get_group_coordinates(self) -> list[tuple]: list[tuple] A list of tuples of coordinates of the center of each group. """ - objects_coordinates = self.catalog.get_coordiantes() + objects_coordinates = self.catalog.get_coordinates() # [FIXME] This return a list of NDArrays, not a list of tuples. 
return [np.average(objects_coordinates[g, :], axis=0) for g in self.result_list] @@ -73,7 +73,7 @@ def get_group_dataframe(self, min_group_size=1, coord_columns=['Ra', 'Dec'], new_index_tuples.append((group_index, object_index)) original_indices.append(object_index) - data_df = pd.DataFrame(self.catalog.get_coordiantes(), columns=coord_columns, index=self.catalog.get_indexes()) + data_df = pd.DataFrame(self.catalog.get_coordinates(), columns=coord_columns, index=self.catalog.get_indexes()) append_df = self.catalog.get_appending_data(retain_all_columns, retain_columns) if len(append_df.columns) > 0: data_df = pd.concat([data_df, append_df], axis=1) diff --git a/spherimatch/result_xmatch.py b/spherimatch/result_xmatch.py index bcab2a5..0cc078f 100644 --- a/spherimatch/result_xmatch.py +++ b/spherimatch/result_xmatch.py @@ -58,7 +58,7 @@ def get_dataframe1(self, min_match=0, coord_columns=['Ra', 'Dec'], The dataframe of the first catalog with the number of matches. ''' idxes_array = self.cat1.get_indexes() - coords_array = self.cat1.get_coordiantes() + coords_array = self.cat1.get_coordinates() data_df = pd.DataFrame(coords_array, columns=coord_columns, index=idxes_array) data_df['N_match'] = [len(v) for v in self.get_result_dict().values()] append_df = self.cat1.get_appending_data(retain_all_columns, retain_columns) @@ -74,7 +74,7 @@ def get_dataframe2(self, min_match=0, coord_columns=['Ra', 'Dec'], Please refer to the `get_dataframe1()` and replace the 'first catalog' with the 'second catalog'. 
''' idxes_array = self.cat2.get_indexes() - coords_array = self.cat2.get_coordiantes() + coords_array = self.cat2.get_coordinates() data_df = pd.DataFrame(coords_array, columns=coord_columns, index=idxes_array) data_df['N_match'] = [len(v) for v in self.get_result_dict_reserve().values()] append_df = self.cat2.get_appending_data(retain_all_columns, retain_columns) @@ -146,8 +146,8 @@ def get_serial_dataframe(self, min_match=1, reverse=False, coord_columns=['Ra', idx_combine[~is_df1_np] += n1 idxes_array1 = self.cat1.get_indexes() idxes_array2 = self.cat2.get_indexes() - df1 = pd.DataFrame(self.cat1.get_coordiantes(), columns=coord_columns, index=idxes_array1) - df2 = pd.DataFrame(self.cat2.get_coordiantes(), columns=coord_columns, index=idxes_array2) + df1 = pd.DataFrame(self.cat1.get_coordinates(), columns=coord_columns, index=idxes_array1) + df2 = pd.DataFrame(self.cat2.get_coordinates(), columns=coord_columns, index=idxes_array2) append_df1 = self.cat1.get_appending_data(retain_all_columns, retain_columns, invalid_key_error=False) append_df2 = self.cat2.get_appending_data(retain_all_columns, retain_columns, invalid_key_error=False) if len(append_df1.columns) > 0: diff --git a/spherimatch/xmatch.py b/spherimatch/xmatch.py index 7189765..83c5f2f 100644 --- a/spherimatch/xmatch.py +++ b/spherimatch/xmatch.py @@ -15,7 +15,7 @@ ) -def unique_merge_defaultdicts(d1: defaultdict, d2: defaultdict): +def unique_merge_defaultdict(d1: defaultdict, d2: defaultdict): """Joins two dictionaries, merging values for shared keys and preserving others. When both dictionaries have the same key, this function makes a new list @@ -35,7 +35,7 @@ def unique_merge_defaultdicts(d1: defaultdict, d2: defaultdict): A dictionary with all keys from both d1 and d2. For shared keys, it has a list of unique values. For unshared keys, it has the original list. 
""" - # Convert defaultdicts to arrays + # Convert defaultdict to arrays keys1 = np.array(list(d1.keys()), dtype=np.int64) keys2 = np.array(list(d2.keys()), dtype=np.int64) # Find intersection and unique keys in both arrays @@ -96,7 +96,7 @@ def xmatch(catalog1, catalog2, tolerance, verbose=False) -> XMatchResult: if i == 0: merged_dict = dd else: - merged_dict = unique_merge_defaultdicts(merged_dict, dd) + merged_dict = unique_merge_defaultdict(merged_dict, dd) return XMatchResult(_catalog1, _catalog2, tolerance, merged_dict) @@ -120,18 +120,18 @@ def xmatch_chunk(args: tuple[Chunk, Chunk, float]): ra, dec = chunk1.get_center() rot_coor1 = np.array(rotate_to_center(objects1, ra, dec)).T rot_coor2 = np.array(rotate_to_center(objects2, ra, dec)).T - if chunk1.farest_distance() != chunk2.farest_distance(): - raise ValueError("The two chunks have different farest distances!") - SAFTY_FACTOR = 1.01 - A2E_factor = (1 + compute_error(chunk1.farest_distance(), tolerance)) * SAFTY_FACTOR - idx1, idxes2 = spherical_xmatching(index1, rot_coor1, index2, rot_coor2, tolerance, A2E_factor) + if chunk1.farthest_distance() != chunk2.farthest_distance(): + raise ValueError("The two chunks have different farthest distances!") + SAFETY_FACTOR = 1.01 + A2E_factor = (1 + compute_error(chunk1.farthest_distance(), tolerance)) * SAFETY_FACTOR + idx1, idxes2 = spherical_xmatch(index1, rot_coor1, index2, rot_coor2, tolerance, A2E_factor) dd = defaultdict(list) for key, value in zip(idx1, idxes2): dd[key] = value return dd -def spherical_xmatching(idx1: np.array, coor1: np.array, idx2: np.array, coor2: np.array, tolerance, A2E_factor): +def spherical_xmatch(idx1: np.array, coor1: np.array, idx2: np.array, coor2: np.array, tolerance, A2E_factor): qt1 = KDTree(coor1) qt2 = KDTree(coor2) list_of_indexes = qt1.query_ball_tree(qt2, tolerance * A2E_factor) # list of elements in idx2 diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 871980c..543bda9 100644 --- a/tests/test_catalog.py 
+++ b/tests/test_catalog.py @@ -17,14 +17,14 @@ def test_random_np(self): for i in range(10): (ra, dec), expected_output = self.get_input_output_pair(N=1000) catalog = Catalog(np.vstack([ra, dec]).T) - code_output = catalog.get_coordiantes() + code_output = catalog.get_coordinates() self.assertEqual(code_output.tolist(), expected_output.tolist()) def test_random_pd(self): for i in range(10): (ra, dec), expected_output = self.get_input_output_pair(N=1000) catalog = Catalog(pd.DataFrame({'Ra': ra, 'Dec': dec})) - code_output = catalog.get_coordiantes() + code_output = catalog.get_coordinates() self.assertEqual(code_output.tolist(), expected_output.tolist()) @@ -54,7 +54,7 @@ def test_pd_column_name_mixed(self): def tearDown(self): catalog = Catalog(self.parameter) - code_output = catalog.get_coordiantes() + code_output = catalog.get_coordinates() self.assertEqual(code_output.tolist(), self.expected_output.tolist()) diff --git a/tests/test_result_xmatch.py b/tests/test_result_xmatch.py index 54aa310..dbaa7ec 100644 --- a/tests/test_result_xmatch.py +++ b/tests/test_result_xmatch.py @@ -42,7 +42,7 @@ def test_get_result_dict(self): def test_get_dataframe1(self): result = xmatch(self.coords1, self.coords2, 2) - columns = ['Ra', 'Deccc'] + columns = ['Ra', 'DE'] df = result.get_dataframe1(coord_columns=columns) self.assertEqual(len(df), self.coords1.shape[0]) self.assertListEqual(list(df.columns), columns + ['N_match']) @@ -97,7 +97,7 @@ def test_get_dataframe1_retain_columns(self): def test_get_dataframe2(self): result = xmatch(self.coords1, self.coords2, 2) - columns = ['Ra', 'Deccc'] + columns = ['Ra', 'DE'] df = result.get_dataframe2(coord_columns=columns) self.assertEqual(len(df), self.coords2.shape[0]) self.assertListEqual(list(df.columns), columns + ['N_match']) @@ -109,7 +109,7 @@ def test_get_dataframe2(self): def test_get_serial_dataframe(self): result = xmatch(self.coords1, self.coords2, 2) - columns = ['Ra', 'Deccc'] + columns = ['Ra', 'DE'] df = 
result.get_serial_dataframe(coord_columns=columns) self.assertEqual(len(df), self.coords1.shape[0] + self.coords2.shape[0] * self.n1) self.assertListEqual(list(df.columns), columns + ['N_match', 'is_cat1']) @@ -128,7 +128,7 @@ def test_get_serial_dataframe(self): def test_get_serial_dataframe_reverse(self): result = xmatch(self.coords1, self.coords2, 2) - columns = ['Ra', 'Deccc'] + columns = ['Ra', 'DE'] df = result.get_serial_dataframe(coord_columns=columns, reverse=True) self.assertEqual(len(df), self.coords2.shape[0] + self.coords1.shape[0] * self.n2) self.assertListEqual(list(df.columns), columns + ['N_match', 'is_cat1']) @@ -196,7 +196,7 @@ def test_get_serial_dataframe_retain_columns(self): # @unittest.skip("Future functionality") # def test_get_multiindex_dataframe(self): # result = xmatch(self.coords1, self.coords2, 2) - # columns = ['Ra', 'Deccc'] + # columns = ['Ra', 'DE'] # df = result.get_multiindex_dataframe(coord_columns=columns) # self.assertEqual(len(df), self.coords1.shape[0] + self.coords2.shape[0] * self.n1) # self.assertListEqual(list(df.columns), ['Catalog', 'Column']) @@ -208,5 +208,5 @@ def test_get_serial_dataframe_retain_columns(self): # group_df = df.loc[i] # continue # [TODO] Check the content of the group dataframe - # [FIXME] Write a test to check that if itterating over the deaultdict, the keys won't be in the correct order. + # [FIXME] Write a test to check that if iterating over the defaultdict, the keys won't be in the correct order. # Thus yielding an incorrect result of N_match. 
diff --git a/tests/test_xmatch.py b/tests/test_xmatch.py index 97f78f5..25e6174 100644 --- a/tests/test_xmatch.py +++ b/tests/test_xmatch.py @@ -60,7 +60,7 @@ def create_catalogs_from_grid(grid, tolerance=1, seed=None, fraction=0.5, ring_r return expected_idx, (centrals_np, neighbors_np) -def check_Xmatching(expected_matches: dict, output_matches: defaultdict): +def check_xmatch(expected_matches: dict, output_matches: defaultdict): """ Compares the expected matching groups with the output matching groups to determine if the matching process has been conducted correctly. @@ -104,7 +104,7 @@ def print_format_match(problematic_matches, central_point, surrounding_points): print(f"[X] {central_point_str}: [{surrounding_points_str}]") -class TestCelestialXMatching_RandomGrid(unittest.TestCase): +class TestCelestialXmatch_RandomGrid(unittest.TestCase): """ A unittest class for verifying the functionality of a celestial object cross-matching algorithm. @@ -139,7 +139,7 @@ def setUp(self): def test_match_by_quadtree(self): output_matches = xmatch(self.two_catalogs[0], self.two_catalogs[1], self.tolerance).get_result_dict() - problematic_matches = check_Xmatching(self.expected_matching, output_matches) + problematic_matches = check_xmatch(self.expected_matching, output_matches) print_format_match(problematic_matches, self.two_catalogs[0], self.two_catalogs[1]) self.assertEqual(len(problematic_matches), 0, f"Failed groups: {problematic_matches}") @@ -250,10 +250,10 @@ def tearDown(self): if __name__ == "__main__": - suite = unittest.TestLoader().loadTestsFromTestCase(TestCelestialXMatching_RandomGrid) + suite = unittest.TestLoader().loadTestsFromTestCase(TestCelestialXmatch_RandomGrid) unittest.TextTestRunner(verbosity=2).run(suite) for i in range(100): - ut = TestCelestialXMatching_RandomGrid() + ut = TestCelestialXmatch_RandomGrid() ut.setUp() ut.test_match_by_quadtree() print(f"Test {i+1} passed!") From 49fb0fdb1e7049af3b99255825b9a7a0a74c9656 Mon Sep 17 00:00:00 2001 
From: Yuan-Ming Hsu <48866415+technic960183@users.noreply.github.com> Date: Mon, 5 May 2025 22:42:18 +0800 Subject: [PATCH 10/10] Rename `pycorrelator.rst` to `spherimatch.rst` Thanks: cspell Thanks to the `megalinter` team for making `cspell` able to check the filename. This led me to the discovery that I forgot to rename this rst file when I renamed the package. --- docs/source/dev/index.rst | 10 +++++----- docs/source/dev/{pycorrelator.rst => spherimatch.rst} | 0 2 files changed, 5 insertions(+), 5 deletions(-) rename docs/source/dev/{pycorrelator.rst => spherimatch.rst} (100%) diff --git a/docs/source/dev/index.rst b/docs/source/dev/index.rst index 140123c..a7ae8a5 100644 --- a/docs/source/dev/index.rst +++ b/docs/source/dev/index.rst @@ -9,11 +9,6 @@ please refer to the `API Reference <../ref/index.html>`_ or the `Tutorials <../t This section is not complete yet. -.. toctree:: - :maxdepth: 2 - - spherimatch - To develop the project, clone the repository and install the project in editable mode. .. code-block:: console @@ -29,3 +24,8 @@ To test the project, run the following command. $ python -m unittest You should see ``OK (skipped=3)`` if all tests pass. + +.. toctree:: + :maxdepth: 2 + + spherimatch diff --git a/docs/source/dev/pycorrelator.rst b/docs/source/dev/spherimatch.rst similarity index 100% rename from docs/source/dev/pycorrelator.rst rename to docs/source/dev/spherimatch.rst