From fe8ee850f477b55db90016668a8d1b2db27085fd Mon Sep 17 00:00:00 2001 From: Kaiyu Yang Date: Tue, 3 Dec 2024 16:56:57 +0000 Subject: [PATCH] minor updates --- src/lean_dojo/data_extraction/lean.py | 13 +++++++++---- tests/conftest.py | 2 ++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/lean_dojo/data_extraction/lean.py b/src/lean_dojo/data_extraction/lean.py index 0fc7bedb..1e9a2d20 100644 --- a/src/lean_dojo/data_extraction/lean.py +++ b/src/lean_dojo/data_extraction/lean.py @@ -55,7 +55,7 @@ _URL_REGEX = re.compile(r"(?P<url>.*?)/*") -_SSH_TO_HTTPS_REGEX = re.compile(r"^git@github\.com:(.+)/(.+)(?:\.git)?$") +_SSH_TO_HTTPS_REGEX = re.compile(r"git@github\.com:(?P<user>.+)/(?P<repo>.+)\.git") REPO_CACHE_PREFIX = "repos" @@ -70,7 +70,13 @@ def normalize_url(url: str, repo_type: RepoType = RepoType.GITHUB) -> str: if repo_type == RepoType.LOCAL: # Convert to absolute path if local. return os.path.abspath(url) # Remove trailing `/`. - return _URL_REGEX.fullmatch(url)["url"] # type: ignore + url = _URL_REGEX.fullmatch(url)["url"] # type: ignore + return ssh_to_https(url) + + +def ssh_to_https(url: str) -> str: + m = _SSH_TO_HTTPS_REGEX.fullmatch(url) + return f"https://github.com/{m.group('user')}/{m.group('repo')}" if m else url def get_repo_type(url: str) -> Optional[RepoType]: @@ -81,8 +87,7 @@ def get_repo_type(url: str) -> Optional[RepoType]: Returns: Optional[str]: The type of the repository (None if the repo cannot be found). """ - m = _SSH_TO_HTTPS_REGEX.match(url) - url = f"https://github.com/{m.group(1)}/{m.group(2)}" if m else url + url = ssh_to_https(url) parsed_url = urllib.parse.urlparse(url) # type: ignore if parsed_url.scheme in ["http", "https"]: # Case 1 - GitHub URL. 
diff --git a/tests/conftest.py b/tests/conftest.py index d581c620..a735e64e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,7 @@ from lean_dojo import * +MINIF2F_URL = "git@github.com:yangky11/miniF2F-lean4.git" BATTERIES_URL = "https://github.com/leanprover-community/batteries" AESOP_URL = "https://github.com/leanprover-community/aesop" MATHLIB4_URL = "https://github.com/leanprover-community/mathlib4" @@ -10,6 +11,7 @@ EXAMPLE_COMMIT_HASH = "3f8c5eb303a225cdef609498b8d87262e5ef344b" REMOTE_EXAMPLE_URL = "https://gitee.com/rexzong/lean4-example" URLS = [ + MINIF2F_URL, BATTERIES_URL, AESOP_URL, MATHLIB4_URL,