Skip to content

Commit

Permalink
Merge branch 'main' into add-intro-vertexai
Browse files Browse the repository at this point in the history
  • Loading branch information
SauravP97 authored Jan 7, 2025
2 parents 72c5dad + 258d624 commit 6f2b854
Show file tree
Hide file tree
Showing 26 changed files with 3,498 additions and 1,878 deletions.
6 changes: 3 additions & 3 deletions libs/community/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@ lint_tests: MYPY_CACHE=.mypy_cache_test

lint lint_diff lint_package lint_tests:
./scripts/lint_imports.sh
poetry run ruff .
poetry run ruff check .
poetry run ruff format $(PYTHON_FILES) --diff
poetry run ruff --select I $(PYTHON_FILES)
poetry run ruff check --select I $(PYTHON_FILES)
mkdir $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)

format format_diff:
poetry run ruff format $(PYTHON_FILES)
poetry run ruff --select I --fix $(PYTHON_FILES)
poetry run ruff check --select I --fix $(PYTHON_FILES)

spell_check:
poetry run codespell --toml pyproject.toml
Expand Down
43 changes: 36 additions & 7 deletions libs/community/langchain_google_community/drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,26 @@

import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Union
from typing import Any, ClassVar, Dict, List, Optional, Sequence, Tuple, Union

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document
from pydantic import BaseModel, field_validator, model_validator

SCOPES = ["https://www.googleapis.com/auth/drive.file"]


class GoogleDriveLoader(BaseLoader, BaseModel):
"""Load Google Docs from `Google Drive`."""

# Generated from https://developers.google.com/drive/api/guides/api-specific-auth
# limiting to the scopes that are required to read the files
VALID_SCOPES: ClassVar[Tuple[str, ...]] = (
"https://www.googleapis.com/auth/drive.file",
"https://www.googleapis.com/auth/drive.readonly",
"https://www.googleapis.com/auth/drive.meet.readonly",
"https://www.googleapis.com/auth/drive.metadata.readonly",
"https://www.googleapis.com/auth/drive.metadata",
)

service_account_key: Path = Path.home() / ".credentials" / "keys.json"
"""Path to the service account key file."""
credentials_path: Path = Path.home() / ".credentials" / "credentials.json"
Expand Down Expand Up @@ -51,6 +59,9 @@ class GoogleDriveLoader(BaseLoader, BaseModel):
"""Whether to load authorization identities."""
load_extended_metadata: bool = False
"""Whether to load extended metadata."""
scopes: List[str] = ["https://www.googleapis.com/auth/drive.file"]
"""The credential scopes to use for Google Drive API access. Default is
drive.file scope."""

def _get_file_size_from_id(self, id: str) -> str:
"""Fetch the size of the file."""
Expand Down Expand Up @@ -252,6 +263,22 @@ def validate_credentials_path(cls, v: Any, **kwargs: Any) -> Any:
raise ValueError(f"credentials_path {v} does not exist")
return v

@field_validator("scopes")
def validate_scopes(cls, v: List[str]) -> List[str]:
    """Check the ``scopes`` field against the class's allowed scope list.

    Args:
        v: The scope strings supplied for the ``scopes`` field.

    Returns:
        ``v`` unchanged, when it is non-empty and every entry appears in
        ``cls.VALID_SCOPES``.

    Raises:
        ValueError: If ``v`` is empty, or if at least one entry is not
            listed in ``cls.VALID_SCOPES``. The message names the
            rejected entries and lists the accepted ones.
    """
    # Guard clause: an empty collection of scopes is never acceptable.
    if not v:
        raise ValueError("At least one scope must be provided")

    allowed = cls.VALID_SCOPES
    rejected = list(filter(lambda scope: scope not in allowed, v))
    if not rejected:
        return v

    raise ValueError(
        f"Invalid Google Drive API scope(s): {', '.join(rejected)}. "
        f"Valid scopes are: {', '.join(allowed)}"
    )

def _load_credentials(self) -> Any:
"""Load credentials."""
# Adapted from https://developers.google.com/drive/api/v3/quickstart/python
Expand All @@ -273,11 +300,13 @@ def _load_credentials(self) -> Any:
creds = None
if self.service_account_key.exists():
return service_account.Credentials.from_service_account_file(
str(self.service_account_key), scopes=SCOPES
str(self.service_account_key), scopes=self.scopes
)

if self.token_path.exists():
creds = Credentials.from_authorized_user_file(str(self.token_path), SCOPES)
creds = Credentials.from_authorized_user_file(
str(self.token_path), self.scopes
)

if self.credentials:
# use whatever was passed to us
Expand All @@ -289,13 +318,13 @@ def _load_credentials(self) -> Any:
creds.refresh(Request())
elif "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
creds, project = default()
creds = creds.with_scopes(SCOPES)
creds = creds.with_scopes(self.scopes)
# no need to write to file
if creds:
return creds
else:
flow = InstalledAppFlow.from_client_secrets_file(
str(self.credentials_path), SCOPES
str(self.credentials_path), self.scopes
)
creds = flow.run_local_server(port=0)
with open(self.token_path, "w") as token:
Expand Down
Loading

0 comments on commit 6f2b854

Please sign in to comment.