Skip to content

Commit

Permalink
feat(google-drive): add scope validation for Google Drive API credent…
Browse files Browse the repository at this point in the history
…ials (#667)
  • Loading branch information
madhukar32 authored Jan 6, 2025
1 parent 1e3af5a commit 3455e76
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 7 deletions.
43 changes: 36 additions & 7 deletions libs/community/langchain_google_community/drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,26 @@

import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Union
from typing import Any, ClassVar, Dict, List, Optional, Sequence, Tuple, Union

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document
from pydantic import BaseModel, field_validator, model_validator

# Module-level OAuth scope list containing only the drive.file scope; this is
# the same single value that the `scopes` field of GoogleDriveLoader defaults to.
SCOPES = ["https://www.googleapis.com/auth/drive.file"]


class GoogleDriveLoader(BaseLoader, BaseModel):
"""Load Google Docs from `Google Drive`."""

# Generated from https://developers.google.com/drive/api/guides/api-specific-auth
# limiting to the scopes that are required to read the files
VALID_SCOPES: ClassVar[Tuple[str, ...]] = (
"https://www.googleapis.com/auth/drive.file",
"https://www.googleapis.com/auth/drive.readonly",
"https://www.googleapis.com/auth/drive.meet.readonly",
"https://www.googleapis.com/auth/drive.metadata.readonly",
"https://www.googleapis.com/auth/drive.metadata",
)

service_account_key: Path = Path.home() / ".credentials" / "keys.json"
"""Path to the service account key file."""
credentials_path: Path = Path.home() / ".credentials" / "credentials.json"
Expand Down Expand Up @@ -51,6 +59,9 @@ class GoogleDriveLoader(BaseLoader, BaseModel):
"""Whether to load authorization identities."""
load_extended_metadata: bool = False
"""Whether to load extended metadata."""
scopes: List[str] = ["https://www.googleapis.com/auth/drive.file"]
"""The credential scopes to use for Google Drive API access. Default is
drive.file scope."""

def _get_file_size_from_id(self, id: str) -> str:
"""Fetch the size of the file."""
Expand Down Expand Up @@ -252,6 +263,22 @@ def validate_credentials_path(cls, v: Any, **kwargs: Any) -> Any:
raise ValueError(f"credentials_path {v} does not exist")
return v

@field_validator("scopes")
def validate_scopes(cls, v: List[str]) -> List[str]:
    """Check the ``scopes`` field against the class-level ``VALID_SCOPES``.

    Args:
        v: The scope URLs supplied for the ``scopes`` field.

    Returns:
        ``v`` unchanged when it is non-empty and every entry is listed in
        ``cls.VALID_SCOPES``.

    Raises:
        ValueError: If ``v`` is empty, or if one or more entries are not in
            ``cls.VALID_SCOPES``. In the latter case the message lists the
            offending scopes followed by every accepted scope.
    """
    if not v:
        raise ValueError("At least one scope must be provided")

    # Collect every unknown scope (in input order) so the error can report
    # all of them at once rather than only the first.
    rejected: List[str] = []
    for candidate in v:
        if candidate not in cls.VALID_SCOPES:
            rejected.append(candidate)

    if not rejected:
        return v

    rejected_text = ", ".join(rejected)
    accepted_text = ", ".join(cls.VALID_SCOPES)
    raise ValueError(
        f"Invalid Google Drive API scope(s): {rejected_text}. "
        f"Valid scopes are: {accepted_text}"
    )

def _load_credentials(self) -> Any:
"""Load credentials."""
# Adapted from https://developers.google.com/drive/api/v3/quickstart/python
Expand All @@ -273,11 +300,13 @@ def _load_credentials(self) -> Any:
creds = None
if self.service_account_key.exists():
return service_account.Credentials.from_service_account_file(
str(self.service_account_key), scopes=SCOPES
str(self.service_account_key), scopes=self.scopes
)

if self.token_path.exists():
creds = Credentials.from_authorized_user_file(str(self.token_path), SCOPES)
creds = Credentials.from_authorized_user_file(
str(self.token_path), self.scopes
)

if self.credentials:
# use whatever was passed to us
Expand All @@ -289,13 +318,13 @@ def _load_credentials(self) -> Any:
creds.refresh(Request())
elif "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
creds, project = default()
creds = creds.with_scopes(SCOPES)
creds = creds.with_scopes(self.scopes)
# no need to write to file
if creds:
return creds
else:
flow = InstalledAppFlow.from_client_secrets_file(
str(self.credentials_path), SCOPES
str(self.credentials_path), self.scopes
)
creds = flow.run_local_server(port=0)
with open(self.token_path, "w") as token:
Expand Down
39 changes: 39 additions & 0 deletions libs/community/tests/unit_tests/test_drive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import pytest

from langchain_google_community.drive import GoogleDriveLoader


def test_drive_default_scope() -> None:
    """A loader built without ``scopes`` exposes the drive.file scope only."""
    expected_scopes = ["https://www.googleapis.com/auth/drive.file"]
    default_loader = GoogleDriveLoader(folder_id="dummy_folder")
    assert default_loader.scopes == expected_scopes


def test_drive_custom_scope() -> None:
    """A single caller-supplied valid scope is stored on the loader as given."""
    readonly_only = ["https://www.googleapis.com/auth/drive.readonly"]
    configured = GoogleDriveLoader(scopes=readonly_only, folder_id="dummy_folder")
    assert configured.scopes == readonly_only


def test_drive_multiple_scopes() -> None:
    """Several valid scopes passed together are all kept, in the same order."""
    readonly_scope = "https://www.googleapis.com/auth/drive.readonly"
    metadata_readonly_scope = "https://www.googleapis.com/auth/drive.metadata.readonly"
    requested = [readonly_scope, metadata_readonly_scope]

    configured = GoogleDriveLoader(folder_id="dummy_folder", scopes=requested)

    assert configured.scopes == requested


def test_drive_empty_scope_list() -> None:
    """Constructing a loader with ``scopes=[]`` raises the empty-list error."""
    expected_message = "At least one scope must be provided"
    with pytest.raises(ValueError, match=expected_message):
        GoogleDriveLoader(folder_id="dummy_folder", scopes=[])


def test_drive_invalid_scope() -> None:
    """A scope URL that is not in the accepted set raises the invalid-scope error."""
    loader_kwargs = {
        "folder_id": "dummy_folder",
        "scopes": ["https://www.googleapis.com/auth/drive.invalid"],
    }
    with pytest.raises(ValueError, match="Invalid Google Drive API scope"):
        GoogleDriveLoader(**loader_kwargs)

0 comments on commit 3455e76

Please sign in to comment.