Skip to content

Commit

Permalink
Merge pull request #27 from Querent-ai/Test_Cases_fixes
Browse files Browse the repository at this point in the history
Fixes test cases and adds a GitHub workflow
  • Loading branch information
saraswatpuneet committed Aug 25, 2023
2 parents ea73226 + 9c2b06f commit a935c81
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 18 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
name: Run Pytest on Branches

on:
  push:
    branches:
      - '*'
    paths-ignore:
      - 'README.md'  # Add any paths you want to exclude

jobs:
  pytest:
    # Only validate non-main branches; main is excluded by this guard.
    if: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        # v4 runs on a supported Node runtime; v2 is deprecated by GitHub.
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quote the version: an unquoted 3.10 would parse as the float 3.1.
          python-version: '3.8'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run Pytest
        run: pytest --disable-warnings .
1 change: 1 addition & 0 deletions querent/common/types/collected_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ def __init__(self, file: str, data: bytes, error: str = None) -> None:
self.error = error
self.file = file
if self.file:
file = str(file)
self.extension = file.split(".")[-1]
self.file_id = file.split("/")[-1].split(".")[0]

Expand Down
24 changes: 16 additions & 8 deletions querent/storage/local/local_file_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from querent.storage.storage_factory import StorageFactory
from querent.storage.storage_result import StorageResult


class AsyncDebouncer:
def __init__(self):
self.cache = {}
Expand Down Expand Up @@ -43,10 +44,12 @@ async def get_or_create(self, key, build_a_future):
del self.cache[key]
return result


class DebouncerEntry:
def __init__(self, future):
self.future = future


class DebouncedStorage:
def __init__(self, underlying):
self.underlying = underlying
Expand Down Expand Up @@ -84,7 +87,7 @@ async def bulk_delete(self, paths):
await self.underlying.bulk_delete(paths)

async def get_all(self, path):
key = (path, 0, float('inf'))
key = (path, 0, float("inf"))
cached_result = await self.get_slice_cache(key)
if cached_result is None:
result = await self.underlying.get_all(path)
Expand All @@ -98,6 +101,7 @@ async def file_num_bytes(self, path):
def get_uri(self):
return self.underlying.get_uri()


class LocalFileStorage(Storage):
def __init__(self, uri: Uri, root=None):
self.uri = uri
Expand All @@ -106,6 +110,9 @@ def __init__(self, uri: Uri, root=None):
self.root = root
self.cache_lock = Lock()

async def initialize_lock(self):
self.cache_lock = Lock()

async def full_path(self, relative_path):
await self.ensure_valid_relative_path(relative_path)
return self.root / relative_path
Expand All @@ -129,7 +136,7 @@ async def check_connectivity(self):
f"Failed to create directories at {self.root}: {e}",
)

async def put(self, path: Path, payload: PutPayload)-> StorageResult:
async def put(self, path: Path, payload: PutPayload) -> StorageResult:
full_path = await self.full_path(path)
parent_dir = full_path.parent
try:
Expand All @@ -153,18 +160,18 @@ async def copy_to(self, path, output) -> StorageResult:
await asyncio.to_thread(shutil.copyfileobj, file, output)
return StorageResult.success(None)

async def get_slice(self, path, start, end)-> StorageResult:
async def get_slice(self, path, start, end) -> StorageResult:
full_path = await self.full_path(path)
with open(full_path, "rb") as file:
file.seek(start)
return StorageResult.success(file.read(end - start))

async def get_all(self, path)-> StorageResult:
async def get_all(self, path) -> StorageResult:
full_path = await self.full_path(path)
with open(full_path, "rb") as file:
return StorageResult.success(file.read())

async def delete(self, path)-> StorageResult:
async def delete(self, path) -> StorageResult:
full_path = await self.full_path(path)
try:
full_path.unlink()
Expand All @@ -181,11 +188,11 @@ async def bulk_delete(self, paths):
for path in paths:
await self.delete(path)

async def exists(self, path)-> StorageResult:
async def exists(self, path) -> StorageResult:
full_path = await self.full_path(path)
return StorageResult.success(full_path.exists())

async def file_num_bytes(self, path)-> StorageResult:
async def file_num_bytes(self, path) -> StorageResult:
full_path = await self.full_path(path)
try:
return StorageResult.success(full_path.stat().st_size)
Expand All @@ -196,9 +203,10 @@ async def file_num_bytes(self, path)-> StorageResult:
)

@property
def get_uri(self)-> Uri:
def get_uri(self) -> Uri:
return self.uri


class LocalStorageFactory(StorageFactory):
def backend(self) -> StorageBackend:
return StorageBackend.LocalFile
Expand Down
4 changes: 3 additions & 1 deletion querent/storage/storage_base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from abc import ABC, abstractmethod
from pathlib import Path
from typing import IO
from typing import List

from querent.common.uri import Uri
from querent.storage.payload import PutPayload
from querent.storage.storage_result import StorageResult


class Storage(ABC):
@abstractmethod
async def check_connectivity(self) -> None:
Expand Down Expand Up @@ -36,7 +38,7 @@ async def delete(self, path: Path) -> StorageResult:
pass

@abstractmethod
async def bulk_delete(self, paths: list[Path]) -> None:
async def bulk_delete(self, paths: List[Path]) -> None:
pass

@abstractmethod
Expand Down
2 changes: 1 addition & 1 deletion querent/tools/web_page_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def extract_with_bs4(self, url):
self.store_content(url, content)

# Recursively crawl internal links
self.extract_internal_links_and_crawl(soup)
# self.extract_internal_links_and_crawl(soup)
return content

elif response.status_code == 404:
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,6 @@ Pillow==9.5.0
EbookLib==0.18
html2text==2020.1.16
duckduckgo-search==3.8.3
google-generativeai==0.1.0
asyncio==3.4.3
aiofiles
pytest-asyncio
22 changes: 15 additions & 7 deletions tests/test_local_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,28 @@
import tempfile
from pathlib import Path
import pytest
from querent.storage.local.local_file_storage import LocalFileStorage, LocalStorageFactory
from querent.storage.local.local_file_storage import (
LocalFileStorage,
LocalStorageFactory,
)
from querent.common.uri import Uri
import querent.storage.payload as querent_payload
from querent.storage.storage_resolver import StorageResolver


@pytest.fixture
def temp_dir():
    """Yield the path of a throwaway directory, removed when the test ends."""
    with tempfile.TemporaryDirectory() as dir_path:
        yield dir_path


@pytest.mark.asyncio
def test_local_storage(temp_dir):
uri = Uri("file://" + temp_dir) # Use the temp_dir as the base URI
storage = LocalFileStorage(uri, Path(temp_dir)) # Provide the 'uri' argument only
payload = querent_payload.BytesPayload(b"test")

print(f"Temp dir: {temp_dir}")
print(f"URI: {uri}")

Expand All @@ -32,18 +38,20 @@ def test_local_storage(temp_dir):
print(f"File content: {content.decode('utf-8')}")
assert content == b"test"


@pytest.mark.asyncio
async def test_storage_resolver(temp_dir):
    """Resolve a file:// URI to a storage backend and round-trip a payload.

    The test is a genuine coroutine so that the ``pytest.mark.asyncio``
    marker actually drives it: on a plain ``def`` the marker is inert,
    which is why the original fell back to ``asyncio.run`` calls.
    """
    uri = Uri("file://" + temp_dir)  # Use the temp_dir as the base URI
    resolver = StorageResolver()

    storage = await resolver.resolve(uri)

    payload = querent_payload.BytesPayload(b"ok testing")
    await storage.put(Path(temp_dir + "/test.txt"), payload)

    # The stored file must exist on disk with exactly the bytes we put.
    file_path = Path(temp_dir, "test.txt")
    assert file_path.exists()

    with open(file_path, "rb") as file:
        content = file.read()
    assert content == b"ok testing"

0 comments on commit a935c81

Please sign in to comment.