Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
*.manifest
*.spec

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Virtual environments
venv/
ENV/
env/
.venv/
.env

# IDEs
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store

# Claude settings
.claude/*

# Project specific
models/
*.pth
*.pkl

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Note: Do NOT ignore poetry.lock or uv.lock files
1,757 changes: 1,757 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

106 changes: 106 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
[tool.poetry]
name = "active-learning-project"
version = "0.1.0"
description = "Active learning project for machine learning"
authors = ["Your Name <your.email@example.com>"]
readme = "README.md"
packages = [{include = "*.py"}]

[tool.poetry.dependencies]
python = "^3.8"
certifi = ">=2020.6.20"
numpy = ">=1.18.5"
Pillow = ">=8.3.2"
six = ">=1.15.0"
torch = ">=1.5.0"
torchvision = ">=0.6.0"

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
pytest-cov = "^4.1.0"
pytest-mock = "^3.11.0"

[tool.poetry.scripts]
test = "pytest:main"
tests = "pytest:main"

[tool.pytest.ini_options]
minversion = "7.0"
addopts = [
"-ra",
"--strict-markers",
"--cov=.",
"--cov-branch",
"--cov-report=term-missing",
"--cov-report=html:htmlcov",
"--cov-report=xml:coverage.xml",
"--cov-fail-under=80",
"-vv"
]
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
markers = [
"unit: marks tests as unit tests (fast, isolated)",
"integration: marks tests as integration tests (may have external dependencies)",
"slow: marks tests as slow running"
]
filterwarnings = [
"error",
"ignore::UserWarning",
"ignore::DeprecationWarning"
]

[tool.coverage.run]
source = ["."]
omit = [
"*/tests/*",
"*/test_*",
"*/__pycache__/*",
"*/venv/*",
"*/.venv/*",
"*/virtualenv/*",
"*/dist/*",
"*/build/*",
"*.egg-info/*",
"setup.py",
"conftest.py",
"*/.pytest_cache/*",
"*/.coverage*",
"*/htmlcov/*",
"active_learning.py",
"active_learning_basics.py",
"advanced_active_learning.py",
"diversity_sampling.py",
"pytorch_clusters.py",
"uncertainty_sampling.py"
]

[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"def __repr__",
"if self.debug:",
"if settings.DEBUG",
"raise AssertionError",
"raise NotImplementedError",
"if 0:",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
"class .*\\bProtocol\\):",
"@(abc\\.)?abstractmethod"
]
show_missing = true
precision = 2
fail_under = 80

[tool.coverage.html]
directory = "htmlcov"

[tool.coverage.xml]
output = "coverage.xml"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
Empty file added tests/__init__.py
Empty file.
131 changes: 131 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import os
import sys
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, Mock, patch

import pytest
import torch

# Add the project root to the Python path
sys.path.insert(0, str(Path(__file__).parent.parent))


@pytest.fixture
def temp_dir():
    """Yield a Path to a fresh temporary directory, removed after the test."""
    tmp = tempfile.TemporaryDirectory()
    try:
        yield Path(tmp.name)
    finally:
        tmp.cleanup()


@pytest.fixture
def mock_model():
    """Return a Mock standing in for a PyTorch model.

    The mock supports the calls the code under test makes: eval()/train()
    (both return the mock itself, as real modules do), parameters(), and
    state_dict().
    """
    fake = Mock()
    fake.eval = Mock(return_value=fake)
    fake.train = Mock(return_value=fake)
    fake.parameters = Mock(return_value=[torch.zeros(1)])
    fake.state_dict = Mock(return_value={'layer': torch.zeros(1)})
    return fake


@pytest.fixture
def sample_data():
    """Provide a small batch of random features, binary labels, and texts."""
    features = torch.randn(10, 5)
    labels = torch.randint(0, 2, (10,))
    texts = ['sample text'] * 10
    return {'features': features, 'labels': labels, 'texts': texts}


@pytest.fixture
def mock_dataset():
    """Create a mock dataset that supports ``len()`` and item indexing.

    Uses ``MagicMock`` rather than ``Mock``: Python looks up dunder methods
    (``__len__``, ``__getitem__``) on the *type*, so assigning them as
    attributes on a plain ``Mock`` instance has no effect on ``len(dataset)``
    or ``dataset[i]``. ``MagicMock`` pre-configures magic methods, so setting
    their ``return_value`` works as intended.
    """
    dataset = MagicMock()
    dataset.__len__.return_value = 100
    dataset.__getitem__.return_value = (torch.randn(5), 0)
    return dataset


@pytest.fixture
def csv_data(temp_dir):
    """Populate temp_dir with labeled CSV folders plus unlabeled data.

    Creates training/validation/evaluation folders each holding a
    'related.csv' and 'not_related.csv', and an 'unlabeled_data' folder
    with one unlabeled CSV. Returns the populated temp_dir.
    """
    labeled_rows = "text,label\nSample text 1,0\nSample text 2,1\n"

    for folder in ('training_data', 'validation_data', 'evaluation_data'):
        target = temp_dir / folder
        target.mkdir()
        for name in ('related.csv', 'not_related.csv'):
            (target / name).write_text(labeled_rows)

    unlabeled = temp_dir / 'unlabeled_data'
    unlabeled.mkdir()
    unlabeled_rows = "text\nUnlabeled sample 1\nUnlabeled sample 2\n"
    (unlabeled / 'unlabeled_data.csv').write_text(unlabeled_rows)

    return temp_dir


@pytest.fixture
def mock_config():
    """Provide a static configuration dict standing in for real settings."""
    return dict(
        batch_size=32,
        learning_rate=0.001,
        epochs=10,
        device='cpu',
        model_path='models/',
        random_seed=42,
    )


@pytest.fixture(autouse=True)
def reset_random_seeds():
    """Seed random, numpy, and torch (incl. CUDA) before every test.

    Autouse: runs for each test so results are reproducible regardless of
    test execution order.
    """
    import random
    import numpy as np

    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


@pytest.fixture
def capture_stdout():
    """Replace sys.stdout with a StringIO and yield it for inspection."""
    from io import StringIO

    buffer = StringIO()
    with patch('sys.stdout', new=buffer):
        yield buffer


@pytest.fixture
def mock_file_operations():
    """Patch builtins.open and yield (open_mock, file_mock).

    file_mock is what a ``with open(...) as f:`` block binds to ``f``, so
    tests can assert on reads/writes without touching the filesystem.
    """
    with patch('builtins.open', create=True) as open_mock:
        file_handle = Mock()
        context = open_mock.return_value
        context.__enter__ = Mock(return_value=file_handle)
        context.__exit__ = Mock(return_value=None)
        yield open_mock, file_handle


@pytest.fixture
def device():
    """Return a torch.device: 'cuda' when available, otherwise 'cpu'."""
    kind = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(kind)


# Markers for different test types
def pytest_configure(config):
"""Configure pytest with custom markers."""
config.addinivalue_line("markers", "unit: Unit tests (fast, isolated)")
config.addinivalue_line("markers", "integration: Integration tests")
config.addinivalue_line("markers", "slow: Slow running tests")
Empty file added tests/integration/__init__.py
Empty file.
Loading