diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 11dffc7..784fbb3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: strategy: matrix: - python-version: ['3.9', '3.10', '3.11', '3.12'] + python-version: ['3.10', '3.11', '3.12'] steps: - name: Checkout repository diff --git a/.github/workflows/link-and-size-check.yml b/.github/workflows/link-and-size-check.yml index 1763422..75e3faa 100644 --- a/.github/workflows/link-and-size-check.yml +++ b/.github/workflows/link-and-size-check.yml @@ -39,7 +39,7 @@ jobs: - name: Check all Markdown links (internal & external) uses: lycheeverse/lychee-action@v2.0.2 with: - args: --verbose --no-progress --exclude-mail --require-https --timeout 15 --max-concurrency 10 '**/*.md' 'site/**/*.html' + args: --verbose --no-progress --exclude-mail --timeout 15 --max-concurrency 10 --max-retries 3 --exclude 'https://github.com/.*/edit/.*' --exclude 'https://github.com/.*/raw/.*' --exclude 'https://fonts.gstatic.com' --exclude 'http://localhost:.*' --exclude '^file://.*' --exclude 'https://www.youtube.com/watch\?v=example.*' --exclude 'https://plausible.io/docs/experiments' '**/*.md' 'site/**/*.html' env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 0000000..44d1c51 --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1,23 @@ +# Changelog + +All notable changes to the Annie documentation website will be documented in this file. + +## [Unreleased] + +### Changed + +- Updated urllib3 from 2.5.0 to 2.6.0 to address security vulnerabilities (CVE-2025-66471, CVE-2025-66418) +- Added brotli>=1.2.0 dependency for enhanced security in HTTP content decompression +- Ensures compatibility with urllib3 2.6.0's improved handling of decompression bombs and chained encodings +- Dropped Python 3.9 support from CI testing (Python 3.9 reached end of life in October 2025, and newer dependencies require Python 3.10+) + +### Security + +- Fixed potential decompression bomb vulnerabilities through urllib3 2.6.0 update +- Fixed potential DoS attack via unlimited chained encodings through urllib3 2.6.0 update +- Added brotli 1.2.0+ for security fixes in brotli decompression + +### Notes + +- No code changes were required as the codebase does not use the deprecated urllib3 APIs (HTTPResponse.getheaders(), HTTPResponse.getheader()) +- The repository only uses urllib3 indirectly through the requests library diff --git a/mkdocs.yml b/mkdocs.yml index c1ca9e6..ebeab01 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -78,6 +78,7 @@ nav: - Filtered Search: filtering.md - Examples: examples.md - Contributing: contributing.md + - Changelog: changelog.md markdown_extensions: - toc: diff --git a/requirements.in b/requirements.in index cb37a11..f36147c 100644 --- a/requirements.in +++ b/requirements.in @@ -1,3 +1,4 @@ mkdocs>=1.5.0 mkdocs-material>=9.0.0 PyYAML>=6.0 +brotli>=1.2.0 # Security fix for urllib3 2.6.0 diff --git a/requirements.txt b/requirements.txt index a85cd1a..78611eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,15 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile requirements.in +# pip-compile --output-file=requirements.txt.new requirements.in # babel==2.17.0 # via mkdocs-material backrefs==6.1 # via mkdocs-material +brotli==1.2.0 + # via -r requirements.in certifi==2025.11.12 # via requests charset-normalizer==3.4.4 @@ -53,7 +55,7 @@ paginate==0.5.7 # via mkdocs-material pathspec==0.12.1 # via mkdocs -platformdirs==4.5.0 +platformdirs==4.5.1 # via mkdocs-get-deps pygments==2.19.2 # via mkdocs-material @@ -74,7 +76,7 @@ requests==2.32.5 # via mkdocs-material six==1.17.0 # via python-dateutil -urllib3==2.5.0 +urllib3==2.6.0 # via requests watchdog==6.0.0 # via mkdocs diff --git a/site/404.html b/site/404.html deleted file mode 100644 index 76a35eb..0000000 --- a/site/404.html +++ /dev/null @@ -1,1628 +0,0 @@ - - - -
- - - - - - - - - - - - - - - - - - - -
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ThreadSafeAnnIndex and PyHnswIndex for Concurrent Access¶Annie exposes a thread-safe version of its ANN index (AnnIndex) for use in Python. This is useful when you want to perform parallel search or update operations from Python threads. Additionally, the PyHnswIndex class provides a Python interface to the HNSW index, which now includes enhanced data handling capabilities.
search, search_batch)add, remove)RwLock and exposed via PyO3PyHnswIndex supports mapping internal IDs to user IDs and handling vector data efficientlyfrom annie import ThreadSafeAnnIndex, Distance
-import numpy as np
-import threading
-
-# Create index
-index = ThreadSafeAnnIndex(128, Distance.Cosine)
-
-# Add vectors
-data = np.random.rand(1000, 128).astype('float32')
-ids = np.arange(1000, dtype=np.int64)
-index.add(data, ids)
-
-# Run concurrent searches
-def run_search():
- query = np.random.rand(128).astype('float32')
- ids, distances = index.search(query, 10)
- print(ids)
-
-threads = [threading.Thread(target=run_search) for _ in range(4)]
-[t.start() for t in threads]
-[t.join() for t in threads]
-
-# Using PyHnswIndex
-from rust_annie import PyHnswIndex
-
-# Create HNSW index
-hnsw_index = PyHnswIndex(dims=128)
-
-# Add vectors to HNSW index
-hnsw_index.add(data, ids)
-
-# Search in HNSW index
-query = np.random.rand(128).astype('float32')
-user_ids, distances = hnsw_index.search(query, 10)
-print(user_ids)
-The CI/CD pipeline for PyPI publishing has been updated to include parallel jobs for building wheels and source distributions across multiple operating systems and Python versions. This involves concurrency considerations that should be documented for users who are integrating or maintaining the pipeline.
-The pipeline is triggered on pushes and pull requests to the main branch, as well as manually via workflow_dispatch. It includes the following jobs:
ubuntu-latest and includes steps for checking out the code, setting up Rust, caching dependencies, running tests, and checking code formatting.ubuntu-latest, windows-latest, and macos-latest for Python versions 3.8, 3.9, 3.10, and 3.11. This job builds the wheels using maturin and uploads them as artifacts.ubuntu-latest and builds the source distribution using maturin, uploading it as an artifact.workflow_dispatch with the appropriate input.workflow_dispatch with the appropriate input.build-wheels job utilizes a matrix strategy to run builds concurrently across different operating systems and Python versions. This reduces the overall build time but requires careful management of dependencies and environment setup to ensure consistency across platforms.By understanding these concurrency considerations, users can effectively manage and extend the CI/CD pipeline to suit their specific needs.
-The AnnIndex class provides efficient brute-force nearest neighbor search with support for multiple distance metrics.
AnnIndex(dim: int, metric: Distance)¶Creates a new brute-force index.
-dim (int): Vector dimensionmetric (Distance): Distance metric (EUCLIDEAN, COSINE, MANHATTAN, CHEBYSHEV)new_minkowski(dim: int, p: float)¶Creates a Minkowski distance index.
-dim (int): Vector dimensionp (float): Minkowski exponent (p > 0)add(data: ndarray, ids: ndarray)¶Add vectors to the index.
-data: N×dim array of float32 vectorsids: N-dimensional array of int64 IDssearch(query: ndarray, k: int) -> Tuple[ndarray, ndarray]¶Search for k nearest neighbors.
-query: dim-dimensional query vectork: Number of neighbors to returnsearch_batch(queries: ndarray, k: int) -> Tuple[ndarray, ndarray]¶Batch search for multiple queries.
-queries: M×dim array of queriesk: Number of neighbors per querysearch_filter_py(query: ndarray, k: int, filter_fn: Callable[[int], bool]) -> Tuple[ndarray, ndarray]¶Search with ID filtering.
-query: dim-dimensional query vectork: Maximum neighbors to returnfilter_fn: Function that returns True for allowed IDssave(path: str)¶Save index to disk.
-static load(path: str) -> AnnIndex¶Load index from disk.
-import numpy as np
-from rust_annie import AnnIndex, Distance
-
-# Create index
-index = AnnIndex(128, Distance.EUCLIDEAN)
-
-# Add data
-data = np.random.rand(1000, 128).astype(np.float32)
-ids = np.arange(1000, dtype=np.int64)
-index.add(data, ids)
-
-# Search
-query = np.random.rand(128).astype(np.float32)
-neighbor_ids, distances = index.search(query, k=5)
-The PyHnswIndex class provides approximate nearest neighbor search using Hierarchical Navigable Small World (HNSW) graphs.
PyHnswIndex(dims: int)¶Creates a new HNSW index.
-dims (int): Vector dimensionadd(data: ndarray, ids: ndarray)¶Add vectors to the index.
-data: N×dims array of float32 vectorsids: N-dimensional array of int64 IDssearch(vector: ndarray, k: int) -> Tuple[ndarray, ndarray]¶Search for k approximate nearest neighbors.
-vector: dims-dimensional query vectork: Number of neighbors to returnsave(path: str)¶Save index to disk.
-static load(path: str) -> PyHnswIndex¶Load index from disk (currently not implemented)
-import numpy as np
-from rust_annie import PyHnswIndex
-
-# Create index
-index = PyHnswIndex(dims=128)
-
-# Add data
-data = np.random.rand(10000, 128).astype(np.float32)
-ids = np.arange(10000, dtype=np.int64)
-index.add(data, ids)
-
-# Search
-query = np.random.rand(128).astype(np.float32)
-neighbor_ids, _ = index.search(query, k=10)
-The ThreadSafeAnnIndex class provides a thread-safe wrapper around AnnIndex for concurrent access.
ThreadSafeAnnIndex(dim: int, metric: Distance)¶Creates a new thread-safe index.
-dim (int): Vector dimensionmetric (Distance): Distance metricadd(data: ndarray, ids: ndarray)¶Thread-safe vector addition.
-remove(ids: List[int])¶Thread-safe removal by IDs.
-search(query: ndarray, k: int) -> Tuple[ndarray, ndarray]¶Thread-safe single query search.
-search_batch(queries: ndarray, k: int) -> Tuple[ndarray, ndarray]¶Thread-safe batch search.
-save(path: str)¶Thread-safe save.
-static load(path: str) -> ThreadSafeAnnIndex¶Thread-safe load.
-import numpy as np
-from rust_annie import ThreadSafeAnnIndex, Distance
-from concurrent.futures import ThreadPoolExecutor
-
-# Create index
-index = ThreadSafeAnnIndex(128, Distance.COSINE)
-
-# Add data from multiple threads
-with ThreadPoolExecutor() as executor:
- for i in range(4):
- data = np.random.rand(250, 128).astype(np.float32)
- ids = np.arange(i*250, (i+1)*250, dtype=np.int64)
- executor.submit(index.add, data, ids)
-
-# Concurrent searches
-with ThreadPoolExecutor() as executor:
- futures = []
- for _ in range(10):
- query = np.random.rand(128).astype(np.float32)
- futures.append(executor.submit(index.search, query, k=5))
-
- for future in futures:
- ids, dists = future.result()
-import numpy as np
-from rust_annie import AnnIndex, Distance
-
-# Create index
-index = AnnIndex(128, Distance.EUCLIDEAN)
-
-# Generate and add data
-data = np.random.rand(1000, 128).astype(np.float32)
-ids = np.arange(1000, dtype=np.int64)
-index.add(data, ids)
-
-# Single query
-query = np.random.rand(128).astype(np.float32)
-neighbor_ids, distances = index.search(query, k=5)
-
-# Batch queries
-queries = np.random.rand(10, 128).astype(np.float32)
-batch_ids, batch_dists = index.search_batch(queries, k=3)
-# Create index with sample data
-index = AnnIndex(3, Distance.EUCLIDEAN)
-data = np.array([
- [1.0, 2.0, 3.0],
- [4.0, 5.0, 6.0],
- [7.0, 8.0, 9.0]
-], dtype=np.float32)
-ids = np.array([10, 20, 30], dtype=np.int64)
-index.add(data, ids)
-
-# Define filter function
-def even_ids(id: int) -> bool:
- return id % 2 == 0
-
-# Filtered search
-query = np.array([1.0, 2.0, 3.0], dtype=np.float32)
-filtered_ids, filtered_dists = index.search_filter_py(query, k=3, filter_fn=even_ids)
-# Only IDs 10 and 30 will be returned (20 is odd)
-from rust_annie import PyHnswIndex
-
-# Create HNSW index
-index = PyHnswIndex(dims=128)
-
-# Add large dataset
-data = np.random.rand(100000, 128).astype(np.float32)
-ids = np.arange(100000, dtype=np.int64)
-index.add(data, ids)
-
-# Fast approximate search
-query = np.random.rand(128).astype(np.float32)
-neighbor_ids, _ = index.search(query, k=10)
-# Create and save index
-index = AnnIndex(64, Distance.COSINE)
-data = np.random.rand(500, 64).astype(np.float32)
-ids = np.arange(500, dtype=np.int64)
-index.add(data, ids)
-index.save("my_index")
-
-# Load index
-loaded_index = AnnIndex.load("my_index")
-from rust_annie import ThreadSafeAnnIndex, Distance
-from concurrent.futures import ThreadPoolExecutor
-
-index = ThreadSafeAnnIndex(256, Distance.MANHATTAN)
-
-# Concurrent writes
-with ThreadPoolExecutor() as executor:
- for i in range(10):
- data = np.random.rand(100, 256).astype(np.float32)
- ids = np.arange(i*100, (i+1)*100, dtype=np.int64)
- executor.submit(index.add, data, ids)
-
-# Concurrent reads
-with ThreadPoolExecutor() as executor:
- futures = []
- for _ in range(100):
- query = np.random.rand(256).astype(np.float32)
- futures.append(executor.submit(index.search, query, k=3))
-
- results = [f.result() for f in futures]
-# Create index with custom distance
-index = AnnIndex.new_minkowski(dim=64, p=2.5)
-data = np.random.rand(200, 64).astype(np.float32)
-ids = np.arange(200, dtype=np.int64)
-index.add(data, ids)
-
-# Search with Minkowski distance
-query = np.random.rand(64).astype(np.float32)
-ids, dists = index.search(query, k=5)
-Filters allow you to narrow down search results dynamically based on: -- Metadata (e.g., tags, IDs, labels) -- Numeric thresholds (e.g., only items above/below a value) -- Custom user-defined logic
-This improves both precision and flexibility of search.
-from rust_annie import AnnIndex
-import numpy as np
-
-# 1. Create an index with vector dimension 128
-index = AnnIndex(dimension=128)
-
-# 2. Add data with metadata
-vector0 = np.random.rand(128).astype(np.float32)
-vector1 = np.random.rand(128).astype(np.float32)
-
-index.add_item(0, vector0, metadata={"category": "A"})
-index.add_item(1, vector1, metadata={"category": "B"})
-
-# 3. Define a filter function (e.g., only include items where category == "A")
-def category_filter(metadata):
- return metadata.get("category") == "A"
-
-# 4. Perform search with the filter applied
-query_vector = np.random.rand(128).astype(np.float32)
-results = index.search(query_vector, k=5, filter=category_filter)
-
-print("Filtered search results:", results)
-This library supports applying filters to narrow down ANN search results dynamically.
-| Filter type | -Example | -
|---|---|
| Equals | -Filter.equals("category", "A") |
-
| Greater than | -Filter.gt("score", 0.8) |
-
| Less than | -Filter.lt("price", 100) |
-
| Custom predicate | -Filter.custom(lambda metadata: ...) |
-
Filters work on the metadata you provide when adding items to the index.
-The BruteForceIndex now uses total_cmp for sorting, which provides NaN-resistant sorting behavior. This change ensures that any NaN values in the data are handled consistently, preventing potential issues with partial comparisons.
The library now includes a benchmarking function to evaluate the performance of different index types, specifically PyHnswIndex and AnnIndex. This function measures the average, maximum, and minimum query times, providing insights into the efficiency of each index type.
import numpy as np
-import time
-from rust_annie import PyHnswIndex, AnnIndex
-
-def benchmark(index_cls, name, dim=128, n=10_000, q=100, k=10):
- print(f"\nBenchmarking {name} with {n} vectors (dim={dim})...")
-
- # Data
- data = np.random.rand(n, dim).astype(np.float32)
- ids = np.arange(n, dtype=np.int64)
- queries = np.random.rand(q, dim).astype(np.float32)
-
- # Index setup
- index = index_cls(dims=dim)
- index.add(data, ids)
-
- # Warm-up + Timing
- times = []
- for i in range(q):
- start = time.perf_counter()
- _ = index.search(queries[i], k=k)
- times.append((time.perf_counter() - start) * 1000)
-
- print(f" Avg query time: {np.mean(times):.3f} ms")
- print(f" Max query time: {np.max(times):.3f} ms")
- print(f" Min query time: {np.min(times):.3f} ms")
-
-if __name__ == "__main__":
- benchmark(PyHnswIndex, "HNSW")
- benchmark(AnnIndex, "Brute-Force")
-src/filters.rs in the Rust code.Blazingly fast Approximate Nearest Neighbors in Rust
-pip install rust_annie
-import numpy as np
-from rust_annie import AnnIndex, Distance
-
-# Create index
-index = AnnIndex(128, Distance.EUCLIDEAN)
-
-# Add data
-data = np.random.rand(1000, 128).astype(np.float32)
-ids = np.arange(1000, dtype=np.int64)
-index.add(data, ids)
-
-# Search
-query = np.random.rand(128).astype(np.float32)
-neighbor_ids, distances = index.search(query, k=5)
-
-
-
-
- Thank you for your interest in contributing to Annie's documentation! This guide will help you get started with contributing to our documentation site.
-git clone https://github.com/YOUR-USERNAME/Annie-Docs.git
-cd Annie-Docs
-./build-docs.sh
-source venv/bin/activate
-mkdocs serve
-http://localhost:8000 to see your changes live.docs/
-├── index.md # Homepage
-├── api/ # API Reference
-│ ├── ann_index.md # AnnIndex class
-│ ├── hnsw_index.md # PyHnswIndex class
-│ └── threadsafe_index.md
-├── examples.md # Usage examples
-├── concurrency.md # Thread-safety features
-└── filtering.md # Filtered search
-# Create virtual environment
-python3 -m venv venv
-source venv/bin/activate
-
-# Install dependencies
-pip install -r requirements.txt
-
-# Build site
-mkdocs build
-
-# Serve locally with auto-reload
-mkdocs serve --dev-addr=0.0.0.0:8000
-# Build documentation
-./build-docs.sh
-
-# Deploy (build + prepare for hosting)
-./deploy.sh
-git checkout -b feature/improve-examples
-Make Your Changes
-Edit files in the docs/ directory
Follow our writing guidelines
-Test Locally
-mkdocs serve
-Visit http://localhost:8000 to review changes
mkdocs build
-# for main headings, ## for sections, ### for subsections# Good
-import numpy as np
-**bold** for emphasis, *italic* for secondary emphasisinline code and class names like AnnIndex# Good example
-import numpy as np
-from rust_annie import AnnIndex, Distance
-
-# Create index for 128-dimensional vectors
-index = AnnIndex(128, Distance.EUCLIDEAN)
-
-# Add sample data
-data = np.random.rand(1000, 128).astype(np.float32)
-ids = np.arange(1000, dtype=np.int64)
-index.add(data, ids)
-
-# Search for nearest neighbors
-query = np.random.rand(128).astype(np.float32)
-neighbor_ids, distances = index.search(query, k=5)
-print(f"Found {len(neighbor_ids)} neighbors")
-mkdocs build # Check for build errors
-mkdocs serve # Test locally
-Check Links
-Ensure all internal links work
-Verify external links are accessible
-Review Content
-git add .
-git commit -m "docs: improve examples in filtering.md"
-git push origin feature/improve-examples
-## Description
-
-Brief description of changes made.
-
-## Type of Change
-
-- [ ] Bug fix (typo, broken link, etc.)
-- [ ] Content update (new examples, clarifications)
-- [ ] New documentation (new features)
-- [ ] Structure improvement
-
-## Testing
-
-- [ ] Built successfully with `mkdocs build`
-- [ ] Tested locally with `mkdocs serve`
-- [ ] Checked all links work
-- [ ] Verified code examples run
-
-## Screenshots (if applicable)
-
-Add screenshots of significant visual changes.
-main branchWhen creating issues, use these labels:
-documentation - General documentation issuesbug - Errors in docs (typos, broken links)enhancement - Improvements to existing contentnew-content - Requests for new documentationgood-first-issue - Good for newcomersThank you for helping make Annie's documentation better!
- - - - - - - - - - - - - -
-
-
-
- --Interactive Examples:
-You can now run selected code blocks directly in your browser! Click the Try it button above a code block to execute it. Use sliders to adjust parameters like vector dimension or dataset size. Powered by Pyodide (Python in the browser). Learn more.
-
import numpy as np
-from rust_annie import AnnIndex, Distance
-
-dim = {{dim|128}}
-size = {{size|1000}}
-
-# Create index
-index = AnnIndex(dim, Distance.EUCLIDEAN)
-
-# Generate and add data
-data = np.random.rand(size, dim).astype(np.float32)
-ids = np.arange(size, dtype=np.int64)
-index.add(data, ids)
-
-# Single query
-query = np.random.rand(dim).astype(np.float32)
-neighbor_ids, distances = index.search(query, k=5)
-print(neighbor_ids, distances)
-
-# Batch queries
-queries = np.random.rand(10, dim).astype(np.float32)
-batch_ids, batch_dists = index.search_batch(queries, k=3)
-print(batch_ids.shape, batch_dists.shape)
-# Create index with sample data
-index = AnnIndex(3, Distance.EUCLIDEAN)
-data = np.array([
- [1.0, 2.0, 3.0],
- [4.0, 5.0, 6.0],
- [7.0, 8.0, 9.0]
-
-## Filtered Search
-<div class="interactive-block" data-interactive>
-```python
-import numpy as np
-from rust_annie import AnnIndex, Distance
-
-# Create index with sample data
-index = AnnIndex(3, Distance.EUCLIDEAN)
-data = np.array([
- [1.0, 2.0, 3.0],
- [4.0, 5.0, 6.0],
- [7.0, 8.0, 9.0]
-], dtype=np.float32)
-ids = np.array([10, 20, 30], dtype=np.int64)
-index.add(data, ids)
-
-# Define filter function
-def even_ids(id: int) -> bool:
- return id % 2 == 0
-
-# Filtered search
-query = np.array([1.0, 2.0, 3.0], dtype=np.float32)
-filtered_ids, filtered_dists = index.search_filter_py(query, k=3, filter_fn=even_ids)
-print(filtered_ids)
-def even_ids(id: int) -> bool: - return id % 2 == 0
-query = np.array([1.0, 2.0, 3.0], dtype=np.float32) -filtered_ids, filtered_dists = index.search_filter_py(query, k=3, filter_fn=even_ids)
-## HNSW Index
-```python
-
-## HNSW Index
-<div class="interactive-block" data-interactive>
-<div class="interactive-controls">
-<label>Dimension: <input type="range" min="8" max="256" value="128" class="slider" data-var="dim" /></label>
-<span class="slider-value" data-var="dim">128</span>
-<label>Dataset size: <input type="range" min="1000" max="200000" value="100000" class="slider" data-var="size" /></label>
-<span class="slider-value" data-var="size">100000</span>
-</div>
-```python
-import numpy as np
-from rust_annie import PyHnswIndex
-
-dim = {{dim|128}}
-size = {{size|100000}}
-
-# Create HNSW index
-index = PyHnswIndex(dims=dim)
-
-# Add large dataset
-data = np.random.rand(size, dim).astype(np.float32)
-ids = np.arange(size, dtype=np.int64)
-index.add(data, ids)
-
-# Fast approximate search
-query = np.random.rand(dim).astype(np.float32)
-neighbor_ids, _ = index.search(query, k=10)
-print(neighbor_ids)
-index = PyHnswIndex(dims=128)
-data = np.random.rand(100000, 128).astype(np.float32) -ids = np.arange(100000, dtype=np.int64) -index.add(data, ids)
-query = np.random.rand(128).astype(np.float32) -neighbor_ids, _ = index.search(query, k=10) -
## Saving and Loading
-```python
-# Create and save index
-index = AnnIndex(64, Distance.COSINE)
-data = np.random.rand(500, 64).astype(np.float32)
-ids = np.arange(500, dtype=np.int64)
-index.add(data, ids)
-index.save("my_index")
-
-# Load index
-loaded_index = AnnIndex.load("my_index")
-from rust_annie import ThreadSafeAnnIndex, Distance
-from concurrent.futures import ThreadPoolExecutor
-
-index = ThreadSafeAnnIndex(256, Distance.MANHATTAN)
-
-# Concurrent writes
-with ThreadPoolExecutor() as executor:
- for i in range(10):
- data = np.random.rand(100, 256).astype(np.float32)
- ids = np.arange(i*100, (i+1)*100, dtype=np.int64)
- executor.submit(index.add, data, ids)
-
-# Concurrent reads
-with ThreadPoolExecutor() as executor:
- futures = []
- for _ in range(100):
- query = np.random.rand(256).astype(np.float32)
- futures.append(executor.submit(index.search, query, k=3))
-
- results = [f.result() for f in futures]
-# Create index with custom distance
-index = AnnIndex.new_minkowski(dim=64, p=2.5)
-data = np.random.rand(200, 64).astype(np.float32)
-ids = np.arange(200, dtype=np.int64)
-index.add(data, ids)
-
-# Search with Minkowski distance
-query = np.random.rand(64).astype(np.float32)
-ids, dists = index.search(query, k=5)
-
A lightning-fast, Rust-powered Approximate Nearest Neighbor library for Python with multiple backends, thread-safety, and GPU acceleration.
-# Stable release from PyPI:
-pip install rust-annie
-
-# Install with GPU support (requires CUDA):
-pip install rust-annie[gpu]
-
-# Or install from source:
-git clone https://github.com/Programmers-Paradise/Annie.git
-cd Annie
-pip install maturin
-maturin develop --release
-import numpy as np
-from rust_annie import AnnIndex, Distance
-
-# Create index
-index = AnnIndex(128, Distance.EUCLIDEAN)
-
-# Add data
-data = np.random.rand(1000, 128).astype(np.float32)
-ids = np.arange(1000, dtype=np.int64)
-index.add(data, ids)
-
-# Search
-query = np.random.rand(128).astype(np.float32)
-neighbor_ids, distances = index.search(query, k=5)
-from rust_annie import PyHnswIndex
-
-index = PyHnswIndex(dims=128)
-data = np.random.rand(10000, 128).astype(np.float32)
-ids = np.arange(10000, dtype=np.int64)
-index.add(data, ids)
-
-# Search
-query = np.random.rand(128).astype(np.float32)
-neighbor_ids, _ = index.search(query, k=10)
-from rust_annie import AnnIndex, Distance
-import numpy as np
-
-# Create index
-idx = AnnIndex(4, Distance.COSINE)
-
-# Add data
-data = np.random.rand(50, 4).astype(np.float32)
-ids = np.arange(50, dtype=np.int64)
-idx.add(data, ids)
-
-# Search
-labels, dists = idx.search(data[10], k=3)
-print(labels, dists)
-from rust_annie import AnnIndex, Distance
-import numpy as np
-
-# Create index
-idx = AnnIndex(16, Distance.EUCLIDEAN)
-
-# Add data
-data = np.random.rand(1000, 16).astype(np.float32)
-ids = np.arange(1000, dtype=np.int64)
-idx.add(data, ids)
-
-# Batch search
-queries = data[:32]
-labels_batch, dists_batch = idx.search_batch(queries, k=10)
-print(labels_batch.shape) # (32, 10)
-from rust_annie import ThreadSafeAnnIndex, Distance
-import numpy as np
-from concurrent.futures import ThreadPoolExecutor
-
-# Create thread-safe index
-idx = ThreadSafeAnnIndex(32, Distance.EUCLIDEAN)
-
-# Add data
-data = np.random.rand(500, 32).astype(np.float32)
-ids = np.arange(500, dtype=np.int64)
-idx.add(data, ids)
-
-# Concurrent searches
-def task(q):
- return idx.search(q, k=5)
-
-queries = np.random.rand(100, 32).astype(np.float32)
-with ThreadPoolExecutor(max_workers=8) as executor:
- futures = [executor.submit(task, q) for q in queries]
- for f in futures:
- print(f.result())
-from rust_annie import AnnIndex, Distance
-import numpy as np
-
-# Create index
-index = AnnIndex(3, Distance.EUCLIDEAN)
-data = np.array([
- [1.0, 2.0, 3.0],
- [4.0, 5.0, 6.0],
- [7.0, 8.0, 9.0]
-], dtype=np.float32)
-ids = np.array([10, 20, 30], dtype=np.int64)
-index.add(data, ids)
-
-# Filter function
-def even_ids(id: int) -> bool:
- return id % 2 == 0
-
-# Filtered search
-query = np.array([1.0, 2.0, 3.0], dtype=np.float32)
-filtered_ids, filtered_dists = index.search_filter_py(
- query,
- k=3,
- filter_fn=even_ids
-)
-print(filtered_ids) # [10, 30] (20 is filtered out)
-This section demonstrates a complete, beginner-friendly example of how to build and query a brute-force AnnIndex using Python.
Measured on a 6-core CPU:
-That’s a \~4× speedup vs. NumPy!
-| Operation | -Dataset Size | -Time (ms) | -Speedup vs Python | -
|---|---|---|---|
| Single Query (Brute) | -10,000 × 64 | -0.7 | -4× | -
| Batch Query (64) | -10,000 × 64 | -0.23 | -12× | -
| HNSW Query | -100,000 × 128 | -0.05 | -56× | -
You’ll find:
-Create a brute-force k-NN index.
-Enum: Distance.EUCLIDEAN, Distance.COSINE, Distance.MANHATTAN
Same API as AnnIndex, safe for concurrent use.
| Class | -Description | -
|---|---|
| AnnIndex | -Brute-force exact search | -
| PyHnswIndex | -Approximate HNSW index | -
| ThreadSafeAnnIndex | -Thread-safe wrapper for AnnIndex | -
| Distance | -Distance metrics (Euclidean, Cosine, etc) | -
| Method | -Description | -
|---|---|
| add(data, ids) | -Add vectors to index | -
| search(query, k) | -Single query search | -
| search_batch(queries, k) | -Batch query search | -
| search_filter_py(query, k, filter_fn) | -Filtered search | -
| save(path) | -Save index to disk | -
| load(path) | -Load index from disk | -
CI runs on GitHub Actions, building wheels on Linux, Windows, macOS, plus:
-benchmark.py & batch_benchmark.py & compare_results.py# Run tests
-cargo test
-pytest tests/
-
-# Run benchmarks
-python scripts/benchmark.py
-python scripts/batch_benchmark.py
-
-# Generate documentation
-mkdocs build
-CI pipeline includes: - - Cross-platform builds (Linux, Windows, macOS) - - Unit tests and integration tests - - Performance benchmarking - - Documentation generation
-Benchmarks are tracked over time using:
-Enable CUDA support for brute-force calculations: -
# Install with GPU support
-pip install rust-annie[gpu]
-
-# Or build from source with GPU features
-maturin develop --release --features gpu
-Supported operations: - - Batch L2 distance calculations - - High-dimensional similarity search
-Requirements: - - NVIDIA GPU with CUDA support - - CUDA Toolkit installed
-Contributions are welcome! Please:
-See the main CONTRIBUTING guide for details.
-This project is licensed under the MIT License. See LICENSE for details.
- - - - - - - - - - - - - - -
-
-
-
- ## ANN Search Filtering
-
-This document explains how to use the filtering capabilities to improve Approximate Nearest Neighbor (ANN) search.
-
-### Why Filtering?
-
-Filters allow you to narrow down search results dynamically based on:
-- Metadata (e.g., tags, IDs, labels)
-- Numeric thresholds (e.g., only items above/below a value)
-- Custom user-defined logic
-
-This improves both precision and flexibility of search.
-
-#### Example: Python API
-
-```python
-from rust_annie import AnnIndex
-import numpy as np
-
-# 1. Create an index with vector dimension 128
-index = AnnIndex(dimension=128)
-
-# 2. Add data with metadata
-vector0 = np.random.rand(128).astype(np.float32)
-vector1 = np.random.rand(128).astype(np.float32)
-
-index.add_item(0, vector0, metadata={"category": "A"})
-index.add_item(1, vector1, metadata={"category": "B"})
-
-# 3. Define a filter function (e.g., only include items where category == "A")
-def category_filter(metadata):
- return metadata.get("category") == "A"
-
-# 4. Perform search with the filter applied
-query_vector = np.random.rand(128).astype(np.float32)
-results = index.search(query_vector, k=5, filter=category_filter)
-
-print("Filtered search results:", results)
-This library supports applying filters to narrow down ANN search results dynamically.
-| Filter type | -Example | -
|---|---|
| Equals | -Filter.equals("category", "A") |
-
| Greater than | -Filter.gt("score", 0.8) |
-
| Less than | -Filter.lt("price", 100) |
-
| Custom predicate | -Filter.custom(lambda metadata: ...) |
-
Filters work on the metadata you provide when adding items to the index.
-The library now supports filtered search using custom Python callbacks, allowing for more complex filtering logic directly in Python.
-from rust_annie import AnnIndex, Distance
-import numpy as np
-
-# Create index
-index = AnnIndex(3, Distance.EUCLIDEAN)
-data = np.array([
- [1.0, 2.0, 3.0],
- [4.0, 5.0, 6.0],
- [7.0, 8.0, 9.0]
-], dtype=np.float32)
-ids = np.array([10, 20, 30], dtype=np.int64)
-index.add(data, ids)
-
-# Filter function
-def even_ids(id: int) -> bool:
- return id % 2 == 0
-
-# Filtered search
-query = np.array([1.0, 2.0, 3.0], dtype=np.float32)
-filtered_ids, filtered_dists = index.search_filter_py(
- query,
- k=3,
- filter_fn=even_ids
-)
-print(filtered_ids) # [10, 30] (20 is filtered out)
-The BruteForceIndex now uses total_cmp for sorting, which provides NaN-resistant sorting behavior. This change ensures that any NaN values in the data are handled consistently, preventing potential issues with partial comparisons.
The library now includes a benchmarking function to evaluate the performance of different index types, specifically PyHnswIndex and AnnIndex. This function measures the average, maximum, and minimum query times, providing insights into the efficiency of each index type.
import numpy as np
-import time
-from rust_annie import PyHnswIndex, AnnIndex
-
-def benchmark(index_cls, name, dim=128, n=10_000, q=100, k=10):
- print(f"\nBenchmarking {name} with {n} vectors (dim={dim})...")
-
- # Data
- data = np.random.rand(n, dim).astype(np.float32)
- ids = np.arange(n, dtype=np.int64)
- queries = np.random.rand(q, dim).astype(np.float32)
-
- # Index setup
- index = index_cls(dims=dim)
- index.add(data, ids)
-
- # Warm-up + Timing
- times = []
- for i in range(q):
- start = time.perf_counter()
- _ = index.search(queries[i], k=k)
- times.append((time.perf_counter() - start) * 1000)
-
- print(f" Avg query time: {np.mean(times):.3f} ms")
- print(f" Max query time: {np.max(times):.3f} ms")
- print(f" Min query time: {np.min(times):.3f} ms")
-
-if __name__ == "__main__":
- benchmark(PyHnswIndex, "HNSW")
- benchmark(AnnIndex, "Brute-Force")
-src/filters.rs in the Rust code.```
- - - - - - - - - - - - - -
-
-
-
- Blazingly fast Approximate Nearest Neighbors in Rust
-# Stable release from PyPI:
-pip install rust-annie
-
-# Install with GPU support (requires CUDA):
-pip install rust-annie[gpu]
-
-# Or install from source:
-git clone https://github.com/Programmers-Paradise/Annie.git
-cd Annie
-pip install maturin
-maturin develop --release
-import numpy as np
-from rust_annie import AnnIndex, Distance
-
-# Create index
-index = AnnIndex(128, Distance.EUCLIDEAN)
-
-# Add data
-data = np.random.rand(1000, 128).astype(np.float32)
-ids = np.arange(1000, dtype=np.int64)
-index.add(data, ids)
-
-# Search
-query = np.random.rand(128).astype(np.float32)
-neighbor_ids, distances = index.search(query, k=5)
-Blazingly fast Approximate Nearest Neighbors in Rust
"},{"location":"#installation","title":"Installation","text":"# Stable release from PyPI:\npip install rust-annie\n\n# Install with GPU support (requires CUDA):\npip install rust-annie[gpu]\n\n# Or install from source:\ngit clone https://github.com/Programmers-Paradise/Annie.git\ncd Annie\npip install maturin\nmaturin develop --release\n"},{"location":"#basic-usage","title":"Basic Usage","text":""},{"location":"#brute-force-index","title":"Brute-Force Index","text":"import numpy as np\nfrom rust_annie import AnnIndex, Distance\n\n# Create index\nindex = AnnIndex(128, Distance.EUCLIDEAN)\n\n# Add data\ndata = np.random.rand(1000, 128).astype(np.float32)\nids = np.arange(1000, dtype=np.int64)\nindex.add(data, ids)\n\n# Search\nquery = np.random.rand(128).astype(np.float32)\nneighbor_ids, distances = index.search(query, k=5)\n"},{"location":"#key-features","title":"Key Features","text":"This section showcases Annie's performance and helps you optimize for your use case.
"},{"location":"benchmarks/#interactive-benchmark-dashboard","title":"Interactive Benchmark Dashboard","text":"If the dashboard does not load, view it here.
"},{"location":"benchmarks/#library-comparison-table","title":"Library Comparison Table","text":"Library Build Time Search Latency Recall@10 Memory Usage CPU GPU Support Annie 1x 1x 99.2% 1x Yes Yes Faiss 1.2x 1.1x 98.7% 1.1x Yes Yes Annoy 2.5x 2.2x 97.5% 1.3x Yes No HNSWlib 1.1x 1.2x 98.9% 1.2x Yes NoAll results normalized to Annie (lower is better for time/latency/memory).
"},{"location":"benchmarks/#latency-vs-accuracy","title":"Latency vs. Accuracy","text":"ef_search, ef_construction) for your workload.For more details, see Performance Optimization Tutorial.
"},{"location":"concurrency/","title":"UsingThreadSafeAnnIndex and PyHnswIndex for Concurrent Access","text":"Annie exposes a thread-safe version of its ANN index (AnnIndex) for use in Python. This is useful when you want to perform parallel search or update operations from Python threads. Additionally, the PyHnswIndex class provides a Python interface to the HNSW index, which now includes enhanced data handling capabilities.
search, search_batch)add, remove)RwLock and exposed via PyO3PyHnswIndex supports mapping internal IDs to user IDs and handling vector data efficientlyfrom annie import ThreadSafeAnnIndex, Distance\nimport numpy as np\nimport threading\n\n# Create index\nindex = ThreadSafeAnnIndex(128, Distance.Cosine)\n\n# Add vectors\ndata = np.random.rand(1000, 128).astype('float32')\nids = np.arange(1000, dtype=np.int64)\nindex.add(data, ids)\n\n# Run concurrent searches\ndef run_search():\n query = np.random.rand(128).astype('float32')\n ids, distances = index.search(query, 10)\n print(ids)\n\nthreads = [threading.Thread(target=run_search) for _ in range(4)]\n[t.start() for t in threads]\n[t.join() for t in threads]\n\n# Using PyHnswIndex\nfrom rust_annie import PyHnswIndex\n\n# Create HNSW index\nhnsw_index = PyHnswIndex(dims=128)\n\n# Add vectors to HNSW index\nhnsw_index.add(data, ids)\n\n# Search in HNSW index\nquery = np.random.rand(128).astype('float32')\nuser_ids, distances = hnsw_index.search(query, 10)\nprint(user_ids)\n"},{"location":"concurrency/#cicd-pipeline-for-pypi-publishing","title":"CI/CD Pipeline for PyPI Publishing","text":"The CI/CD pipeline for PyPI publishing has been updated to include parallel jobs for building wheels and source distributions across multiple operating systems and Python versions. This involves concurrency considerations that should be documented for users who are integrating or maintaining the pipeline.
"},{"location":"concurrency/#pipeline-overview","title":"Pipeline Overview","text":"The pipeline is triggered on pushes and pull requests to the main branch, as well as manually via workflow_dispatch. It includes the following jobs:
ubuntu-latest and includes steps for checking out the code, setting up Rust, caching dependencies, running tests, and checking code formatting.ubuntu-latest, windows-latest, and macos-latest for Python versions 3.8, 3.9, 3.10, and 3.11. This job builds the wheels using maturin and uploads them as artifacts.ubuntu-latest and builds the source distribution using maturin, uploading it as an artifact.workflow_dispatch with the appropriate input.workflow_dispatch with the appropriate input.build-wheels job utilizes a matrix strategy to run builds concurrently across different operating systems and Python versions. This reduces the overall build time but requires careful management of dependencies and environment setup to ensure consistency across platforms.By understanding these concurrency considerations, users can effectively manage and extend the CI/CD pipeline to suit their specific needs.
"},{"location":"concurrency/#annindex-brute-force-nearest-neighbor-search","title":"AnnIndex - Brute-force Nearest Neighbor Search","text":"The AnnIndex class provides efficient brute-force nearest neighbor search with support for multiple distance metrics.
AnnIndex(dim: int, metric: Distance)","text":"Creates a new brute-force index.
dim (int): Vector dimensionmetric (Distance): Distance metric (EUCLIDEAN, COSINE, MANHATTAN, CHEBYSHEV)new_minkowski(dim: int, p: float)","text":"Creates a Minkowski distance index.
dim (int): Vector dimensionp (float): Minkowski exponent (p > 0)add(data: ndarray, ids: ndarray)","text":"Add vectors to the index.
data: N\u00d7dim array of float32 vectorsids: N-dimensional array of int64 IDssearch(query: ndarray, k: int) -> Tuple[ndarray, ndarray]","text":"Search for k nearest neighbors.
query: dim-dimensional query vectork: Number of neighbors to returnsearch_batch(queries: ndarray, k: int) -> Tuple[ndarray, ndarray]","text":"Batch search for multiple queries.
queries: M\u00d7dim array of queriesk: Number of neighbors per querysearch_filter_py(query: ndarray, k: int, filter_fn: Callable[[int], bool]) -> Tuple[ndarray, ndarray]","text":"Search with ID filtering.
query: dim-dimensional query vectork: Maximum neighbors to returnfilter_fn: Function that returns True for allowed IDssave(path: str)","text":"Save index to disk.
"},{"location":"concurrency/#static-loadpath-str-annindex","title":"static load(path: str) -> AnnIndex","text":"Load index from disk.
"},{"location":"concurrency/#example_1","title":"Example","text":"import numpy as np\nfrom rust_annie import AnnIndex, Distance\n\n# Create index\nindex = AnnIndex(128, Distance.EUCLIDEAN)\n\n# Add data\ndata = np.random.rand(1000, 128).astype(np.float32)\nids = np.arange(1000, dtype=np.int64)\nindex.add(data, ids)\n\n# Search\nquery = np.random.rand(128).astype(np.float32)\nneighbor_ids, distances = index.search(query, k=5)\n"},{"location":"concurrency/#pyhnswindex-approximate-nearest-neighbors-with-hnsw","title":"PyHnswIndex - Approximate Nearest Neighbors with HNSW","text":"The PyHnswIndex class provides approximate nearest neighbor search using Hierarchical Navigable Small World (HNSW) graphs.
PyHnswIndex(dims: int)","text":"Creates a new HNSW index.
dims (int): Vector dimensionadd(data: ndarray, ids: ndarray)","text":"Add vectors to the index.
data: N\u00d7dims array of float32 vectorsids: N-dimensional array of int64 IDssearch(vector: ndarray, k: int) -> Tuple[ndarray, ndarray]","text":"Search for k approximate nearest neighbors.
vector: dims-dimensional query vectork: Number of neighbors to returnsave(path: str)","text":"Save index to disk.
"},{"location":"concurrency/#static-loadpath-str-pyhnswindex","title":"static load(path: str) -> PyHnswIndex","text":"Load index from disk (currently not implemented)
"},{"location":"concurrency/#example_2","title":"Example","text":"import numpy as np\nfrom rust_annie import PyHnswIndex\n\n# Create index\nindex = PyHnswIndex(dims=128)\n\n# Add data\ndata = np.random.rand(10000, 128).astype(np.float32)\nids = np.arange(10000, dtype=np.int64)\nindex.add(data, ids)\n\n# Search\nquery = np.random.rand(128).astype(np.float32)\nneighbor_ids, _ = index.search(query, k=10)\n"},{"location":"concurrency/#threadsafeannindex-thread-safe-nearest-neighbor-index","title":"ThreadSafeAnnIndex - Thread-safe Nearest Neighbor Index","text":"The ThreadSafeAnnIndex class provides a thread-safe wrapper around AnnIndex for concurrent access.
ThreadSafeAnnIndex(dim: int, metric: Distance)","text":"Creates a new thread-safe index.
dim (int): Vector dimensionmetric (Distance): Distance metricadd(data: ndarray, ids: ndarray)","text":"Thread-safe vector addition.
"},{"location":"concurrency/#removeids-listint","title":"remove(ids: List[int])","text":"Thread-safe removal by IDs.
"},{"location":"concurrency/#searchquery-ndarray-k-int-tuplendarray-ndarray_1","title":"search(query: ndarray, k: int) -> Tuple[ndarray, ndarray]","text":"Thread-safe single query search.
"},{"location":"concurrency/#search_batchqueries-ndarray-k-int-tuplendarray-ndarray_1","title":"search_batch(queries: ndarray, k: int) -> Tuple[ndarray, ndarray]","text":"Thread-safe batch search.
"},{"location":"concurrency/#savepath-str_2","title":"save(path: str)","text":"Thread-safe save.
"},{"location":"concurrency/#static-loadpath-str-threadsafeannindex","title":"static load(path: str) -> ThreadSafeAnnIndex","text":"Thread-safe load.
"},{"location":"concurrency/#example_3","title":"Example","text":"import numpy as np\nfrom rust_annie import ThreadSafeAnnIndex, Distance\nfrom concurrent.futures import ThreadPoolExecutor\n\n# Create index\nindex = ThreadSafeAnnIndex(128, Distance.COSINE)\n\n# Add data from multiple threads\nwith ThreadPoolExecutor() as executor:\n for i in range(4):\n data = np.random.rand(250, 128).astype(np.float32)\n ids = np.arange(i*250, (i+1)*250, dtype=np.int64)\n executor.submit(index.add, data, ids)\n\n# Concurrent searches\nwith ThreadPoolExecutor() as executor:\n futures = []\n for _ in range(10):\n query = np.random.rand(128).astype(np.float32)\n futures.append(executor.submit(index.search, query, k=5))\n\n for future in futures:\n ids, dists = future.result()\n"},{"location":"concurrency/#annie-examples","title":"Annie Examples","text":""},{"location":"concurrency/#basic-usage","title":"Basic Usage","text":"import numpy as np\nfrom rust_annie import AnnIndex, Distance\n\n# Create index\nindex = AnnIndex(128, Distance.EUCLIDEAN)\n\n# Generate and add data\ndata = np.random.rand(1000, 128).astype(np.float32)\nids = np.arange(1000, dtype=np.int64)\nindex.add(data, ids)\n\n# Single query\nquery = np.random.rand(128).astype(np.float32)\nneighbor_ids, distances = index.search(query, k=5)\n\n# Batch queries\nqueries = np.random.rand(10, 128).astype(np.float32)\nbatch_ids, batch_dists = index.search_batch(queries, k=3)\n"},{"location":"concurrency/#filtered-search","title":"Filtered Search","text":"# Create index with sample data\nindex = AnnIndex(3, Distance.EUCLIDEAN)\ndata = np.array([\n [1.0, 2.0, 3.0],\n [4.0, 5.0, 6.0],\n [7.0, 8.0, 9.0]\n], dtype=np.float32)\nids = np.array([10, 20, 30], dtype=np.int64)\nindex.add(data, ids)\n\n# Define filter function\ndef even_ids(id: int) -> bool:\n return id % 2 == 0\n\n# Filtered search\nquery = np.array([1.0, 2.0, 3.0], dtype=np.float32)\nfiltered_ids, filtered_dists = index.search_filter_py(query, k=3, filter_fn=even_ids)\n# Only IDs 10 and 30 will be returned (20 is odd)\n"},{"location":"concurrency/#hnsw-index","title":"HNSW Index","text":"from rust_annie import PyHnswIndex\n\n# Create HNSW index\nindex = PyHnswIndex(dims=128)\n\n# Add large dataset\ndata = np.random.rand(100000, 128).astype(np.float32)\nids = np.arange(100000, dtype=np.int64)\nindex.add(data, ids)\n\n# Fast approximate search\nquery = np.random.rand(128).astype(np.float32)\nneighbor_ids, _ = index.search(query, k=10)\n"},{"location":"concurrency/#saving-and-loading","title":"Saving and Loading","text":"# Create and save index\nindex = AnnIndex(64, Distance.COSINE)\ndata = np.random.rand(500, 64).astype(np.float32)\nids = np.arange(500, dtype=np.int64)\nindex.add(data, ids)\nindex.save(\"my_index\")\n\n# Load index\nloaded_index = AnnIndex.load(\"my_index\")\n"},{"location":"concurrency/#thread-safe-operations","title":"Thread-safe Operations","text":"from rust_annie import ThreadSafeAnnIndex, Distance\nfrom concurrent.futures import ThreadPoolExecutor\n\nindex = ThreadSafeAnnIndex(256, Distance.MANHATTAN)\n\n# Concurrent writes\nwith ThreadPoolExecutor() as executor:\n for i in range(10):\n data = np.random.rand(100, 256).astype(np.float32)\n ids = np.arange(i*100, (i+1)*100, dtype=np.int64)\n executor.submit(index.add, data, ids)\n\n# Concurrent reads\nwith ThreadPoolExecutor() as executor:\n futures = []\n for _ in range(100):\n query = np.random.rand(256).astype(np.float32)\n futures.append(executor.submit(index.search, query, k=3))\n\n results = [f.result() for f in futures]\n"},{"location":"concurrency/#minkowski-distance","title":"Minkowski Distance","text":"# Create index with custom distance\nindex = AnnIndex.new_minkowski(dim=64, p=2.5)\ndata = np.random.rand(200, 64).astype(np.float32)\nids = np.arange(200, dtype=np.int64)\nindex.add(data, ids)\n\n# Search with Minkowski distance\nquery = np.random.rand(64).astype(np.float32)\nids, dists = index.search(query, k=5)\n"},{"location":"concurrency/#filtering","title":"Filtering","text":""},{"location":"concurrency/#why-filtering","title":"Why Filtering?","text":"Filters allow you to narrow down search results dynamically based on: - Metadata (e.g., tags, IDs, labels) - Numeric thresholds (e.g., only items above/below a value) - Custom user-defined logic
This improves both precision and flexibility of search.
"},{"location":"concurrency/#example-python-api","title":"Example: Python API","text":"from rust_annie import AnnIndex\nimport numpy as np\n\n# 1. Create an index with vector dimension 128\nindex = AnnIndex(dimension=128)\n\n# 2. Add data with metadata\nvector0 = np.random.rand(128).astype(np.float32)\nvector1 = np.random.rand(128).astype(np.float32)\n\nindex.add_item(0, vector0, metadata={\"category\": \"A\"})\nindex.add_item(1, vector1, metadata={\"category\": \"B\"})\n\n# 3. Define a filter function (e.g., only include items where category == \"A\")\ndef category_filter(metadata):\n return metadata.get(\"category\") == \"A\"\n\n# 4. Perform search with the filter applied\nquery_vector = np.random.rand(128).astype(np.float32)\nresults = index.search(query_vector, k=5, filter=category_filter)\n\nprint(\"Filtered search results:\", results)\n"},{"location":"concurrency/#supported-filters","title":"Supported Filters","text":"This library supports applying filters to narrow down ANN search results dynamically.
Filter type Example EqualsFilter.equals(\"category\", \"A\") Greater than Filter.gt(\"score\", 0.8) Less than Filter.lt(\"price\", 100) Custom predicate Filter.custom(lambda metadata: ...) Filters work on the metadata you provide when adding items to the index.
"},{"location":"concurrency/#sorting-behavior","title":"Sorting Behavior","text":"The BruteForceIndex now uses total_cmp for sorting, which provides NaN-resistant sorting behavior. This change ensures that any NaN values in the data are handled consistently, preventing potential issues with partial comparisons.
The library now includes a benchmarking function to evaluate the performance of different index types, specifically PyHnswIndex and AnnIndex. This function measures the average, maximum, and minimum query times, providing insights into the efficiency of each index type.
import numpy as np\nimport time\nfrom rust_annie import PyHnswIndex, AnnIndex\n\ndef benchmark(index_cls, name, dim=128, n=10_000, q=100, k=10):\n print(f\"\\nBenchmarking {name} with {n} vectors (dim={dim})...\")\n\n # Data\n data = np.random.rand(n, dim).astype(np.float32)\n ids = np.arange(n, dtype=np.int64)\n queries = np.random.rand(q, dim).astype(np.float32)\n\n # Index setup\n index = index_cls(dims=dim)\n index.add(data, ids)\n\n # Warm-up + Timing\n times = []\n for i in range(q):\n start = time.perf_counter()\n _ = index.search(queries[i], k=k)\n times.append((time.perf_counter() - start) * 1000)\n\n print(f\" Avg query time: {np.mean(times):.3f} ms\")\n print(f\" Max query time: {np.max(times):.3f} ms\")\n print(f\" Min query time: {np.min(times):.3f} ms\")\n\nif __name__ == \"__main__\":\n benchmark(PyHnswIndex, \"HNSW\")\n benchmark(AnnIndex, \"Brute-Force\")\n"},{"location":"concurrency/#integration-extensibility","title":"Integration & Extensibility","text":"src/filters.rs in the Rust code.Blazingly fast Approximate Nearest Neighbors in Rust
"},{"location":"concurrency/#installation","title":"Installation","text":"pip install rust_annie\n"},{"location":"concurrency/#basic-usage_1","title":"Basic Usage","text":"import numpy as np\nfrom rust_annie import AnnIndex, Distance\n\n# Create index\nindex = AnnIndex(128, Distance.EUCLIDEAN)\n\n# Add data\ndata = np.random.rand(1000, 128).astype(np.float32)\nids = np.arange(1000, dtype=np.int64)\nindex.add(data, ids)\n\n# Search\nquery = np.random.rand(128).astype(np.float32)\nneighbor_ids, distances = index.search(query, k=5)\n"},{"location":"concurrency/#key-features_1","title":"Key Features","text":"Thank you for your interest in contributing to Annie's documentation! This guide will help you get started with contributing to our documentation site.
"},{"location":"contributing/#table-of-contents","title":"Table of Contents","text":"git clone https://github.com/YOUR-USERNAME/Annie-Docs.git\ncd Annie-Docs\n ./build-docs.sh\n source venv/bin/activate\nmkdocs serve\n http://localhost:8000 to see your changes live.docs/\n\u251c\u2500\u2500 index.md # Homepage\n\u251c\u2500\u2500 api/ # API Reference\n\u2502 \u251c\u2500\u2500 ann_index.md # AnnIndex class\n\u2502 \u251c\u2500\u2500 hnsw_index.md # PyHnswIndex class\n\u2502 \u2514\u2500\u2500 threadsafe_index.md\n\u251c\u2500\u2500 examples.md # Usage examples\n\u251c\u2500\u2500 concurrency.md # Thread-safety features\n\u2514\u2500\u2500 filtering.md # Filtered search\n"},{"location":"contributing/#setting-up-development-environment","title":"Setting Up Development Environment","text":""},{"location":"contributing/#manual-setup","title":"Manual Setup","text":"# Create virtual environment\npython3 -m venv venv\nsource venv/bin/activate\n\n# Install dependencies\npip install -r requirements.txt\n\n# Build site\nmkdocs build\n\n# Serve locally with auto-reload\nmkdocs serve --dev-addr=0.0.0.0:8000\n"},{"location":"contributing/#using-scripts","title":"Using Scripts","text":"# Build documentation\n./build-docs.sh\n\n# Deploy (build + prepare for hosting)\n./deploy.sh\n"},{"location":"contributing/#making-changes","title":"Making Changes","text":""},{"location":"contributing/#types-of-contributions","title":"Types of Contributions","text":"git checkout -b feature/improve-examples\n Make Your Changes
Edit files in the docs/ directory
Follow our writing guidelines
Test Locally
mkdocs serve\n Visit http://localhost:8000 to review changes
mkdocs build\n Ensure no build errors# for main headings, ## for sections, ### for subsections# Good\nimport numpy as np\n**bold** for emphasis, *italic* for secondary emphasisinline code and class names like AnnIndex# Good example\nimport numpy as np\nfrom rust_annie import AnnIndex, Distance\n\n# Create index for 128-dimensional vectors\nindex = AnnIndex(128, Distance.EUCLIDEAN)\n\n# Add sample data\ndata = np.random.rand(1000, 128).astype(np.float32)\nids = np.arange(1000, dtype=np.int64)\nindex.add(data, ids)\n\n# Search for nearest neighbors\nquery = np.random.rand(128).astype(np.float32)\nneighbor_ids, distances = index.search(query, k=5)\nprint(f\"Found {len(neighbor_ids)} neighbors\")\n"},{"location":"contributing/#api-documentation","title":"API Documentation","text":"mkdocs build # Check for build errors\nmkdocs serve # Test locally\n Check Links
Ensure all internal links work
Verify external links are accessible
Review Content
git add .\ngit commit -m \"docs: improve examples in filtering.md\"\n git push origin feature/improve-examples\n ## Description\n\nBrief description of changes made.\n\n## Type of Change\n\n- [ ] Bug fix (typo, broken link, etc.)\n- [ ] Content update (new examples, clarifications)\n- [ ] New documentation (new features)\n- [ ] Structure improvement\n\n## Testing\n\n- [ ] Built successfully with `mkdocs build`\n- [ ] Tested locally with `mkdocs serve`\n- [ ] Checked all links work\n- [ ] Verified code examples run\n\n## Screenshots (if applicable)\n\nAdd screenshots of significant visual changes.\n"},{"location":"contributing/#review-process","title":"Review Process","text":""},{"location":"contributing/#what-we-look-for","title":"What We Look For","text":"main branchWhen creating issues, use these labels:
documentation - General documentation issuesbug - Errors in docs (typos, broken links)enhancement - Improvements to existing contentnew-content - Requests for new documentationgood-first-issue - Good for newcomersThank you for helping make Annie's documentation better!
"},{"location":"examples/","title":"Examples","text":""},{"location":"examples/#table-of-contents","title":"Table of Contents","text":"Interactive Examples:
You can now run selected code blocks directly in your browser! Click the Try it button above a code block to execute it. Use sliders to adjust parameters like vector dimension or dataset size. Powered by Pyodide (Python in the browser). Learn more.
"},{"location":"examples/#basic-usage","title":"Basic Usage","text":"Dimension: 128 Dataset size: 1000import numpy as np\nfrom rust_annie import AnnIndex, Distance\n\ndim = {{dim|128}}\nsize = {{size|1000}}\n\n# Create index\nindex = AnnIndex(dim, Distance.EUCLIDEAN)\n\n# Generate and add data\ndata = np.random.rand(size, dim).astype(np.float32)\nids = np.arange(size, dtype=np.int64)\nindex.add(data, ids)\n\n# Single query\nquery = np.random.rand(dim).astype(np.float32)\nneighbor_ids, distances = index.search(query, k=5)\nprint(neighbor_ids, distances)\n\n# Batch queries\nqueries = np.random.rand(10, dim).astype(np.float32)\nbatch_ids, batch_dists = index.search_batch(queries, k=3)\nprint(batch_ids.shape, batch_dists.shape)\n"},{"location":"examples/#filtered-search","title":"Filtered Search","text":"# Create index with sample data\nindex = AnnIndex(3, Distance.EUCLIDEAN)\ndata = np.array([\n [1.0, 2.0, 3.0],\n [4.0, 5.0, 6.0],\n [7.0, 8.0, 9.0]\n\n## Filtered Search\n<div class=\"interactive-block\" data-interactive>\n```python\nimport numpy as np\nfrom rust_annie import AnnIndex, Distance\n\n# Create index with sample data\nindex = AnnIndex(3, Distance.EUCLIDEAN)\ndata = np.array([\n [1.0, 2.0, 3.0],\n [4.0, 5.0, 6.0],\n [7.0, 8.0, 9.0]\n], dtype=np.float32)\nids = np.array([10, 20, 30], dtype=np.int64)\nindex.add(data, ids)\n\n# Define filter function\ndef even_ids(id: int) -> bool:\n return id % 2 == 0\n\n# Filtered search\nquery = np.array([1.0, 2.0, 3.0], dtype=np.float32)\nfiltered_ids, filtered_dists = index.search_filter_py(query, k=3, filter_fn=even_ids)\nprint(filtered_ids)\n ], dtype=np.float32) ids = np.array([10, 20, 30], dtype=np.int64) index.add(data, ids)"},{"location":"examples/#define-filter-function","title":"Define filter function","text":"def even_ids(id: int) -> bool: return id % 2 == 0
"},{"location":"examples/#filtered-search_1","title":"Filtered search","text":"query = np.array([1.0, 2.0, 3.0], dtype=np.float32) filtered_ids, filtered_dists = index.search_filter_py(query, k=3, filter_fn=even_ids)
"},{"location":"examples/#only-ids-10-and-30-will-be-returned-20-is-odd","title":"Only IDs 10 and 30 will be returned (20 is odd)","text":"## HNSW Index\n```python\n\n## HNSW Index\n<div class=\"interactive-block\" data-interactive>\n<div class=\"interactive-controls\">\n<label>Dimension: <input type=\"range\" min=\"8\" max=\"256\" value=\"128\" class=\"slider\" data-var=\"dim\" /></label>\n<span class=\"slider-value\" data-var=\"dim\">128</span>\n<label>Dataset size: <input type=\"range\" min=\"1000\" max=\"200000\" value=\"100000\" class=\"slider\" data-var=\"size\" /></label>\n<span class=\"slider-value\" data-var=\"size\">100000</span>\n</div>\n```python\nimport numpy as np\nfrom rust_annie import PyHnswIndex\n\ndim = {{dim|128}}\nsize = {{size|100000}}\n\n# Create HNSW index\nindex = PyHnswIndex(dims=dim)\n\n# Add large dataset\ndata = np.random.rand(size, dim).astype(np.float32)\nids = np.arange(size, dtype=np.int64)\nindex.add(data, ids)\n\n# Fast approximate search\nquery = np.random.rand(dim).astype(np.float32)\nneighbor_ids, _ = index.search(query, k=10)\nprint(neighbor_ids)\n from rust_annie import PyHnswIndex"},{"location":"examples/#create-hnsw-index","title":"Create HNSW index","text":"index = PyHnswIndex(dims=128)
"},{"location":"examples/#add-large-dataset","title":"Add large dataset","text":"data = np.random.rand(100000, 128).astype(np.float32) ids = np.arange(100000, dtype=np.int64) index.add(data, ids)
"},{"location":"examples/#fast-approximate-search","title":"Fast approximate search","text":"query = np.random.rand(128).astype(np.float32) neighbor_ids, _ = index.search(query, k=10)
## Saving and Loading\n```python\n# Create and save index\nindex = AnnIndex(64, Distance.COSINE)\ndata = np.random.rand(500, 64).astype(np.float32)\nids = np.arange(500, dtype=np.int64)\nindex.add(data, ids)\nindex.save(\"my_index\")\n\n# Load index\nloaded_index = AnnIndex.load(\"my_index\")\n"},{"location":"examples/#thread-safe-operations","title":"Thread-safe Operations","text":"from rust_annie import ThreadSafeAnnIndex, Distance\nfrom concurrent.futures import ThreadPoolExecutor\n\nindex = ThreadSafeAnnIndex(256, Distance.MANHATTAN)\n\n# Concurrent writes\nwith ThreadPoolExecutor() as executor:\n for i in range(10):\n data = np.random.rand(100, 256).astype(np.float32)\n ids = np.arange(i*100, (i+1)*100, dtype=np.int64)\n executor.submit(index.add, data, ids)\n\n# Concurrent reads\nwith ThreadPoolExecutor() as executor:\n futures = []\n for _ in range(100):\n query = np.random.rand(256).astype(np.float32)\n futures.append(executor.submit(index.search, query, k=3))\n\n results = [f.result() for f in futures]\n"},{"location":"examples/#minkowski-distance","title":"Minkowski Distance","text":"# Create index with custom distance\nindex = AnnIndex.new_minkowski(dim=64, p=2.5)\ndata = np.random.rand(200, 64).astype(np.float32)\nids = np.arange(200, dtype=np.int64)\nindex.add(data, ids)\n\n# Search with Minkowski distance\nquery = np.random.rand(64).astype(np.float32)\nids, dists = index.search(query, k=5)\n"},{"location":"examples/#readme","title":"README","text":"A lightning-fast, Rust-powered Approximate Nearest Neighbor library for Python with multiple backends, thread-safety, and GPU acceleration.
"},{"location":"examples/#table-of-contents_1","title":"Table of Contents","text":"# Stable release from PyPI:\npip install rust-annie\n\n# Install with GPU support (requires CUDA):\npip install rust-annie[gpu]\n\n# Or install from source:\ngit clone https://github.com/Programmers-Paradise/Annie.git\ncd Annie\npip install maturin\nmaturin develop --release\n"},{"location":"examples/#quick-start","title":"Quick Start","text":""},{"location":"examples/#brute-force-index","title":"Brute-Force Index","text":"import numpy as np\nfrom rust_annie import AnnIndex, Distance\n\n# Create index\nindex = AnnIndex(128, Distance.EUCLIDEAN)\n\n# Add data\ndata = np.random.rand(1000, 128).astype(np.float32)\nids = np.arange(1000, dtype=np.int64)\nindex.add(data, ids)\n\n# Search\nquery = np.random.rand(128).astype(np.float32)\nneighbor_ids, distances = index.search(query, k=5)\n"},{"location":"examples/#hnsw-index","title":"HNSW Index","text":"from rust_annie import PyHnswIndex\n\nindex = PyHnswIndex(dims=128)\ndata = np.random.rand(10000, 128).astype(np.float32)\nids = np.arange(10000, dtype=np.int64)\nindex.add(data, ids)\n\n# Search\nquery = np.random.rand(128).astype(np.float32)\nneighbor_ids, _ = index.search(query, k=10)\n"},{"location":"examples/#examples","title":"Examples","text":""},{"location":"examples/#brute-force-index_1","title":"Brute-Force Index","text":"from rust_annie import AnnIndex, Distance\nimport numpy as np\n\n# Create index\nidx = AnnIndex(4, Distance.COSINE)\n\n# Add data\ndata = np.random.rand(50, 4).astype(np.float32)\nids = np.arange(50, dtype=np.int64)\nidx.add(data, ids)\n\n# Search\nlabels, dists = idx.search(data[10], k=3)\nprint(labels, dists)\n"},{"location":"examples/#batch-query","title":"Batch Query","text":"from rust_annie import AnnIndex, Distance\nimport numpy as np\n\n# Create index\nidx = AnnIndex(16, Distance.EUCLIDEAN)\n\n# Add data\ndata = np.random.rand(1000, 16).astype(np.float32)\nids = np.arange(1000, dtype=np.int64)\nidx.add(data, ids)\n\n# Batch search\nqueries = data[:32]\nlabels_batch, dists_batch = idx.search_batch(queries, k=10)\nprint(labels_batch.shape) # (32, 10)\n"},{"location":"examples/#thread-safe-index","title":"Thread-Safe Index","text":"from rust_annie import ThreadSafeAnnIndex, Distance\nimport numpy as np\nfrom concurrent.futures import ThreadPoolExecutor\n\n# Create thread-safe index\nidx = ThreadSafeAnnIndex(32, Distance.EUCLIDEAN)\n\n# Add data\ndata = np.random.rand(500, 32).astype(np.float32)\nids = np.arange(500, dtype=np.int64)\nidx.add(data, ids)\n\n# Concurrent searches\ndef task(q):\n return idx.search(q, k=5)\n\nqueries = np.random.rand(100, 32).astype(np.float32)\nwith ThreadPoolExecutor(max_workers=8) as executor:\n futures = [executor.submit(task, q) for q in queries]\n for f in futures:\n print(f.result())\n"},{"location":"examples/#filtered-search_2","title":"Filtered Search","text":"from rust_annie import AnnIndex, Distance\nimport numpy as np\n\n# Create index\nindex = AnnIndex(3, Distance.EUCLIDEAN)\ndata = np.array([\n [1.0, 2.0, 3.0],\n [4.0, 5.0, 6.0],\n [7.0, 8.0, 9.0]\n], dtype=np.float32)\nids = np.array([10, 20, 30], dtype=np.int64)\nindex.add(data, ids)\n\n# Filter function\ndef even_ids(id: int) -> bool:\n return id % 2 == 0\n\n# Filtered search\nquery = np.array([1.0, 2.0, 3.0], dtype=np.float32)\nfiltered_ids, filtered_dists = index.search_filter_py(\n query, \n k=3, \n filter_fn=even_ids\n)\nprint(filtered_ids) # [10, 30] (20 is filtered out)\n"},{"location":"examples/#build-and-query-a-brute-force-annindex-in-python-complete-example","title":"Build and Query a Brute-Force AnnIndex in Python (Complete Example)","text":"This section demonstrates a complete, beginner-friendly example of how to build and query a brute-force AnnIndex using Python.
Measured on a 6-core CPU:
That\u2019s a \\~4\u00d7 speedup vs. NumPy!
Operation Dataset Size Time (ms) Speedup vs Python Single Query (Brute) 10,000 \u00d7 64 0.7 4\u00d7 Batch Query (64) 10,000 \u00d7 64 0.23 12\u00d7 HNSW Query 100,000 \u00d7 128 0.05 56\u00d7"},{"location":"examples/#view-full-benchmark-dashboard","title":"View Full Benchmark Dashboard \u2192","text":"You\u2019ll find:
"},{"location":"examples/#api-reference","title":"API Reference","text":""},{"location":"examples/#annindex","title":"AnnIndex","text":"Create a brute-force k-NN index.
Enum: Distance.EUCLIDEAN, Distance.COSINE, Distance.MANHATTAN
Same API as AnnIndex, safe for concurrent use.
CI runs on GitHub Actions, building wheels on Linux, Windows, macOS, plus:
benchmark.py & batch_benchmark.py & compare_results.py# Run tests\ncargo test\npytest tests/\n\n# Run benchmarks\npython scripts/benchmark.py\npython scripts/batch_benchmark.py\n\n# Generate documentation\nmkdocs build\n CI pipeline includes: - Cross-platform builds (Linux, Windows, macOS) - Unit tests and integration tests - Performance benchmarking - Documentation generation
"},{"location":"examples/#benchmark-automation","title":"Benchmark Automation","text":"Benchmarks are tracked over time using:
"},{"location":"examples/#gpu-acceleration","title":"GPU Acceleration","text":""},{"location":"examples/#enable-gpu-in-rust","title":"Enable GPU in Rust","text":"Enable CUDA support for brute-force calculations:
# Install with GPU support\npip install rust-annie[gpu]\n\n# Or build from source with GPU features\nmaturin develop --release --features gpu\n Supported operations: - Batch L2 distance calculations - High-dimensional similarity search
Requirements: - NVIDIA GPU with CUDA support - CUDA Toolkit installed
"},{"location":"examples/#contributing","title":"Contributing","text":"Contributions are welcome! Please:
See the main CONTRIBUTING guide for details.
"},{"location":"examples/#license","title":"License","text":"This project is licensed under the MIT License. See LICENSE for details.
"},{"location":"faq/","title":"Frequently Asked Questions (FAQ)","text":"Welcome to the Annie FAQ! Use your browser's search (Ctrl+F) to quickly find answers. Questions are grouped by category for easy navigation.
"},{"location":"faq/#general","title":"General","text":"pip install rust-annie.requirements.txt are installed.See migration guide below.
See migration guide.
For more troubleshooting, see troubleshooting.md.
"},{"location":"filtering/","title":"Filtered Search","text":"## ANN Search Filtering\n\nThis document explains how to use the filtering capabilities to improve Approximate Nearest Neighbor (ANN) search.\n\n### Why Filtering?\n\nFilters allow you to narrow down search results dynamically based on:\n- Metadata (e.g., tags, IDs, labels)\n- Numeric thresholds (e.g., only items above/below a value)\n- Custom user-defined logic\n\nThis improves both precision and flexibility of search.\n\n#### Example: Python API\n\n```python\nfrom rust_annie import AnnIndex\nimport numpy as np\n\n# 1. Create an index with vector dimension 128\nindex = AnnIndex(dimension=128)\n\n# 2. Add data with metadata\nvector0 = np.random.rand(128).astype(np.float32)\nvector1 = np.random.rand(128).astype(np.float32)\n\nindex.add_item(0, vector0, metadata={\"category\": \"A\"})\nindex.add_item(1, vector1, metadata={\"category\": \"B\"})\n\n# 3. Define a filter function (e.g., only include items where category == \"A\")\ndef category_filter(metadata):\n return metadata.get(\"category\") == \"A\"\n\n# 4. Perform search with the filter applied\nquery_vector = np.random.rand(128).astype(np.float32)\nresults = index.search(query_vector, k=5, filter=category_filter)\n\nprint(\"Filtered search results:\", results)\n"},{"location":"filtering/#supported-filters","title":"Supported Filters","text":"This library supports applying filters to narrow down ANN search results dynamically.
Filter type Example EqualsFilter.equals(\"category\", \"A\") Greater than Filter.gt(\"score\", 0.8) Less than Filter.lt(\"price\", 100) Custom predicate Filter.custom(lambda metadata: ...) Filters work on the metadata you provide when adding items to the index.
"},{"location":"filtering/#new-feature-filtered-search-with-custom-python-callbacks","title":"New Feature: Filtered Search with Custom Python Callbacks","text":"The library now supports filtered search using custom Python callbacks, allowing for more complex filtering logic directly in Python.
"},{"location":"filtering/#example-filtered-search-with-python-callback","title":"Example: Filtered Search with Python Callback","text":"from rust_annie import AnnIndex, Distance\nimport numpy as np\n\n# Create index\nindex = AnnIndex(3, Distance.EUCLIDEAN)\ndata = np.array([\n [1.0, 2.0, 3.0],\n [4.0, 5.0, 6.0],\n [7.0, 8.0, 9.0]\n], dtype=np.float32)\nids = np.array([10, 20, 30], dtype=np.int64)\nindex.add(data, ids)\n\n# Filter function\ndef even_ids(id: int) -> bool:\n return id % 2 == 0\n\n# Filtered search\nquery = np.array([1.0, 2.0, 3.0], dtype=np.float32)\nfiltered_ids, filtered_dists = index.search_filter_py(\n query, \n k=3, \n filter_fn=even_ids\n)\nprint(filtered_ids) # [10, 30] (20 is filtered out)\n"},{"location":"filtering/#sorting-behavior","title":"Sorting Behavior","text":"The BruteForceIndex now uses total_cmp for sorting, which provides NaN-resistant sorting behavior. This change ensures that any NaN values in the data are handled consistently, preventing potential issues with partial comparisons.
The library now includes a benchmarking function to evaluate the performance of different index types, specifically PyHnswIndex and AnnIndex. This function measures the average, maximum, and minimum query times, providing insights into the efficiency of each index type.
import numpy as np\nimport time\nfrom rust_annie import PyHnswIndex, AnnIndex\n\ndef benchmark(index_cls, name, dim=128, n=10_000, q=100, k=10):\n print(f\"\\nBenchmarking {name} with {n} vectors (dim={dim})...\")\n\n # Data\n data = np.random.rand(n, dim).astype(np.float32)\n ids = np.arange(n, dtype=np.int64)\n queries = np.random.rand(q, dim).astype(np.float32)\n\n # Index setup\n index = index_cls(dims=dim)\n index.add(data, ids)\n\n # Warm-up + Timing\n times = []\n for i in range(q):\n start = time.perf_counter()\n _ = index.search(queries[i], k=k)\n times.append((time.perf_counter() - start) * 1000)\n\n print(f\" Avg query time: {np.mean(times):.3f} ms\")\n print(f\" Max query time: {np.max(times):.3f} ms\")\n print(f\" Min query time: {np.min(times):.3f} ms\")\n\nif __name__ == \"__main__\":\n benchmark(PyHnswIndex, \"HNSW\")\n benchmark(AnnIndex, \"Brute-Force\")\n"},{"location":"filtering/#integration-extensibility","title":"Integration & Extensibility","text":"src/filters.rs in the Rust code.```
"},{"location":"troubleshooting/","title":"Troubleshooting Guide","text":"This guide helps you resolve common installation, build, and runtime issues with Annie and its documentation.
"},{"location":"troubleshooting/#installation-issues","title":"Installation Issues","text":"pip install rust-anniepip install maturinmkdocs.yml.pip install -r requirements.txtpip install mkdocs-material.pip install pymdown-extensions.If your issue is not listed, please open an issue and include error messages and environment details.
"},{"location":"api/ann_index/","title":"AnnIndex API Documentation","text":"Documentation for AnnIndex will be available soon.
"},{"location":"api/hnsw_index/","title":"PyHnswIndex API Documentation","text":"Documentation for PyHnswIndex will be available soon.
"},{"location":"api/threadsafe_index/","title":"ThreadSafeAnnIndex API Documentation","text":"Documentation for ThreadSafeAnnIndex will be available soon.
"},{"location":"tutorials/","title":"Annie Tutorials: Learning Path","text":"Welcome! This series will guide you from beginner to advanced usage of Annie. Each tutorial includes an estimated completion time and builds on previous lessons.
"},{"location":"tutorials/#beginner-tutorials","title":"Beginner Tutorials","text":"For more examples, see examples.md.
"},{"location":"tutorials/01-getting-started/","title":"1. Getting Started with Annie","text":"Estimated time: 5 minutes
This tutorial will help you install Annie and run your first nearest neighbor search.
"},{"location":"tutorials/01-getting-started/#prerequisites","title":"Prerequisites","text":"pip install rust-annie\nimport rust_annie\nprint(rust_annie.__version__)\nfrom rust_annie import AnnIndex, Distance\nindex = AnnIndex(128, Distance.EUCLIDEAN)\nprint(\"Index created!\")\nEstimated time: 7 minutes
Learn how to add data to your Annie index.
"},{"location":"tutorials/02-indexing-basics/#steps","title":"Steps","text":"import numpy as np\ndata = np.random.rand(1000, 128).astype(np.float32)\nids = np.arange(1000, dtype=np.int64)\nfrom rust_annie import AnnIndex, Distance\nindex = AnnIndex(128, Distance.EUCLIDEAN)\nindex.add(data, ids)\nprint(\"Data added!\")\nEstimated time: 7 minutes
Learn how to search for nearest neighbors in your index.
"},{"location":"tutorials/03-basic-search/#steps","title":"Steps","text":"query = np.random.rand(128).astype(np.float32)\nneighbor_ids, distances = index.search(query, k=5)\nprint(\"Neighbors:\", neighbor_ids)\nEstimated time: 6 minutes
Learn how to save your index to disk and load it later.
"},{"location":"tutorials/04-saving-loading/#steps","title":"Steps","text":"index.save(\"my_index.ann\")\nfrom rust_annie import AnnIndex\nindex = AnnIndex.load(\"my_index.ann\")\nprint(\"Index loaded!\")\nEstimated time: 8 minutes
Learn how to add and search multiple vectors efficiently.
"},{"location":"tutorials/05-batch-operations/#steps","title":"Steps","text":"index.add(data, ids)\nqueries = np.random.rand(10, 128).astype(np.float32)\nresults = index.batch_search(queries, k=5)\nprint(results)\nEstimated time: 10 minutes
Learn best practices for deploying Annie in production environments.
"},{"location":"tutorials/06-production-usage/#topics","title":"Topics","text":"index = AnnIndex.load(\"prod_index.ann\")\n# Add monitoring/logging hooks as needed\n"},{"location":"tutorials/06-production-usage/#next-filtering-and-metadata","title":"Next: Filtering and Metadata","text":""},{"location":"tutorials/07-filtering-metadata/","title":"7. Filtering and Metadata","text":"Estimated time: 10 minutes
Learn how to use filtering and attach metadata to your vectors.
"},{"location":"tutorials/07-filtering-metadata/#steps","title":"Steps","text":"add method with metadata if supported.# Example assumes filtering API is available\nresults = index.search(query, k=5, filter={\"category\": \"A\"})\n"},{"location":"tutorials/07-filtering-metadata/#next-debugging-and-troubleshooting","title":"Next: Debugging and Troubleshooting","text":""},{"location":"tutorials/08-debugging/","title":"8. Debugging and Troubleshooting","text":"Estimated time: 8 minutes
Learn how to debug common issues and use Annie's troubleshooting tools.
"},{"location":"tutorials/08-debugging/#topics","title":"Topics","text":"try:\n index.add(data, ids)\nexcept Exception as e:\n print(\"Error:\", e)\n"},{"location":"tutorials/08-debugging/#next-custom-distance-metrics","title":"Next: Custom Distance Metrics","text":""},{"location":"tutorials/09-custom-metrics/","title":"9. Custom Distance Metrics","text":"Estimated time: 12 minutes
Learn how to define and use custom distance metrics in Annie.
"},{"location":"tutorials/09-custom-metrics/#steps","title":"Steps","text":"from rust_annie import AnnIndex, Distance\nindex = AnnIndex(128, Distance.COSINE)\n"},{"location":"tutorials/09-custom-metrics/#next-gpu-acceleration","title":"Next: GPU Acceleration","text":""},{"location":"tutorials/10-gpu-usage/","title":"10. GPU Acceleration","text":"Estimated time: 15 minutes
Learn how to use GPU acceleration with Annie (if supported).
"},{"location":"tutorials/10-gpu-usage/#steps","title":"Steps","text":"# Example only if GPU support is available\nindex = AnnIndex(128, Distance.EUCLIDEAN, use_gpu=True)\n"},{"location":"tutorials/10-gpu-usage/#next-performance-optimization","title":"Next: Performance Optimization","text":""},{"location":"tutorials/11-performance/","title":"11. Performance Optimization","text":"Estimated time: 12 minutes
Learn how to tune Annie for maximum performance.
"},{"location":"tutorials/11-performance/#topics","title":"Topics","text":"# Adjust index parameters for your workload\nindex = AnnIndex(128, Distance.EUCLIDEAN, ef_search=100, ef_construction=200)\n For more, see Performance FAQ.
"},{"location":"tutorials/usecase-image-search/","title":"Use Case: Image Search with Annie","text":"Estimated time: 12 minutes
Learn how to use Annie for image similarity search.
"},{"location":"tutorials/usecase-image-search/#steps","title":"Steps","text":"# Index image embeddings\nindex.add(image_embeddings, image_ids)\n# Query with new image\nsimilar_images, _ = index.search(query_embedding, k=5)\n For more, see examples.md.
"},{"location":"tutorials/usecase-recommendation/","title":"Use Case: Building a Recommendation System","text":"Estimated time: 15 minutes
Learn how to use Annie to build a simple recommendation system.
"},{"location":"tutorials/usecase-recommendation/#steps","title":"Steps","text":"# Index item vectors\nindex.add(item_vectors, item_ids)\n# Query with user vector\nrecommendations, _ = index.search(user_vector, k=10)\n For more use cases, see examples.md.
"}]} \ No newline at end of file diff --git a/site/sitemap.xml b/site/sitemap.xml deleted file mode 100644 index 01d5325..0000000 --- a/site/sitemap.xml +++ /dev/null @@ -1,103 +0,0 @@ - -