Update examples (#12)

* Update examples
apify · Aug 2, 2024 · 12fb3fb · 12fb3fb
1 parent c32d5d6
commit 12fb3fb
Show file tree

Hide file tree

Showing 4 changed files with 18 additions and 7 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog
 
+## [0.1.2](https://github.com/apify/apify-haystack/releases/tag/0.1.2)  (2024-08-02)
+
+🐛 Bug Fixes
+- Correct examples in the documentation
+
 ## [0.1.1](https://github.com/apify/apify-haystack/releases/tag/0.1.1)  (2024-07-31)
 
 🐛 Bug Fixes

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "apify_haystack"
-version = "0.1.1"
+version = "0.1.2"
 description = "Apify-haystack integration"
 authors = ["Apify Technologies s.r.o. <support@apify.com>"]
 homepage = "https://apify.com"

diff --git a/src/apify_haystack/examples/crawl_and_process_data.py b/src/apify_haystack/examples/crawl_and_process_data.py
@@ -4,7 +4,8 @@
 This script demonstrates how to extract content from a website using Apify's Website Content Crawler.
 The content is then cleaned, split into smaller chunks, embedded, and stored in the InMemoryDocumentStore.
 
-After the pipeline is executed, the documents are retrieved from the document store using BM25 retrieval.
+After the pipeline is executed, the documents are retrieved from the document store
+using BM25 retrieval and vector similarity.
 
 The script should produce the following output (an example of a single Document):
 ......
@@ -20,7 +21,7 @@
 
 from dotenv import load_dotenv
 from haystack import Document, Pipeline
-from haystack.components.embedders import OpenAIDocumentEmbedder
+from haystack.components.embedders import OpenAIDocumentEmbedder, OpenAITextEmbedder
 from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
 from haystack.components.writers import DocumentWriter
 from haystack.document_stores.in_memory import InMemoryDocumentStore
@@ -83,7 +84,12 @@ def dataset_mapping_function(dataset_item: dict) -> Document:
 
 print(f"Added {document_store.count_documents()} to vector from Website Content Crawler")
 
-print("Retrieving documents from the document store: query='Haystack'")
+print("Retrieving documents from the document store using BM25")
 print("query='Haystack'")
 for doc in document_store.bm25_retrieval("Haystack", top_k=1):
     print(doc)
+
+print("Retrieving documents from the document store using vector similarity")
+print("query='Haystack'")
+for doc in document_store.embedding_retrieval(OpenAITextEmbedder().run("Haystack")["embedding"], top_k=1):
+    print(doc)