Skip to content

Commit

Permalink
Add ingestion of Notion documents (#262)
Browse files Browse the repository at this point in the history
* Add ingestion of Notion documents

* Small tweaks
  • Loading branch information
homanp authored Aug 11, 2023
1 parent 8b042ff commit bc5490b
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 18 deletions.
29 changes: 24 additions & 5 deletions app/lib/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@
)
from langchain.embeddings.openai import OpenAIEmbeddings
from llama_index.readers.schema.base import Document
from llama_index import download_loader

from app.lib.loaders.sitemap import SitemapLoader
from app.lib.parsers import CustomPDFPlumberLoader
from app.lib.splitters import TextSplitters
from app.lib.vectorstores.base import VectorStoreBase

NotionPageReader = download_loader("NotionPageReader")

valid_ingestion_types = [
"TXT",
"PDF",
Expand All @@ -34,6 +37,7 @@
"STRIPE",
"AIRTABLE",
"SITEMAP",
"NOTION",
]


Expand Down Expand Up @@ -65,12 +69,27 @@ def upsert_document(
if type == "STRIPE":
pass

if type == "NOTION":
integration_token: str = metadata["integration_token"]
page_ids: str = metadata["page_ids"]
loader = NotionPageReader(integration_token=integration_token)
documents = loader.load_langchain_documents(page_ids=page_ids.split(","))
newDocuments = [
document.metadata.update({"namespace": document_id}) or document
for document in documents
]
docs = TextSplitters(newDocuments, text_splitter).document_splitter()

VectorStoreBase().get_database().from_documents(
docs, embeddings, index_name=INDEX_NAME, namespace=document_id
)

if type == "AIRTABLE":
from langchain.document_loaders import AirtableLoader

api_key = metadata["api_key"]
base_id = metadata["base_id"]
table_id = metadata["table_id"]
api_key: str = metadata["api_key"]
base_id: str = metadata["base_id"]
table_id: str = metadata["table_id"]
loader = AirtableLoader(api_key, table_id, base_id)
documents = loader.load()
newDocuments = [
Expand All @@ -84,8 +103,8 @@ def upsert_document(
)

if type == "SITEMAP":
filter_urls = metadata["filter_urls"].split(",")
loader = SitemapLoader(sitemap_url=url, filter_urls=filter_urls)
filter_urls: str = metadata["filter_urls"]
loader = SitemapLoader(sitemap_url=url, filter_urls=filter_urls.split(","))
documents = loader.load()
newDocuments = [
document.metadata.update({"namespace": document_id}) or document
Expand Down
10 changes: 5 additions & 5 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- AlterEnum
ALTER TYPE "DocumentType" ADD VALUE 'NOTION';
1 change: 1 addition & 0 deletions prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ enum DocumentType {
STRIPE
AIRTABLE
SITEMAP
NOTION
}

enum ToolType {
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ gitpython = "^3.1.32"
vulture = "^2.7"
airbyte-source-stripe = "^3.17.1"
pyairtable = "^2.0.0"
langchain = "^0.0.260"
metaphor-python = "^0.1.11"
langchain = "^0.0.261"


[build-system]
Expand Down
31 changes: 24 additions & 7 deletions ui/app/datasources/_components/applications.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import {
SimpleGrid,
Avatar,
Select,
Textarea,
} from "@chakra-ui/react";
import dayjs from "dayjs";
import { useForm } from "react-hook-form";
Expand Down Expand Up @@ -65,7 +66,7 @@ function DocumentRow({ id, name, createdAt, type, onDelete, onEdit }) {
await onDelete(id);

toast({
description: "Document deleted",
description: "Datasource deleted",
position: "top",
colorScheme: "gray",
});
Expand Down Expand Up @@ -188,7 +189,7 @@ export default function Applications({ data, session }) {
await api.deleteDocument({ id });

if (process.env.NEXT_PUBLIC_SEGMENT_WRITE_KEY) {
analytics.track("Deleted Document", { id });
analytics.track("Deleted Datasource", { id });
}

setData();
Expand All @@ -209,11 +210,11 @@ export default function Applications({ data, session }) {
await api.createDocument(payload);

if (process.env.NEXT_PUBLIC_SEGMENT_WRITE_KEY) {
analytics.track("Connected application", { ...payload });
analytics.track("Connected datasource", { ...payload });
}

toast({
description: "Application connected",
description: "Datasource connected",
position: "top",
colorScheme: "gray",
});
Expand Down Expand Up @@ -246,11 +247,11 @@ export default function Applications({ data, session }) {
await api.patchDocument(selectedDocument, payload);

if (process.env.NEXT_PUBLIC_SEGMENT_WRITE_KEY) {
analytics.track("Updated Application", { ...payload });
analytics.track("Updated Datasource", { ...payload });
}

toast({
description: "Application updated",
description: "Datasource updated",
position: "top",
colorScheme: "gray",
});
Expand Down Expand Up @@ -378,21 +379,37 @@ export default function Applications({ data, session }) {
</FormControl>
{!selectedDocument &&
selectedSource.inputs.map(
({ key, name, type, required, options, helpText }) => (
({
key,
name,
type,
required,
options,
placeholder,
helpText,
}) => (
<FormControl key={key} isRequired={required}>
<FormLabel>{name}</FormLabel>
{type === "input" && (
<Input
type="text"
placeholder={placeholder}
{...register(key, { required })}
/>
)}
{type === "date" && (
<Input
type="date"
placeholder={placeholder}
{...register(key, { required })}
/>
)}
{type === "textarea" && (
<Textarea
{...register(key, { required })}
placeholder={placeholder}
/>
)}
{type === "select" && <Select></Select>}
{helpText && (
<FormHelperText>{helpText}</FormHelperText>
Expand Down
24 changes: 24 additions & 0 deletions ui/lib/datasources.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,30 @@ export const APPLICATIONS = [
},
],
},
{
id: "NOTION",
name: "Notion",
logo: "./notion.png",
is_live: true,
inputs: [
{
key: "integration_token",
name: "Notion integration token",
placeholder: "",
helpText: "Enter your token here...",
type: "input",
required: true,
},
{
key: "page_ids",
name: "Page IDs",
placeholder: "",
helpText: "Enter the ID of the pages you like to add.",
type: "textarea",
required: true,
},
],
},
{
id: "AIRTABLE",
name: "Airtable",
Expand Down
Binary file added ui/public/notion.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

1 comment on commit bc5490b

@vercel
Copy link

@vercel vercel bot commented on bc5490b Aug 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.