mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-08-02 21:22:51 +02:00
@@ -27,3 +27,4 @@ class DocumentSource(str, Enum):
|
||||
SLAB = "slab"
|
||||
JIRA = "jira"
|
||||
FILE = "file"
|
||||
NOTION = "notion"
|
||||
|
@@ -8,6 +8,7 @@ from danswer.connectors.danswer_jira.connector import JiraConnector
|
||||
from danswer.connectors.file.connector import LocalFileConnector
|
||||
from danswer.connectors.github.connector import GithubConnector
|
||||
from danswer.connectors.google_drive.connector import GoogleDriveConnector
|
||||
from danswer.connectors.notion.connector import NotionConnector
|
||||
from danswer.connectors.interfaces import BaseConnector
|
||||
from danswer.connectors.interfaces import EventConnector
|
||||
from danswer.connectors.interfaces import LoadConnector
|
||||
@@ -42,6 +43,7 @@ def identify_connector_class(
|
||||
DocumentSource.CONFLUENCE: ConfluenceConnector,
|
||||
DocumentSource.JIRA: JiraConnector,
|
||||
DocumentSource.SLAB: SlabConnector,
|
||||
DocumentSource.NOTION: NotionConnector,
|
||||
}
|
||||
connector_by_source = connector_map.get(source, {})
|
||||
|
||||
|
0
backend/danswer/connectors/notion/__init__.py
Normal file
0
backend/danswer/connectors/notion/__init__.py
Normal file
229
backend/danswer/connectors/notion/connector.py
Normal file
229
backend/danswer/connectors/notion/connector.py
Normal file
@@ -0,0 +1,229 @@
|
||||
"""Notion reader."""
|
||||
import time
|
||||
from dataclasses import dataclass, fields
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||
from danswer.connectors.interfaces import LoadConnector
|
||||
from danswer.connectors.interfaces import PollConnector
|
||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from danswer.connectors.models import Document
|
||||
from danswer.connectors.models import Section
|
||||
|
||||
|
||||
@dataclass
class NotionPage:
    """A single page object as returned by the Notion API.

    Only the attributes declared below are kept; any other key passed to the
    constructor is silently dropped.
    """

    id: str
    created_time: str
    last_edited_time: str
    archived: bool
    properties: Dict[str, Any]
    url: str

    def __init__(self, **kwargs: Any) -> None:
        """Copy every keyword argument that matches a declared field."""
        declared = {field.name for field in fields(self)}
        # Keys that are not declared fields (extra API attributes) are ignored.
        for key in declared.intersection(kwargs):
            setattr(self, key, kwargs[key])
|
||||
|
||||
|
||||
@dataclass
class NotionSearchResponse:
    """The payload returned by a call to the Notion Search API.

    Only the attributes declared below are kept; any other key passed to the
    constructor is silently dropped.
    """

    results: List[Dict[str, Any]]
    next_cursor: Optional[str]
    has_more: bool = False

    def __init__(self, **kwargs: Any) -> None:
        """Copy every keyword argument that matches a declared field."""
        declared = {field.name for field in fields(self)}
        # `has_more` falls back to its class-level default when not supplied.
        for key in declared.intersection(kwargs):
            setattr(self, key, kwargs[key])
|
||||
|
||||
|
||||
# TODO - Add the ability to optionally limit to specific Notion databases
|
||||
class NotionConnector(LoadConnector, PollConnector):
    """Notion Page connector that reads all Notion pages
    this integration has been granted access to.

    Arguments:
        batch_size (int): Number of objects to index in a batch
    """

    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
        """Store the batch size and the static headers sent with each request."""
        self.batch_size = batch_size
        # The Authorization header is added later by load_credentials().
        self.headers = {
            "Content-Type": "application/json",
            "Notion-Version": "2022-06-28",
        }

    @staticmethod
    def _timestamp_to_epoch(timestamp: str) -> float:
        """Convert a Notion UTC timestamp string to seconds since the epoch.

        Arguments:
            timestamp (str) - e.g. "2023-06-01T12:34:00.000Z"
        """
        # Function-scope import: datetime is not imported at module level.
        from datetime import datetime, timezone

        # The trailing "Z" means UTC, so the value must not be interpreted in
        # the local timezone. %f accepts any fractional-second value.
        parsed = datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ")
        return parsed.replace(tzinfo=timezone.utc).timestamp()

    def _read_blocks(self, block_id: str, num_tabs: int = 0) -> str:
        """Reads the text of all child blocks of a block (or page), recursively.

        Arguments:
            block_id (str) - id of the block/page whose children are read
            num_tabs (int) - nesting depth; each text line is prefixed with
                this many tab characters

        Returns:
            str: the collected text, one entry per line

        Raises:
            requests.HTTPError: if the Notion API returns an error status
        """
        block_url = f"https://api.notion.com/v1/blocks/{block_id}/children"
        prefix = "\t" * num_tabs
        result_lines_arr: List[str] = []
        cursor: Optional[str] = None

        while True:
            # Pagination continues on the SAME block: the cursor is passed as
            # a query parameter, it is not a block id.
            params = {"start_cursor": cursor} if cursor else {}
            res = requests.get(block_url, headers=self.headers, params=params)
            res.raise_for_status()
            data = res.json()

            for result in data["results"]:
                # The block payload lives under a key named after its type.
                result_obj = result[result["type"]]

                cur_result_text_arr = []
                if "rich_text" in result_obj:
                    for rich_text in result_obj["rich_text"]:
                        # skip if doesn't have text object
                        if "text" in rich_text:
                            cur_result_text_arr.append(
                                prefix + rich_text["text"]["content"]
                            )

                if result["has_children"]:
                    children_text = self._read_blocks(
                        result["id"], num_tabs=num_tabs + 1
                    )
                    cur_result_text_arr.append(children_text)

                result_lines_arr.append("\n".join(cur_result_text_arr))

            cursor = data.get("next_cursor")
            if cursor is None:
                break

        return "\n".join(result_lines_arr)

    def _read_page_title(self, page: NotionPage) -> str:
        """Extracts the title from a Notion page.

        Falls back to "Untitled Page [<id>]" when no non-empty title property
        is present.
        """
        for prop in page.properties.values():
            if prop["type"] == "title" and len(prop["title"]) > 0:
                return " ".join([t["plain_text"] for t in prop["title"]]).strip()
        return f"Untitled Page [{page.id}]"

    def _read_pages(self, pages: List[NotionPage]) -> List[Document]:
        """Reads pages for rich text content and generates Documents"""
        return [
            Document(
                id=page.id,
                sections=[Section(link=page.url, text=self._read_blocks(page.id))],
                source=DocumentSource.NOTION,
                semantic_identifier=self._read_page_title(page),
                metadata={},
            )
            for page in pages
        ]

    def _search_notion(self, query_dict: Dict[str, Any]) -> NotionSearchResponse:
        """Search for pages from a Notion database.

        Raises:
            requests.HTTPError: if the Notion API returns an error status
        """
        res = requests.post(
            "https://api.notion.com/v1/search",
            headers=self.headers,
            json=query_dict,
        )
        res.raise_for_status()
        return NotionSearchResponse(**res.json())

    def _filter_pages_by_time(
        self,
        pages: List[Dict[str, Any]],
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        filter_field: str = "last_edited_time",
    ) -> List[NotionPage]:
        """A helper function to filter out pages outside of a time
        range. This functionality doesn't yet exist in the Notion Search API,
        but when it does, this approach can be deprecated.

        A page is kept when start < page time <= end.

        Arguments:
            pages (List[Dict]) - Pages to filter
            start (float) - start epoch time to filter from
            end (float) - end epoch time to filter to
            filter_field (str) - the attribute on the page to apply the filter
        """
        filtered_pages = []
        for page in pages:
            compare_time = self._timestamp_to_epoch(page[filter_field])
            # Both bounds must hold; with `or` every page would pass.
            if start < compare_time <= end:
                filtered_pages.append(NotionPage(**page))
        return filtered_pages

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Applies integration token to headers"""
        token = credentials["notion_integration_token"]
        self.headers["Authorization"] = f"Bearer {token}"
        return None

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Loads all page data from a Notion workspace.

        Yields:
            List[Document]: one batch of documents per search result page.
        """
        query_dict: Dict[str, Any] = {
            "filter": {"property": "object", "value": "page"},
            "page_size": self.batch_size,
        }
        while True:
            db_res = self._search_notion(query_dict)
            pages = [NotionPage(**page) for page in db_res.results]
            yield self._read_pages(pages)
            if not db_res.has_more:
                break
            query_dict["start_cursor"] = db_res.next_cursor

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Uses the Notion search API to fetch updated pages
        within a time period.
        Unfortunately the search API doesn't yet support filtering by times,
        so until they add that, we're just going to page through results until
        we reach ones that are older than our search criteria.
        """
        query_dict: Dict[str, Any] = {
            "page_size": self.batch_size,
            "sort": {"timestamp": "last_edited_time", "direction": "descending"},
            "filter": {"property": "object", "value": "page"},
        }
        while True:
            db_res = self._search_notion(query_dict)
            pages = self._filter_pages_by_time(
                db_res.results, start, end, filter_field="last_edited_time"
            )
            if len(pages) > 0:
                yield self._read_pages(pages)

            # Results are requested newest-first, so once the last (oldest)
            # page of a batch is at or before `start`, no later batch can
            # contain a page inside the window.
            reached_older = bool(db_res.results) and (
                self._timestamp_to_epoch(db_res.results[-1]["last_edited_time"])
                <= start
            )
            if reached_older or not db_res.has_more:
                break
            query_dict["start_cursor"] = db_res.next_cursor
|
229
web/src/app/admin/connectors/notion/page.tsx
Normal file
229
web/src/app/admin/connectors/notion/page.tsx
Normal file
@@ -0,0 +1,229 @@
|
||||
"use client";
|
||||
|
||||
import * as Yup from "yup";
|
||||
import { NotionIcon, TrashIcon } from "@/components/icons/icons";
|
||||
import { TextFormField } from "@/components/admin/connectors/Field";
|
||||
import { HealthCheckBanner } from "@/components/health/healthcheck";
|
||||
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
|
||||
import {
|
||||
NotionCredentialJson,
|
||||
NotionConfig,
|
||||
Credential,
|
||||
ConnectorIndexingStatus,
|
||||
} from "@/lib/types";
|
||||
import useSWR, { useSWRConfig } from "swr";
|
||||
import { fetcher } from "@/lib/fetcher";
|
||||
import { LoadingAnimation } from "@/components/Loading";
|
||||
import { deleteCredential, linkCredential } from "@/lib/credential";
|
||||
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
|
||||
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
|
||||
import { usePopup } from "@/components/admin/connectors/Popup";
|
||||
|
||||
/**
 * Body of the Notion connector admin page.
 *
 * Fetches the connector indexing statuses and the stored credentials, then
 * renders: the credential step (existing token with a delete button, or the
 * credential form), the indexing status table when Notion connectors exist,
 * and the "Create Connection" form when a credential exists but no Notion
 * connector does.
 */
const Main = () => {
  const { popup, setPopup } = usePopup();

  const { mutate } = useSWRConfig();
  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any>[]>(
    "/api/manage/admin/connector/indexing-status",
    fetcher
  );
  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: isCredentialsError,
  } = useSWR<Credential<NotionCredentialJson>[]>(
    "/api/manage/credential",
    fetcher
  );

  // Show the spinner only while a request is in flight and has no data yet.
  if (
    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
    (!credentialsData && isCredentialsLoading)
  ) {
    return <LoadingAnimation text="Loading" />;
  }

  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }

  if (isCredentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }

  // Both endpoints return entries for every source; keep only Notion ones.
  const notionConnectorIndexingStatuses = connectorIndexingStatuses.filter(
    (connectorIndexingStatus) =>
      connectorIndexingStatus.connector.source === "notion"
  );
  // First credential that carries a Notion token (undefined when none does).
  const notionCredential = credentialsData.filter(
    (credential) => credential.credential_json?.notion_integration_token
  )[0];

  return (
    <>
      {popup}
      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide your authorization details
      </h2>

      {notionCredential ? (
        <>
          <div className="flex mb-1 text-sm">
            <p className="my-auto">Existing Integration Token: </p>
            <p className="ml-1 italic my-auto max-w-md">
              {notionCredential.credential_json?.notion_integration_token}
            </p>
            <button
              className="ml-1 hover:bg-gray-700 rounded-full p-1"
              onClick={async () => {
                if (notionConnectorIndexingStatuses.length > 0) {
                  setPopup({
                    type: "error",
                    message:
                      "Must delete all connectors before deleting credentials",
                  });
                  return;
                }
                await deleteCredential(notionCredential.id);
                mutate("/api/manage/credential");
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <p className="text-sm">
            To get started you'll need to create an internal integration in
            Notion for Danswer. Follow the instructions in the
            <a
              href="https://developers.notion.com/docs/create-a-notion-integration"
              target="_blank"
              rel="noopener noreferrer"
            >
              Notion Developer Documentation
            </a>
            on the Notion website, to create a new integration. Once
            you've created an integration, copy the integration secret
            token and paste it below. Follow the remaining instructions on the
            Notion docs to allow Danswer to read Notion Databases and Pages
            using the new integration.
          </p>
          <div className="border-solid border-gray-600 border rounded-md p-6 mt-2 mb-4">
            <CredentialForm<NotionCredentialJson>
              formBody={
                <TextFormField
                  name="notion_integration_token"
                  label="Integration Token:"
                  type="password"
                />
              }
              validationSchema={Yup.object().shape({
                notion_integration_token: Yup.string().required(
                  "Please enter the Notion Integration token for the Danswer integration."
                ),
              })}
              initialValues={{
                notion_integration_token: "",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  mutate("/api/manage/credential");
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
            />
          </div>
        </>
      )}

      {notionConnectorIndexingStatuses.length > 0 && (
        <>
          <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
            Notion indexing status
          </h2>
          <p className="text-sm mb-2">
            The latest page updates are fetched from Notion every 10 minutes.
          </p>
          <div className="mb-2">
            <ConnectorsTable<NotionConfig, NotionCredentialJson>
              connectorIndexingStatuses={notionConnectorIndexingStatuses}
              liveCredential={notionCredential}
              getCredential={(credential) => {
                return (
                  <div>
                    <p>{credential.credential_json.notion_integration_token}</p>
                  </div>
                );
              }}
              onCredentialLink={async (connectorId) => {
                if (notionCredential) {
                  await linkCredential(connectorId, notionCredential.id);
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
              onUpdate={() =>
                mutate("/api/manage/admin/connector/indexing-status")
              }
            />
          </div>
        </>
      )}

      {notionCredential && notionConnectorIndexingStatuses.length === 0 && (
        <>
          <div className="border-solid border-gray-600 border rounded-md p-6 mt-4">
            <h2 className="font-bold mb-3">Create Connection</h2>
            <p className="text-sm mb-4">
              Press connect below to start the connection to Notion.
            </p>
            <ConnectorForm<NotionConfig>
              nameBuilder={(values) => `NotionConnector`}
              source="notion"
              inputType="poll"
              formBody={<></>}
              validationSchema={Yup.object().shape({})}
              initialValues={{}}
              refreshFreq={10 * 60} // 10 minutes
              onSubmit={async (isSuccess, responseJson) => {
                if (isSuccess && responseJson) {
                  await linkCredential(responseJson.id, notionCredential.id);
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
            />
          </div>
        </>
      )}

      {!notionCredential && (
        <>
          <p className="text-sm mb-4">
            Please provide your integration details in Step 1 first! Once done
            with that, you'll be able to start the connection then see
            indexing status.
          </p>
        </>
      )}
    </>
  );
};
|
||||
|
||||
/**
 * Notion connector admin page: health banner, page heading, then the
 * credential / connector management body.
 */
export default function Page() {
  // Heading row: Notion logo next to the page title.
  const heading = (
    <div className="border-solid border-gray-600 border-b mb-4 pb-2 flex">
      <NotionIcon size="32" />
      <h1 className="text-3xl font-bold pl-2">Notion</h1>
    </div>
  );

  const healthBanner = (
    <div className="mb-4">
      <HealthCheckBanner />
    </div>
  );

  return (
    <div className="mx-auto container">
      {healthBanner}
      {heading}
      <Main />
    </div>
  );
}
|
@@ -12,6 +12,7 @@ import {
|
||||
FileIcon,
|
||||
JiraIcon,
|
||||
SlabIcon,
|
||||
NotionIcon,
|
||||
} from "@/components/icons/icons";
|
||||
import { DISABLE_AUTH } from "@/lib/constants";
|
||||
import { getCurrentUserSS } from "@/lib/userSS";
|
||||
@@ -138,6 +139,15 @@ export default async function AdminLayout({
|
||||
),
|
||||
link: "/admin/connectors/file",
|
||||
},
|
||||
{
|
||||
name: (
|
||||
<div className="flex">
|
||||
<NotionIcon size="16" />
|
||||
<div className="ml-1">Notion</div>
|
||||
</div>
|
||||
),
|
||||
link: "/admin/connectors/notion",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@@ -18,6 +18,7 @@ import {
|
||||
SiGithub,
|
||||
SiGoogledrive,
|
||||
SiJira,
|
||||
SiNotion,
|
||||
SiSlack,
|
||||
} from "react-icons/si";
|
||||
import { FaFile, FaGlobe } from "react-icons/fa";
|
||||
@@ -160,3 +161,10 @@ export const BrainIcon = ({
|
||||
}: IconProps) => {
|
||||
return <Brain size={size} className={className} />;
|
||||
};
|
||||
|
||||
// Notion logo icon; size defaults to "16" and className to defaultTailwindCSS.
export const NotionIcon = (props: IconProps) => {
  const { size = "16", className = defaultTailwindCSS } = props;
  return <SiNotion size={size} className={className} />;
};
|
||||
|
@@ -14,6 +14,7 @@ const sources: Source[] = [
|
||||
{ displayName: "Github PRs", internalName: "github" },
|
||||
{ displayName: "Web", internalName: "web" },
|
||||
{ displayName: "File", internalName: "file" },
|
||||
{ displayName: "Notion", internalName: "notion" },
|
||||
];
|
||||
|
||||
interface SourceSelectorProps {
|
||||
|
@@ -7,6 +7,7 @@ import {
|
||||
GlobeIcon,
|
||||
GoogleDriveIcon,
|
||||
JiraIcon,
|
||||
NotionIcon,
|
||||
SlabIcon,
|
||||
SlackIcon,
|
||||
} from "./icons/icons";
|
||||
@@ -73,6 +74,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => {
|
||||
displayName: "Slab",
|
||||
adminPageLink: "/admin/connectors/slab",
|
||||
};
|
||||
case "notion":
|
||||
return {
|
||||
icon: NotionIcon,
|
||||
displayName: "Notion",
|
||||
adminPageLink: "/admin/connectors/notion",
|
||||
};
|
||||
default:
|
||||
throw new Error("Invalid source type");
|
||||
}
|
||||
|
@@ -16,7 +16,8 @@ export type ValidSources =
|
||||
| "confluence"
|
||||
| "jira"
|
||||
| "slab"
|
||||
| "file";
|
||||
| "file"
|
||||
| "notion";
|
||||
export type ValidInputTypes = "load_state" | "poll" | "event";
|
||||
|
||||
// CONNECTORS
|
||||
@@ -71,6 +72,8 @@ export interface FileConfig {
|
||||
file_locations: string[];
|
||||
}
|
||||
|
||||
// Connector-specific configuration for the Notion connector; it currently
// declares no fields.
export interface NotionConfig {}
|
||||
|
||||
export interface ConnectorIndexingStatus<T> {
|
||||
connector: Connector<T>;
|
||||
public_doc: boolean;
|
||||
@@ -124,3 +127,7 @@ export interface GoogleDriveCredentialJson {
|
||||
export interface SlabCredentialJson {
|
||||
slab_bot_token: string;
|
||||
}
|
||||
|
||||
// Shape of the credential JSON stored for the Notion connector.
export interface NotionCredentialJson {
  // Token of the Notion integration.
  notion_integration_token: string;
}
|
||||
|
Reference in New Issue
Block a user