Make slack periodic use the DB

This commit is contained in:
Weves
2023-05-16 17:45:06 -07:00
committed by Chris Weaver
parent 5ce5077833
commit 821df50fa9
8 changed files with 73 additions and 25 deletions

View File

@@ -3,12 +3,15 @@ from typing import cast
from danswer.configs.constants import DocumentSource from danswer.configs.constants import DocumentSource
from danswer.connectors.factory import build_connector from danswer.connectors.factory import build_connector
from danswer.connectors.factory import build_pull_connector
from danswer.connectors.models import InputType from danswer.connectors.models import InputType
from danswer.connectors.slack.config import get_pull_frequency from danswer.connectors.slack.config import get_pull_frequency
from danswer.connectors.slack.pull import PeriodicSlackLoader from danswer.connectors.slack.pull import PeriodicSlackLoader
from danswer.connectors.web.pull import WebLoader from danswer.connectors.web.pull import WebLoader
from danswer.db.index_attempt import fetch_index_attempts from danswer.db.index_attempt import fetch_index_attempts
from danswer.db.index_attempt import insert_index_attempt
from danswer.db.index_attempt import update_index_attempt from danswer.db.index_attempt import update_index_attempt
from danswer.db.models import IndexAttempt
from danswer.db.models import IndexingStatus from danswer.db.models import IndexingStatus
from danswer.dynamic_configs import get_dynamic_config_store from danswer.dynamic_configs import get_dynamic_config_store
from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.dynamic_configs.interface import ConfigNotFoundError
@@ -51,9 +54,21 @@ def run_update() -> None:
if last_pull is None or _check_should_run( if last_pull is None or _check_should_run(
current_time, last_pull, pull_frequency current_time, last_pull, pull_frequency
): ):
logger.info(f"Running slack pull from {last_pull or 0} to {current_time}") # TODO (chris): go back to only fetching messages that have changed
for doc_batch in PeriodicSlackLoader().load(last_pull or 0, current_time): # since the last pull. Not supported for now due to how we compute the
indexing_pipeline(doc_batch) # number of documents indexed for the admin dashboard (only look at latest)
logger.info("Scheduling periodic slack pull")
insert_index_attempt(
IndexAttempt(
source=DocumentSource.SLACK,
input_type=InputType.PULL,
status=IndexingStatus.NOT_STARTED,
connector_specific_config={},
)
)
# not 100% accurate, but the inaccuracy will result in more
# frequent pulling rather than less frequent, which is fine
# for now
dynamic_config_store.store(last_slack_pull_key, current_time) dynamic_config_store.store(last_slack_pull_key, current_time)
# TODO (chris): make this more efficient / in a single transaction to # TODO (chris): make this more efficient / in a single transaction to
@@ -79,9 +94,8 @@ def run_update() -> None:
try: try:
# TODO (chris): spawn processes to parallelize / take advantage of # TODO (chris): spawn processes to parallelize / take advantage of
# multiple cores + implement retries # multiple cores + implement retries
connector = build_connector( connector = build_pull_connector(
source=not_started_index_attempt.source, source=not_started_index_attempt.source,
input_type=InputType.PULL,
connector_specific_config=not_started_index_attempt.connector_specific_config, connector_specific_config=not_started_index_attempt.connector_specific_config,
) )

View File

@@ -1,3 +1,5 @@
import time
from collections.abc import Generator
from typing import Any from typing import Any
from danswer.configs.constants import DocumentSource from danswer.configs.constants import DocumentSource
@@ -5,11 +7,14 @@ from danswer.connectors.github.batch import BatchGithubLoader
from danswer.connectors.google_drive.batch import BatchGoogleDriveLoader from danswer.connectors.google_drive.batch import BatchGoogleDriveLoader
from danswer.connectors.interfaces import PullLoader from danswer.connectors.interfaces import PullLoader
from danswer.connectors.interfaces import RangePullLoader from danswer.connectors.interfaces import RangePullLoader
from danswer.connectors.models import Document
from danswer.connectors.models import InputType from danswer.connectors.models import InputType
from danswer.connectors.slack.batch import BatchSlackLoader from danswer.connectors.slack.batch import BatchSlackLoader
from danswer.connectors.slack.pull import PeriodicSlackLoader from danswer.connectors.slack.pull import PeriodicSlackLoader
from danswer.connectors.web.pull import WebLoader from danswer.connectors.web.pull import WebLoader
_NUM_SECONDS_IN_DAY = 86400
class ConnectorMissingException(Exception): class ConnectorMissingException(Exception):
pass pass
@@ -38,3 +43,23 @@ def build_connector(
raise ConnectorMissingException( raise ConnectorMissingException(
f"Connector not found for source={source}, input_type={input_type}" f"Connector not found for source={source}, input_type={input_type}"
) )
def build_pull_connector(
    source: DocumentSource, connector_specific_config: dict[str, Any]
) -> PullLoader:
    """Build a connector for `source` that exposes the argument-free pull API.

    The connector is created via `build_connector` with `InputType.PULL`.
    If the result is a `RangePullLoader` (its `load` takes a start/end
    window), it is wrapped so that callers can simply call `load()`.
    Anything else is returned untouched.

    Args:
        source: The document source to build a connector for.
        connector_specific_config: Connector configuration, passed through
            to `build_connector` unchanged.

    Returns:
        A connector whose `load()` can be called without arguments.

    Raises:
        Whatever `build_connector` raises (e.g. when no connector matches
        the given source / input type).
    """
    connector = build_connector(source, InputType.PULL, connector_specific_config)
    # Only range-based connectors need adapting. Wrapping a connector whose
    # `load()` already takes no arguments would make the adapter call it as
    # `load(0, end)` and fail at pull time.
    # NOTE(review): assumes every non-range connector built for
    # InputType.PULL already satisfies PullLoader -- confirm against
    # build_connector.
    if isinstance(connector, RangePullLoader):
        return _range_pull_to_pull(connector)
    return connector
def _range_pull_to_pull(range_pull_connector: RangePullLoader) -> PullLoader:
    """Adapt a range-based pull connector to the argument-free pull interface.

    The returned object's `load()` forwards to `range_pull_connector.load`
    with a window that starts at 0 and ends one day past the current time.
    """

    class _Connector(PullLoader):
        """Wrapper that requests the full time range on every `load()` call."""

        def __init__(self) -> None:
            self._connector = range_pull_connector

        def load(self) -> Generator[list[Document], None, None]:
            # The upper bound is pushed a day beyond "now" as a buffer, to
            # make sure all documents are picked up.
            window_start = 0
            window_end = time.time() + _NUM_SECONDS_IN_DAY
            return self._connector.load(window_start, window_end)

    adapter = _Connector()
    return adapter

View File

@@ -1,7 +1,6 @@
import abc import abc
from collections.abc import Generator from collections.abc import Generator
from typing import Any from typing import Any
from typing import List
from danswer.connectors.models import Document from danswer.connectors.models import Document
@@ -12,7 +11,7 @@ SecondsSinceUnixEpoch = float
# TODO (chris): rename from Loader -> Connector # TODO (chris): rename from Loader -> Connector
class PullLoader: class PullLoader:
@abc.abstractmethod @abc.abstractmethod
def load(self) -> Generator[List[Document], None, None]: def load(self) -> Generator[list[Document], None, None]:
raise NotImplementedError raise NotImplementedError
@@ -20,11 +19,11 @@ class RangePullLoader:
@abc.abstractmethod @abc.abstractmethod
def load( def load(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> Generator[List[Document], None, None]: ) -> Generator[list[Document], None, None]:
raise NotImplementedError raise NotImplementedError
class PushLoader: class PushLoader:
@abc.abstractmethod @abc.abstractmethod
def load(self, event: Any) -> Generator[List[Document], None, None]: def load(self, event: Any) -> Generator[list[Document], None, None]:
raise NotImplementedError raise NotImplementedError

View File

@@ -2,10 +2,6 @@
import * as Yup from "yup"; import * as Yup from "yup";
import { IndexForm } from "@/components/admin/connectors/Form"; import { IndexForm } from "@/components/admin/connectors/Form";
import {
ConnectorStatus,
ConnectorStatusEnum,
} from "@/components/admin/connectors/ConnectorStatus";
import { GithubIcon } from "@/components/icons/icons"; import { GithubIcon } from "@/components/icons/icons";
import { TextFormField } from "@/components/admin/connectors/Field"; import { TextFormField } from "@/components/admin/connectors/Field";

View File

@@ -6,6 +6,7 @@ import useSWR, { useSWRConfig } from "swr";
import { SlackConfig } from "../../../../components/admin/connectors/types"; import { SlackConfig } from "../../../../components/admin/connectors/types";
import { LoadingAnimation } from "@/components/Loading"; import { LoadingAnimation } from "@/components/Loading";
import { InitialSetupForm } from "./InitialSetupForm"; import { InitialSetupForm } from "./InitialSetupForm";
import { useRouter } from "next/navigation";
const MainSection = () => { const MainSection = () => {
// TODO: add back in once this is ready // TODO: add back in once this is ready
@@ -13,6 +14,8 @@ const MainSection = () => {
// "/api/admin/connectors/web/index-attempt", // "/api/admin/connectors/web/index-attempt",
// fetcher // fetcher
// ); // );
const router = useRouter();
const { mutate } = useSWRConfig(); const { mutate } = useSWRConfig();
const { data, isLoading, error } = useSWR<SlackConfig>( const { data, isLoading, error } = useSWR<SlackConfig>(
"/api/admin/connectors/slack/config", "/api/admin/connectors/slack/config",

View File

@@ -12,7 +12,7 @@ import {
ListIndexingResponse, ListIndexingResponse,
} from "@/components/admin/connectors/types"; } from "@/components/admin/connectors/types";
import { getSourceMetadata } from "@/components/source"; import { getSourceMetadata } from "@/components/source";
import { CheckCircle } from "@phosphor-icons/react"; import { CheckCircle, XCircle } from "@phosphor-icons/react";
import { submitIndexRequest } from "@/components/admin/connectors/Form"; import { submitIndexRequest } from "@/components/admin/connectors/Form";
import { useState } from "react"; import { useState } from "react";
import { Popup } from "@/components/admin/connectors/Popup"; import { Popup } from "@/components/admin/connectors/Popup";
@@ -88,6 +88,25 @@ export default function Status() {
latestSuccessfulIndexAttemptsBySource.get( latestSuccessfulIndexAttemptsBySource.get(
getModifiedSource(indexAttempt) getModifiedSource(indexAttempt)
); );
let statusDisplay = (
<div className="text-gray-400">In Progress...</div>
);
if (indexAttempt.status === "success") {
statusDisplay = (
<div className="text-green-600 flex">
<CheckCircle className="my-auto mr-1" size="18" />
Success
</div>
);
} else if (indexAttempt.status === "failed") {
statusDisplay = (
<div className="text-red-600 flex">
<XCircle className="my-auto mr-1" size="18" />
Error
</div>
);
}
return { return {
indexed_at: indexed_at:
timeAgo(successfulIndexAttempt?.time_updated) || "-", timeAgo(successfulIndexAttempt?.time_updated) || "-",
@@ -108,15 +127,7 @@ export default function Status() {
</div> </div>
</a> </a>
), ),
status: status: statusDisplay,
indexAttempt.status === "success" ? (
<div className="text-green-600 flex">
<CheckCircle className="my-auto mr-1" size="18" />
Success
</div>
) : (
<div className="text-gray-400">In Progress...</div>
),
reindex: ( reindex: (
<button <button
className={ className={

View File

@@ -69,7 +69,7 @@ export const Header: React.FC<HeaderProps> = ({ user }) => {
} }
> >
{user.role === "admin" && ( {user.role === "admin" && (
<Link href="/admin/connectors/slack"> <Link href="/admin/indexing/status">
<div className="flex py-2 px-3 cursor-pointer hover:bg-gray-500 border-b border-gray-500"> <div className="flex py-2 px-3 cursor-pointer hover:bg-gray-500 border-b border-gray-500">
Connectors Connectors
</div> </div>

View File

@@ -8,7 +8,7 @@ export interface SlackConfig {
export interface IndexAttempt { export interface IndexAttempt {
connector_specific_config: { [key: string]: any }; connector_specific_config: { [key: string]: any };
status: "success" | "failure" | "in_progress" | "not_started"; status: "success" | "failed" | "in_progress" | "not_started";
source: ValidSources; source: ValidSources;
time_created: string; time_created: string;
time_updated: string; time_updated: string;