mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-29 11:12:02 +01:00
Make slack periodic use the DB
This commit is contained in:
parent
5ce5077833
commit
821df50fa9
@ -3,12 +3,15 @@ from typing import cast
|
||||
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.connectors.factory import build_connector
|
||||
from danswer.connectors.factory import build_pull_connector
|
||||
from danswer.connectors.models import InputType
|
||||
from danswer.connectors.slack.config import get_pull_frequency
|
||||
from danswer.connectors.slack.pull import PeriodicSlackLoader
|
||||
from danswer.connectors.web.pull import WebLoader
|
||||
from danswer.db.index_attempt import fetch_index_attempts
|
||||
from danswer.db.index_attempt import insert_index_attempt
|
||||
from danswer.db.index_attempt import update_index_attempt
|
||||
from danswer.db.models import IndexAttempt
|
||||
from danswer.db.models import IndexingStatus
|
||||
from danswer.dynamic_configs import get_dynamic_config_store
|
||||
from danswer.dynamic_configs.interface import ConfigNotFoundError
|
||||
@ -51,9 +54,21 @@ def run_update() -> None:
|
||||
if last_pull is None or _check_should_run(
|
||||
current_time, last_pull, pull_frequency
|
||||
):
|
||||
logger.info(f"Running slack pull from {last_pull or 0} to {current_time}")
|
||||
for doc_batch in PeriodicSlackLoader().load(last_pull or 0, current_time):
|
||||
indexing_pipeline(doc_batch)
|
||||
# TODO (chris): go back to only fetching messages that have changed
|
||||
# since the last pull. Not supported for now due to how we compute the
|
||||
# number of documents indexed for the admin dashboard (only look at latest)
|
||||
logger.info("Scheduling periodic slack pull")
|
||||
insert_index_attempt(
|
||||
IndexAttempt(
|
||||
source=DocumentSource.SLACK,
|
||||
input_type=InputType.PULL,
|
||||
status=IndexingStatus.NOT_STARTED,
|
||||
connector_specific_config={},
|
||||
)
|
||||
)
|
||||
# not 100% accurate, but the inaccuracy will result in more
|
||||
# frequent pulling rather than less frequent, which is fine
|
||||
# for now
|
||||
dynamic_config_store.store(last_slack_pull_key, current_time)
|
||||
|
||||
# TODO (chris): make this more efficient / in a single transaction to
|
||||
@ -79,9 +94,8 @@ def run_update() -> None:
|
||||
try:
|
||||
# TODO (chris): spawn processes to parallelize / take advantage of
|
||||
# multiple cores + implement retries
|
||||
connector = build_connector(
|
||||
connector = build_pull_connector(
|
||||
source=not_started_index_attempt.source,
|
||||
input_type=InputType.PULL,
|
||||
connector_specific_config=not_started_index_attempt.connector_specific_config,
|
||||
)
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
import time
|
||||
from collections.abc import Generator
|
||||
from typing import Any
|
||||
|
||||
from danswer.configs.constants import DocumentSource
|
||||
@ -5,11 +7,14 @@ from danswer.connectors.github.batch import BatchGithubLoader
|
||||
from danswer.connectors.google_drive.batch import BatchGoogleDriveLoader
|
||||
from danswer.connectors.interfaces import PullLoader
|
||||
from danswer.connectors.interfaces import RangePullLoader
|
||||
from danswer.connectors.models import Document
|
||||
from danswer.connectors.models import InputType
|
||||
from danswer.connectors.slack.batch import BatchSlackLoader
|
||||
from danswer.connectors.slack.pull import PeriodicSlackLoader
|
||||
from danswer.connectors.web.pull import WebLoader
|
||||
|
||||
_NUM_SECONDS_IN_DAY = 86400
|
||||
|
||||
|
||||
class ConnectorMissingException(Exception):
    """Raised when no connector exists for a given source / input type."""
|
||||
@ -38,3 +43,23 @@ def build_connector(
|
||||
raise ConnectorMissingException(
|
||||
f"Connector not found for source={source}, input_type={input_type}"
|
||||
)
|
||||
|
||||
|
||||
def build_pull_connector(
    source: DocumentSource, connector_specific_config: dict[str, Any]
) -> PullLoader:
    """Build the `InputType.PULL` connector for `source` as a `PullLoader`.

    The connector itself is created by `build_connector`. If it already is a
    `PullLoader` (argument-free `load()`), it is returned unchanged; otherwise
    it is treated as a range-based connector and adapted with
    `_range_pull_to_pull`.

    Args:
        source: the document source to build a connector for.
        connector_specific_config: passed through to `build_connector`.

    Raises:
        Whatever `build_connector` raises, e.g. `ConnectorMissingException`
        when no connector matches the source.
    """
    connector = build_connector(source, InputType.PULL, connector_specific_config)

    # Wrapping a connector whose `load()` takes no arguments would make the
    # adapter call `load(0, end)` on it and fail with a TypeError, so only
    # adapt connectors that are not already `PullLoader`s.
    if isinstance(connector, PullLoader):
        return connector
    return _range_pull_to_pull(connector)
|
||||
|
||||
|
||||
def _range_pull_to_pull(range_pull_connector: RangePullLoader) -> PullLoader:
    """Adapt a range-based connector to the argument-free `PullLoader` API."""

    class _Connector(PullLoader):
        """`PullLoader` facade that always asks for the widest time range."""

        def __init__(self) -> None:
            self._connector = range_pull_connector

        def load(self) -> Generator[list[Document], None, None]:
            # Request everything from the epoch up to one day past "now";
            # the extra day is a buffer to make sure we get all documents.
            range_start = 0
            range_end = time.time() + _NUM_SECONDS_IN_DAY
            return self._connector.load(range_start, range_end)

    adapter = _Connector()
    return adapter
|
||||
|
@ -1,7 +1,6 @@
|
||||
import abc
|
||||
from collections.abc import Generator
|
||||
from typing import Any
|
||||
from typing import List
|
||||
|
||||
from danswer.connectors.models import Document
|
||||
|
||||
@ -12,7 +11,7 @@ SecondsSinceUnixEpoch = float
|
||||
# TODO (chris): rename from Loader -> Connector
|
||||
class PullLoader(abc.ABC):
    """Interface for connectors whose `load` takes no arguments.

    Derives from `abc.ABC` so that `@abc.abstractmethod` is enforced: without
    an `ABCMeta` metaclass the decorator has no effect and a subclass that
    forgets to implement `load` would only fail when `load` is called.
    """

    @abc.abstractmethod
    def load(self) -> Generator[list[Document], None, None]:
        """Return a generator producing batches (lists) of `Document`."""
        raise NotImplementedError
|
||||
|
||||
|
||||
@ -20,11 +19,11 @@ class RangePullLoader:
|
||||
@abc.abstractmethod
def load(
    self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> Generator[list[Document], None, None]:
    """Return a generator producing batches (lists) of `Document`.

    `start` and `end` are timestamps in seconds since the Unix epoch;
    presumably they bound the documents to fetch -- confirm against the
    concrete implementations.
    """
    raise NotImplementedError
|
||||
|
||||
|
||||
class PushLoader(abc.ABC):
    """Interface for connectors whose `load` is driven by a single event.

    Derives from `abc.ABC` so that `@abc.abstractmethod` is enforced: without
    an `ABCMeta` metaclass the decorator has no effect and a subclass that
    forgets to implement `load` would only fail when `load` is called.
    """

    @abc.abstractmethod
    def load(self, event: Any) -> Generator[list[Document], None, None]:
        """Return a generator producing batches (lists) of `Document` for `event`."""
        raise NotImplementedError
|
||||
|
@ -2,10 +2,6 @@
|
||||
|
||||
import * as Yup from "yup";
|
||||
import { IndexForm } from "@/components/admin/connectors/Form";
|
||||
import {
|
||||
ConnectorStatus,
|
||||
ConnectorStatusEnum,
|
||||
} from "@/components/admin/connectors/ConnectorStatus";
|
||||
import { GithubIcon } from "@/components/icons/icons";
|
||||
import { TextFormField } from "@/components/admin/connectors/Field";
|
||||
|
||||
|
@ -6,6 +6,7 @@ import useSWR, { useSWRConfig } from "swr";
|
||||
import { SlackConfig } from "../../../../components/admin/connectors/types";
|
||||
import { LoadingAnimation } from "@/components/Loading";
|
||||
import { InitialSetupForm } from "./InitialSetupForm";
|
||||
import { useRouter } from "next/navigation";
|
||||
|
||||
const MainSection = () => {
|
||||
// TODO: add back in once this is ready
|
||||
@ -13,6 +14,8 @@ const MainSection = () => {
|
||||
// "/api/admin/connectors/web/index-attempt",
|
||||
// fetcher
|
||||
// );
|
||||
const router = useRouter();
|
||||
|
||||
const { mutate } = useSWRConfig();
|
||||
const { data, isLoading, error } = useSWR<SlackConfig>(
|
||||
"/api/admin/connectors/slack/config",
|
||||
|
@ -12,7 +12,7 @@ import {
|
||||
ListIndexingResponse,
|
||||
} from "@/components/admin/connectors/types";
|
||||
import { getSourceMetadata } from "@/components/source";
|
||||
import { CheckCircle } from "@phosphor-icons/react";
|
||||
import { CheckCircle, XCircle } from "@phosphor-icons/react";
|
||||
import { submitIndexRequest } from "@/components/admin/connectors/Form";
|
||||
import { useState } from "react";
|
||||
import { Popup } from "@/components/admin/connectors/Popup";
|
||||
@ -88,6 +88,25 @@ export default function Status() {
|
||||
latestSuccessfulIndexAttemptsBySource.get(
|
||||
getModifiedSource(indexAttempt)
|
||||
);
|
||||
|
||||
let statusDisplay = (
|
||||
<div className="text-gray-400">In Progress...</div>
|
||||
);
|
||||
if (indexAttempt.status === "success") {
|
||||
statusDisplay = (
|
||||
<div className="text-green-600 flex">
|
||||
<CheckCircle className="my-auto mr-1" size="18" />
|
||||
Success
|
||||
</div>
|
||||
);
|
||||
} else if (indexAttempt.status === "failed") {
|
||||
statusDisplay = (
|
||||
<div className="text-red-600 flex">
|
||||
<XCircle className="my-auto mr-1" size="18" />
|
||||
Error
|
||||
</div>
|
||||
);
|
||||
}
|
||||
return {
|
||||
indexed_at:
|
||||
timeAgo(successfulIndexAttempt?.time_updated) || "-",
|
||||
@ -108,15 +127,7 @@ export default function Status() {
|
||||
</div>
|
||||
</a>
|
||||
),
|
||||
status:
|
||||
indexAttempt.status === "success" ? (
|
||||
<div className="text-green-600 flex">
|
||||
<CheckCircle className="my-auto mr-1" size="18" />
|
||||
Success
|
||||
</div>
|
||||
) : (
|
||||
<div className="text-gray-400">In Progress...</div>
|
||||
),
|
||||
status: statusDisplay,
|
||||
reindex: (
|
||||
<button
|
||||
className={
|
||||
|
@ -69,7 +69,7 @@ export const Header: React.FC<HeaderProps> = ({ user }) => {
|
||||
}
|
||||
>
|
||||
{user.role === "admin" && (
|
||||
<Link href="/admin/connectors/slack">
|
||||
<Link href="/admin/indexing/status">
|
||||
<div className="flex py-2 px-3 cursor-pointer hover:bg-gray-500 border-b border-gray-500">
|
||||
Connectors
|
||||
</div>
|
||||
|
@ -8,7 +8,7 @@ export interface SlackConfig {
|
||||
|
||||
export interface IndexAttempt {
|
||||
connector_specific_config: { [key: string]: any };
|
||||
status: "success" | "failure" | "in_progress" | "not_started";
|
||||
status: "success" | "failed" | "in_progress" | "not_started";
|
||||
source: ValidSources;
|
||||
time_created: string;
|
||||
time_updated: string;
|
||||
|
Loading…
x
Reference in New Issue
Block a user