mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-30 17:51:40 +02:00
PDF metadata + list defaults (#2341)
* validate web list * update pdf extraction of metadata * remove pdf + log * stricter type enforcing * fix up indexing widths * minor formatting * add list case * check for empty metadata
This commit is contained in:
@ -201,22 +201,28 @@ def read_pdf_file(
|
|||||||
decrypt_success = pdf_reader.decrypt(pdf_pass) != 0
|
decrypt_success = pdf_reader.decrypt(pdf_pass) != 0
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.error("Unable to decrypt pdf")
|
logger.error("Unable to decrypt pdf")
|
||||||
else:
|
|
||||||
logger.warning("No Password available to to decrypt pdf")
|
|
||||||
|
|
||||||
if not decrypt_success:
|
if not decrypt_success:
|
||||||
# By user request, keep files that are unreadable just so they
|
# By user request, keep files that are unreadable just so they
|
||||||
# can be discoverable by title.
|
# can be discoverable by title.
|
||||||
return "", metadata
|
return "", metadata
|
||||||
|
else:
|
||||||
|
logger.warning("No Password available to to decrypt pdf")
|
||||||
|
|
||||||
# Extract metadata from the PDF, removing leading '/' from keys if present
|
# Extract metadata from the PDF, removing leading '/' from keys if present
|
||||||
# This standardizes the metadata keys for consistency
|
# This standardizes the metadata keys for consistency
|
||||||
metadata = {}
|
metadata = {}
|
||||||
if pdf_reader.metadata is not None:
|
if pdf_reader.metadata is not None:
|
||||||
metadata = {
|
for key, value in pdf_reader.metadata.items():
|
||||||
k[1:] if k.startswith("/") else k: v
|
clean_key = key.lstrip("/")
|
||||||
for k, v in pdf_reader.metadata.items()
|
if isinstance(value, str) and value.strip():
|
||||||
}
|
metadata[clean_key] = value
|
||||||
|
|
||||||
|
elif isinstance(value, list) and all(
|
||||||
|
isinstance(item, str) for item in value
|
||||||
|
):
|
||||||
|
metadata[clean_key] = ", ".join(value)
|
||||||
|
|
||||||
return (
|
return (
|
||||||
TEXT_SECTION_SEPARATOR.join(
|
TEXT_SECTION_SEPARATOR.join(
|
||||||
page.extract_text() for page in pdf_reader.pages
|
page.extract_text() for page in pdf_reader.pages
|
||||||
|
@ -95,7 +95,7 @@ export default function AddConnector({
|
|||||||
...configuration.values.reduce(
|
...configuration.values.reduce(
|
||||||
(acc, field) => {
|
(acc, field) => {
|
||||||
if (field.type === "select") {
|
if (field.type === "select") {
|
||||||
acc[field.name] = field.options ? field.options[field.default!]! : "";
|
acc[field.name] = null;
|
||||||
} else if (field.type === "list") {
|
} else if (field.type === "list") {
|
||||||
acc[field.name] = field.default || [];
|
acc[field.name] = field.default || [];
|
||||||
} else if (field.type === "checkbox") {
|
} else if (field.type === "checkbox") {
|
||||||
|
@ -25,9 +25,10 @@ export default function Sidebar() {
|
|||||||
];
|
];
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="flex bg-background text-default">
|
<div className="flex flex-none w-[250px] bg-background text-default">
|
||||||
<div
|
<div
|
||||||
className={`flex-none
|
className={`
|
||||||
|
fixed
|
||||||
bg-background-100
|
bg-background-100
|
||||||
h-screen
|
h-screen
|
||||||
transition-all
|
transition-all
|
||||||
|
@ -34,16 +34,6 @@ import { TOGGLED_CONNECTORS_COOKIE_NAME } from "@/lib/constants";
|
|||||||
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
|
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
|
||||||
import { ConnectorCredentialPairStatus } from "../../connector/[ccPairId]/types";
|
import { ConnectorCredentialPairStatus } from "../../connector/[ccPairId]/types";
|
||||||
|
|
||||||
const columnWidths = {
|
|
||||||
first: "20%",
|
|
||||||
second: "15%",
|
|
||||||
third: "15%",
|
|
||||||
fourth: "15%",
|
|
||||||
fifth: "15%",
|
|
||||||
sixth: "15%",
|
|
||||||
seventh: "5%",
|
|
||||||
};
|
|
||||||
|
|
||||||
function SummaryRow({
|
function SummaryRow({
|
||||||
source,
|
source,
|
||||||
summary,
|
summary,
|
||||||
@ -61,9 +51,9 @@ function SummaryRow({
|
|||||||
return (
|
return (
|
||||||
<TableRow
|
<TableRow
|
||||||
onClick={onToggle}
|
onClick={onToggle}
|
||||||
className="border-border bg-white rounded-sm !border cursor-pointer"
|
className="border-border bg-white py-4 rounded-sm !border cursor-pointer"
|
||||||
>
|
>
|
||||||
<TableCell className={`py-4 w-[${columnWidths.first}]`}>
|
<TableCell>
|
||||||
<div className="text-xl flex items-center truncate ellipsis gap-x-2 font-semibold">
|
<div className="text-xl flex items-center truncate ellipsis gap-x-2 font-semibold">
|
||||||
<div className="cursor-pointer">
|
<div className="cursor-pointer">
|
||||||
{isOpen ? (
|
{isOpen ? (
|
||||||
@ -77,12 +67,12 @@ function SummaryRow({
|
|||||||
</div>
|
</div>
|
||||||
</TableCell>
|
</TableCell>
|
||||||
|
|
||||||
<TableCell className={`py-4 w-[${columnWidths.first}]`}>
|
<TableCell>
|
||||||
<div className="text-sm text-gray-500">Total Connectors</div>
|
<div className="text-sm text-gray-500">Total Connectors</div>
|
||||||
<div className="text-xl font-semibold">{summary.count}</div>
|
<div className="text-xl font-semibold">{summary.count}</div>
|
||||||
</TableCell>
|
</TableCell>
|
||||||
|
|
||||||
<TableCell className={` py-4 w-[${columnWidths.second}]`}>
|
<TableCell>
|
||||||
<div className="text-sm text-gray-500">Active Connectors</div>
|
<div className="text-sm text-gray-500">Active Connectors</div>
|
||||||
<Tooltip
|
<Tooltip
|
||||||
content={`${summary.active} out of ${summary.count} connectors are active`}
|
content={`${summary.active} out of ${summary.count} connectors are active`}
|
||||||
@ -102,7 +92,7 @@ function SummaryRow({
|
|||||||
</TableCell>
|
</TableCell>
|
||||||
|
|
||||||
{isPaidEnterpriseFeaturesEnabled && (
|
{isPaidEnterpriseFeaturesEnabled && (
|
||||||
<TableCell className={`py-4 w-[${columnWidths.fourth}]`}>
|
<TableCell>
|
||||||
<div className="text-sm text-gray-500">Public Connectors</div>
|
<div className="text-sm text-gray-500">Public Connectors</div>
|
||||||
<p className="flex text-xl mx-auto font-semibold items-center text-lg mt-1">
|
<p className="flex text-xl mx-auto font-semibold items-center text-lg mt-1">
|
||||||
{summary.public}/{summary.count}
|
{summary.public}/{summary.count}
|
||||||
@ -110,14 +100,14 @@ function SummaryRow({
|
|||||||
</TableCell>
|
</TableCell>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
<TableCell className={`py-4 w-[${columnWidths.fifth}]`}>
|
<TableCell>
|
||||||
<div className="text-sm text-gray-500">Total Docs Indexed</div>
|
<div className="text-sm text-gray-500">Total Docs Indexed</div>
|
||||||
<div className="text-xl font-semibold">
|
<div className="text-xl font-semibold">
|
||||||
{summary.totalDocsIndexed.toLocaleString()}
|
{summary.totalDocsIndexed.toLocaleString()}
|
||||||
</div>
|
</div>
|
||||||
</TableCell>
|
</TableCell>
|
||||||
|
|
||||||
<TableCell className={`w-[${columnWidths.sixth}]`}>
|
<TableCell>
|
||||||
<div className="text-sm text-gray-500">Errors</div>
|
<div className="text-sm text-gray-500">Errors</div>
|
||||||
|
|
||||||
<div className="flex items-center text-lg gap-x-1 font-semibold">
|
<div className="flex items-center text-lg gap-x-1 font-semibold">
|
||||||
@ -126,7 +116,7 @@ function SummaryRow({
|
|||||||
</div>
|
</div>
|
||||||
</TableCell>
|
</TableCell>
|
||||||
|
|
||||||
<TableCell className={`w-[${columnWidths.seventh}]`}></TableCell>
|
<TableCell />
|
||||||
</TableRow>
|
</TableRow>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -231,19 +221,17 @@ function ConnectorRow({
|
|||||||
router.push(`/admin/connector/${ccPairsIndexingStatus.cc_pair_id}`);
|
router.push(`/admin/connector/${ccPairsIndexingStatus.cc_pair_id}`);
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
<TableCell className={`!pr-0 w-[${columnWidths.first}]`}>
|
<TableCell className="!w-[300px]">
|
||||||
<p className="w-[100px] xl:w-[200px] inline-block ellipsis truncate">
|
<p className="w-[200px] xl:w-[400px] inline-block ellipsis truncate">
|
||||||
{ccPairsIndexingStatus.name}
|
{ccPairsIndexingStatus.name}
|
||||||
</p>
|
</p>
|
||||||
</TableCell>
|
</TableCell>
|
||||||
<TableCell className={` w-[${columnWidths.fifth}]`}>
|
<TableCell>
|
||||||
{timeAgo(ccPairsIndexingStatus?.last_success) || "-"}
|
{timeAgo(ccPairsIndexingStatus?.last_success) || "-"}
|
||||||
</TableCell>
|
</TableCell>
|
||||||
<TableCell className={`w-[${columnWidths.third}]`}>
|
<TableCell>{getActivityBadge()}</TableCell>
|
||||||
{getActivityBadge()}
|
|
||||||
</TableCell>
|
|
||||||
{isPaidEnterpriseFeaturesEnabled && (
|
{isPaidEnterpriseFeaturesEnabled && (
|
||||||
<TableCell className={`w-[${columnWidths.fourth}]`}>
|
<TableCell>
|
||||||
{ccPairsIndexingStatus.public_doc ? (
|
{ccPairsIndexingStatus.public_doc ? (
|
||||||
<Badge
|
<Badge
|
||||||
size="md"
|
size="md"
|
||||||
@ -259,17 +247,15 @@ function ConnectorRow({
|
|||||||
)}
|
)}
|
||||||
</TableCell>
|
</TableCell>
|
||||||
)}
|
)}
|
||||||
<TableCell className={`w-[${columnWidths.sixth}]`}>
|
<TableCell>{ccPairsIndexingStatus.docs_indexed}</TableCell>
|
||||||
{ccPairsIndexingStatus.docs_indexed}
|
<TableCell>
|
||||||
</TableCell>
|
|
||||||
<TableCell className={`w-[${columnWidths.second}]`}>
|
|
||||||
<IndexAttemptStatus
|
<IndexAttemptStatus
|
||||||
status={ccPairsIndexingStatus.last_finished_status || null}
|
status={ccPairsIndexingStatus.last_finished_status || null}
|
||||||
errorMsg={ccPairsIndexingStatus?.latest_index_attempt?.error_msg}
|
errorMsg={ccPairsIndexingStatus?.latest_index_attempt?.error_msg}
|
||||||
size="xs"
|
size="xs"
|
||||||
/>
|
/>
|
||||||
</TableCell>
|
</TableCell>
|
||||||
<TableCell className={`w-[${columnWidths.seventh}]`}>
|
<TableCell>
|
||||||
{isEditable && (
|
{isEditable && (
|
||||||
<CustomTooltip content="Manage Connector">
|
<CustomTooltip content="Manage Connector">
|
||||||
<FiSettings
|
<FiSettings
|
||||||
@ -447,23 +433,22 @@ export function CCPairIndexingStatusTable({
|
|||||||
}}
|
}}
|
||||||
isEditable={false}
|
isEditable={false}
|
||||||
/>
|
/>
|
||||||
<div className="-mb-10" />
|
<div className="flex items-center w-0 mt-4 gap-x-2">
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
ref={searchInputRef}
|
||||||
|
placeholder="Search connectors..."
|
||||||
|
value={searchTerm}
|
||||||
|
onChange={(e) => setSearchTerm(e.target.value)}
|
||||||
|
className="ml-1 w-96 h-9 flex-none rounded-md border border-border bg-background-50 px-3 py-1 text-sm shadow-sm transition-colors placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<Button className="h-9" onClick={() => toggleSources()}>
|
||||||
|
{!shouldExpand ? "Collapse All" : "Expand All"}
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
|
||||||
<TableBody>
|
<TableBody>
|
||||||
<div className="flex items-center mt-4 gap-x-2">
|
|
||||||
<input
|
|
||||||
type="text"
|
|
||||||
ref={searchInputRef}
|
|
||||||
placeholder="Search connectors..."
|
|
||||||
value={searchTerm}
|
|
||||||
onChange={(e) => setSearchTerm(e.target.value)}
|
|
||||||
className="ml-1 w-96 h-9 flex-none rounded-md border border-border bg-background-50 px-3 py-1 text-sm shadow-sm transition-colors placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring"
|
|
||||||
/>
|
|
||||||
|
|
||||||
<Button className="h-9" onClick={() => toggleSources()}>
|
|
||||||
{!shouldExpand ? "Collapse All" : "Expand All"}
|
|
||||||
</Button>
|
|
||||||
</div>
|
|
||||||
{sortedSources
|
{sortedSources
|
||||||
.filter(
|
.filter(
|
||||||
(source) =>
|
(source) =>
|
||||||
@ -494,41 +479,15 @@ export function CCPairIndexingStatusTable({
|
|||||||
{connectorsToggled[source] && (
|
{connectorsToggled[source] && (
|
||||||
<>
|
<>
|
||||||
<TableRow className="border border-border">
|
<TableRow className="border border-border">
|
||||||
<TableHeaderCell
|
<TableHeaderCell>Name</TableHeaderCell>
|
||||||
className={`w-[${columnWidths.first}]`}
|
<TableHeaderCell>Last Indexed</TableHeaderCell>
|
||||||
>
|
<TableHeaderCell>Activity</TableHeaderCell>
|
||||||
Name
|
|
||||||
</TableHeaderCell>
|
|
||||||
<TableHeaderCell
|
|
||||||
className={`w-[${columnWidths.fifth}]`}
|
|
||||||
>
|
|
||||||
Last Indexed
|
|
||||||
</TableHeaderCell>
|
|
||||||
<TableHeaderCell
|
|
||||||
className={`w-[${columnWidths.second}]`}
|
|
||||||
>
|
|
||||||
Activity
|
|
||||||
</TableHeaderCell>
|
|
||||||
{isPaidEnterpriseFeaturesEnabled && (
|
{isPaidEnterpriseFeaturesEnabled && (
|
||||||
<TableHeaderCell
|
<TableHeaderCell>Permissions</TableHeaderCell>
|
||||||
className={`w-[${columnWidths.fourth}]`}
|
|
||||||
>
|
|
||||||
Permissions
|
|
||||||
</TableHeaderCell>
|
|
||||||
)}
|
)}
|
||||||
<TableHeaderCell
|
<TableHeaderCell>Total Docs</TableHeaderCell>
|
||||||
className={`w-[${columnWidths.sixth}]`}
|
<TableHeaderCell>Last Status</TableHeaderCell>
|
||||||
>
|
<TableHeaderCell></TableHeaderCell>
|
||||||
Total Docs
|
|
||||||
</TableHeaderCell>
|
|
||||||
<TableHeaderCell
|
|
||||||
className={`w-[${columnWidths.third}]`}
|
|
||||||
>
|
|
||||||
Last Status
|
|
||||||
</TableHeaderCell>
|
|
||||||
<TableHeaderCell
|
|
||||||
className={`w-[${columnWidths.seventh}]`}
|
|
||||||
></TableHeaderCell>
|
|
||||||
</TableRow>
|
</TableRow>
|
||||||
{(sourceMatches
|
{(sourceMatches
|
||||||
? groupedStatuses[source]
|
? groupedStatuses[source]
|
||||||
|
@ -104,7 +104,7 @@ export default async function RootLayout({
|
|||||||
|
|
||||||
<body className={`relative ${inter.variable} font-sans`}>
|
<body className={`relative ${inter.variable} font-sans`}>
|
||||||
<div
|
<div
|
||||||
className={`text-default bg-background ${
|
className={`text-default min-h-screen bg-background ${
|
||||||
// TODO: remove this once proper dark mode exists
|
// TODO: remove this once proper dark mode exists
|
||||||
process.env.THEME_IS_DARK?.toLowerCase() === "true" ? "dark" : ""
|
process.env.THEME_IS_DARK?.toLowerCase() === "true" ? "dark" : ""
|
||||||
}`}
|
}`}
|
||||||
|
@ -26,7 +26,6 @@ export interface Option {
|
|||||||
|
|
||||||
export interface SelectOption extends Option {
|
export interface SelectOption extends Option {
|
||||||
type: "select";
|
type: "select";
|
||||||
default?: number;
|
|
||||||
options?: StringWithDescription[];
|
options?: StringWithDescription[];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -96,7 +95,6 @@ export const connectorConfigs: Record<
|
|||||||
query: "Select the web connector type:",
|
query: "Select the web connector type:",
|
||||||
label: "Scrape Method",
|
label: "Scrape Method",
|
||||||
name: "web_connector_type",
|
name: "web_connector_type",
|
||||||
default: 0,
|
|
||||||
options: [
|
options: [
|
||||||
{ name: "recursive", value: "recursive" },
|
{ name: "recursive", value: "recursive" },
|
||||||
{ name: "single", value: "single" },
|
{ name: "single", value: "single" },
|
||||||
|
Reference in New Issue
Block a user