From ba514aaaa2b31055ddea3060e9624082b119cf63 Mon Sep 17 00:00:00 2001 From: SubashMohan <76524044+Subash-Mohan@users.noreply.github.com> Date: Mon, 17 Mar 2025 21:06:02 +0530 Subject: [PATCH] Highspot connector (#4277) --- .../workflows/pr-python-connector-tests.yml | 3 + backend/onyx/configs/constants.py | 1 + backend/onyx/connectors/factory.py | 2 + backend/onyx/connectors/highspot/__init__.py | 4 + backend/onyx/connectors/highspot/client.py | 280 ++++++++++++ backend/onyx/connectors/highspot/connector.py | 431 ++++++++++++++++++ backend/onyx/connectors/highspot/utils.py | 122 +++++ .../highspot/test_highspot_connector.py | 98 ++++ .../highspot/test_highspot_data.json | 5 + web/public/Highspot.png | Bin 0 -> 17488 bytes web/src/components/icons/icons.tsx | 8 + web/src/lib/connectors/connectors.tsx | 41 ++ web/src/lib/connectors/credentials.ts | 16 + web/src/lib/sources.ts | 7 + web/src/lib/types.ts | 1 + 15 files changed, 1019 insertions(+) create mode 100644 backend/onyx/connectors/highspot/__init__.py create mode 100644 backend/onyx/connectors/highspot/client.py create mode 100644 backend/onyx/connectors/highspot/connector.py create mode 100644 backend/onyx/connectors/highspot/utils.py create mode 100644 backend/tests/daily/connectors/highspot/test_highspot_connector.py create mode 100644 backend/tests/daily/connectors/highspot/test_highspot_data.json create mode 100644 web/public/Highspot.png diff --git a/.github/workflows/pr-python-connector-tests.yml b/.github/workflows/pr-python-connector-tests.yml index f7f3972b3..aa740aa8d 100644 --- a/.github/workflows/pr-python-connector-tests.yml +++ b/.github/workflows/pr-python-connector-tests.yml @@ -50,6 +50,9 @@ env: GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }} # Notion NOTION_INTEGRATION_TOKEN: ${{ secrets.NOTION_INTEGRATION_TOKEN }} + # Highspot + HIGHSPOT_KEY: ${{ secrets.HIGHSPOT_KEY }} + HIGHSPOT_SECRET: ${{ secrets.HIGHSPOT_SECRET }} jobs: connectors-check: diff --git a/backend/onyx/configs/constants.py b/backend/onyx/configs/constants.py index 2d7445362..cbcc71245 100644 --- a/backend/onyx/configs/constants.py +++ b/backend/onyx/configs/constants.py @@ -174,6 +174,7 @@ class DocumentSource(str, Enum): FIREFLIES = "fireflies" EGNYTE = "egnyte" AIRTABLE = "airtable" + HIGHSPOT = "highspot" # Special case just for integration tests MOCK_CONNECTOR = "mock_connector" diff --git a/backend/onyx/connectors/factory.py b/backend/onyx/connectors/factory.py index 73593cc60..2f0b10743 100644 --- a/backend/onyx/connectors/factory.py +++ b/backend/onyx/connectors/factory.py @@ -30,6 +30,7 @@ from onyx.connectors.gong.connector import GongConnector from onyx.connectors.google_drive.connector import GoogleDriveConnector from onyx.connectors.google_site.connector import GoogleSitesConnector from onyx.connectors.guru.connector import GuruConnector +from onyx.connectors.highspot.connector import HighspotConnector from onyx.connectors.hubspot.connector import HubSpotConnector from onyx.connectors.interfaces import BaseConnector from onyx.connectors.interfaces import CheckpointConnector @@ -117,6 +118,7 @@ def identify_connector_class( DocumentSource.FIREFLIES: FirefliesConnector, DocumentSource.EGNYTE: EgnyteConnector, DocumentSource.AIRTABLE: AirtableConnector, + DocumentSource.HIGHSPOT: HighspotConnector, # just for integration tests DocumentSource.MOCK_CONNECTOR: MockConnector, } diff --git a/backend/onyx/connectors/highspot/__init__.py b/backend/onyx/connectors/highspot/__init__.py new file mode 100644 index 000000000..df94b5875 --- /dev/null 
+++ b/backend/onyx/connectors/highspot/__init__.py @@ -0,0 +1,4 @@ +""" +Highspot connector package for Onyx. +Enables integration with Highspot's knowledge base. +""" diff --git a/backend/onyx/connectors/highspot/client.py b/backend/onyx/connectors/highspot/client.py new file mode 100644 index 000000000..7879e6e79 --- /dev/null +++ b/backend/onyx/connectors/highspot/client.py @@ -0,0 +1,280 @@ +import base64 +from typing import Any +from typing import Dict +from typing import List +from typing import Optional +from urllib.parse import urljoin + +import requests +from requests.adapters import HTTPAdapter +from requests.exceptions import HTTPError +from requests.exceptions import RequestException +from requests.exceptions import Timeout +from urllib3.util.retry import Retry + +from onyx.utils.logger import setup_logger + +logger = setup_logger() + + +class HighspotClientError(Exception): + """Base exception for Highspot API client errors.""" + + def __init__(self, message: str, status_code: Optional[int] = None): + self.message = message + self.status_code = status_code + super().__init__(self.message) + + +class HighspotAuthenticationError(HighspotClientError): + """Exception raised for authentication errors.""" + + +class HighspotRateLimitError(HighspotClientError): + """Exception raised when rate limit is exceeded.""" + + def __init__(self, message: str, retry_after: Optional[str] = None): + self.retry_after = retry_after + super().__init__(message) + + +class HighspotClient: + """ + Client for interacting with the Highspot API. + + Uses basic authentication with provided key (username) and secret (password). + Implements retry logic, error handling, and connection pooling. + """ + + BASE_URL = "https://api-su2.highspot.com/v1.0/" + + def __init__( + self, + key: str, + secret: str, + base_url: str = BASE_URL, + timeout: int = 30, + max_retries: int = 3, + backoff_factor: float = 0.5, + status_forcelist: Optional[List[int]] = None, + ): + """ + Initialize the Highspot API client. 
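+        A minimal usage sketch (assuming valid credentials are supplied):
+            client = HighspotClient(key="...", secret="...")
+            spots = client.get_spots()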
+ + Args: + key: API key (used as username) + secret: API secret (used as password) + base_url: Base URL for the Highspot API + timeout: Request timeout in seconds + max_retries: Maximum number of retries for failed requests + backoff_factor: Backoff factor for retries + status_forcelist: HTTP status codes to retry on + """ + if not key or not secret: + raise ValueError("API key and secret are required") + + self.key = key + self.secret = secret + self.base_url = base_url + self.timeout = timeout + + # Set up session with retry logic + self.session = requests.Session() + retry_strategy = Retry( + total=max_retries, + backoff_factor=backoff_factor, + status_forcelist=status_forcelist or [429, 500, 502, 503, 504], + allowed_methods=["GET", "POST", "PUT", "DELETE"], + ) + adapter = HTTPAdapter(max_retries=retry_strategy) + self.session.mount("http://", adapter) + self.session.mount("https://", adapter) + + # Set up authentication + self._setup_auth() + + def _setup_auth(self) -> None: + """Set up basic authentication for the session.""" + auth = f"{self.key}:{self.secret}" + encoded_auth = base64.b64encode(auth.encode()).decode() + self.session.headers.update( + { + "Authorization": f"Basic {encoded_auth}", + "Content-Type": "application/json", + "Accept": "application/json", + } + ) + + def _make_request( + self, + method: str, + endpoint: str, + params: Optional[Dict[str, Any]] = None, + data: Optional[Dict[str, Any]] = None, + json_data: Optional[Dict[str, Any]] = None, + headers: Optional[Dict[str, str]] = None, + ) -> Dict[str, Any]: + """ + Make a request to the Highspot API. + + Args: + method: HTTP method (GET, POST, etc.) + endpoint: API endpoint + params: URL parameters + data: Form data + json_data: JSON data + headers: Additional headers + + Returns: + API response as a dictionary + + Raises: + HighspotClientError: On API errors + HighspotAuthenticationError: On authentication errors + HighspotRateLimitError: On rate limiting + requests.exceptions.RequestException: On request failures + """ + url = urljoin(self.base_url, endpoint) + request_headers = {} + if headers: + request_headers.update(headers) + + try: + logger.debug(f"Making {method} request to {url}") + response = self.session.request( + method=method, + url=url, + params=params, + data=data, + json=json_data, + headers=request_headers, + timeout=self.timeout, + ) + response.raise_for_status() + + if response.content and response.content.strip(): + return response.json() + return {} + + except HTTPError as e: + status_code = e.response.status_code + error_msg = str(e) + + try: + error_data = e.response.json() + if isinstance(error_data, dict): + error_msg = error_data.get("message", str(e)) + except (ValueError, KeyError): + pass + + if status_code == 401: + raise HighspotAuthenticationError(f"Authentication failed: {error_msg}") + elif status_code == 429: + retry_after = e.response.headers.get("Retry-After") + raise HighspotRateLimitError( + f"Rate limit exceeded: {error_msg}", retry_after=retry_after + ) + else: + raise HighspotClientError( + f"API error {status_code}: {error_msg}", status_code=status_code + ) + + except Timeout: + raise HighspotClientError("Request timed out") + except RequestException as e: + raise HighspotClientError(f"Request failed: {str(e)}") + + def get_spots(self) -> List[Dict[str, Any]]: + """ + Get all available spots. 
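+        Only spots the credentials can view are returned (the request passes right=view).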
+ + Returns: + List of spots with their names and IDs + """ + params = {"right": "view"} + response = self._make_request("GET", "spots", params=params) + logger.info(f"Received {response} spots") + total_counts = response.get("counts_total") + # Fix comparison to handle None value + if total_counts is not None and total_counts > 0: + return response.get("collection", []) + return [] + + def get_spot(self, spot_id: str) -> Dict[str, Any]: + """ + Get details for a specific spot. + + Args: + spot_id: ID of the spot + + Returns: + Spot details + """ + if not spot_id: + raise ValueError("spot_id is required") + return self._make_request("GET", f"spots/{spot_id}") + + def get_spot_items( + self, spot_id: str, offset: int = 0, page_size: int = 100 + ) -> Dict[str, Any]: + """ + Get items in a specific spot. + + Args: + spot_id: ID of the spot + offset: offset number + page_size: Number of items per page + + Returns: + Items in the spot + """ + if not spot_id: + raise ValueError("spot_id is required") + + params = {"spot": spot_id, "start": offset, "limit": page_size} + return self._make_request("GET", "items", params=params) + + def get_item(self, item_id: str) -> Dict[str, Any]: + """ + Get details for a specific item. + + Args: + item_id: ID of the item + + Returns: + Item details + """ + if not item_id: + raise ValueError("item_id is required") + return self._make_request("GET", f"items/{item_id}") + + def get_item_content(self, item_id: str) -> bytes: + """ + Get the raw content of an item. + + Args: + item_id: ID of the item + + Returns: + Raw content bytes + """ + if not item_id: + raise ValueError("item_id is required") + + url = urljoin(self.base_url, f"items/{item_id}/content") + response = self.session.get(url, timeout=self.timeout) + response.raise_for_status() + return response.content + + def health_check(self) -> bool: + """ + Check if the API is accessible and credentials are valid. 
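+        Issues a lightweight GET request for spots with limit=1.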
+ + Returns: + True if API is accessible, False otherwise + """ + try: + self._make_request("GET", "spots", params={"limit": 1}) + return True + except (HighspotClientError, HighspotAuthenticationError): + return False diff --git a/backend/onyx/connectors/highspot/connector.py b/backend/onyx/connectors/highspot/connector.py new file mode 100644 index 000000000..380d878a5 --- /dev/null +++ b/backend/onyx/connectors/highspot/connector.py @@ -0,0 +1,431 @@ +from datetime import datetime +from io import BytesIO +from typing import Any +from typing import Dict +from typing import List +from typing import Optional + +from onyx.configs.app_configs import INDEX_BATCH_SIZE +from onyx.configs.constants import DocumentSource +from onyx.connectors.highspot.client import HighspotClient +from onyx.connectors.highspot.client import HighspotClientError +from onyx.connectors.highspot.utils import scrape_url_content +from onyx.connectors.interfaces import GenerateDocumentsOutput +from onyx.connectors.interfaces import GenerateSlimDocumentOutput +from onyx.connectors.interfaces import LoadConnector +from onyx.connectors.interfaces import PollConnector +from onyx.connectors.interfaces import SecondsSinceUnixEpoch +from onyx.connectors.interfaces import SlimConnector +from onyx.connectors.models import ConnectorMissingCredentialError +from onyx.connectors.models import Document +from onyx.connectors.models import SlimDocument +from onyx.connectors.models import TextSection +from onyx.file_processing.extract_file_text import extract_file_text +from onyx.file_processing.extract_file_text import VALID_FILE_EXTENSIONS +from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface +from onyx.utils.logger import setup_logger + +logger = setup_logger() +_SLIM_BATCH_SIZE = 1000 + + +class HighspotConnector(LoadConnector, PollConnector, SlimConnector): + """ + Connector for loading data from Highspot. + + Retrieves content from specified spots using the Highspot API. + If no spots are specified, retrieves content from all available spots. + """ + + def __init__( + self, + spot_names: List[str] = [], + batch_size: int = INDEX_BATCH_SIZE, + ): + """ + Initialize the Highspot connector. + + Args: + spot_names: List of spot names to retrieve content from (if empty, gets all spots) + batch_size: Number of items to retrieve in each batch + """ + self.spot_names = spot_names + self.batch_size = batch_size + self._client: Optional[HighspotClient] = None + self._spot_id_map: Dict[str, str] = {} # Maps spot names to spot IDs + self._all_spots_fetched = False + self.highspot_url: Optional[str] = None + self.key: Optional[str] = None + self.secret: Optional[str] = None + + @property + def client(self) -> HighspotClient: + if self._client is None: + if not self.key or not self.secret: + raise ConnectorMissingCredentialError("Highspot") + # Ensure highspot_url is a string, use default if None + base_url = ( + self.highspot_url + if self.highspot_url is not None + else HighspotClient.BASE_URL + ) + self._client = HighspotClient(self.key, self.secret, base_url=base_url) + return self._client + + def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: + logger.info("Loading Highspot credentials") + self.highspot_url = credentials.get("highspot_url") + self.key = credentials.get("highspot_key") + self.secret = credentials.get("highspot_secret") + return None + + def _populate_spot_id_map(self) -> None: + """ + Populate the spot ID map with all available spots. 
+ Keys are stored as lowercase for case-insensitive lookups. + """ + spots = self.client.get_spots() + for spot in spots: + if "title" in spot and "id" in spot: + spot_name = spot["title"] + self._spot_id_map[spot_name.lower()] = spot["id"] + + self._all_spots_fetched = True + logger.info(f"Retrieved {len(self._spot_id_map)} spots from Highspot") + + def _get_all_spot_names(self) -> List[str]: + """ + Retrieve all available spot names. + + Returns: + List of all spot names + """ + if not self._all_spots_fetched: + self._populate_spot_id_map() + + return [spot_name for spot_name in self._spot_id_map.keys()] + + def _get_spot_id_from_name(self, spot_name: str) -> str: + """ + Get spot ID from a spot name. + + Args: + spot_name: Name of the spot + + Returns: + ID of the spot + + Raises: + ValueError: If spot name is not found + """ + if not self._all_spots_fetched: + self._populate_spot_id_map() + + spot_name_lower = spot_name.lower() + if spot_name_lower not in self._spot_id_map: + raise ValueError(f"Spot '{spot_name}' not found") + + return self._spot_id_map[spot_name_lower] + + def load_from_state(self) -> GenerateDocumentsOutput: + """ + Load content from configured spots in Highspot. + If no spots are configured, loads from all spots. + + Yields: + Batches of Document objects + """ + return self.poll_source(None, None) + + def poll_source( + self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None + ) -> GenerateDocumentsOutput: + """ + Poll Highspot for content updated since the start time. + + Args: + start: Start time as seconds since Unix epoch + end: End time as seconds since Unix epoch + + Yields: + Batches of Document objects + """ + doc_batch: list[Document] = [] + + # If no spots specified, get all spots + spot_names_to_process = self.spot_names + if not spot_names_to_process: + spot_names_to_process = self._get_all_spot_names() + logger.info( + f"No spots specified, using all {len(spot_names_to_process)} available spots" + ) + + for spot_name in spot_names_to_process: + try: + spot_id = self._get_spot_id_from_name(spot_name) + if spot_id is None: + logger.warning(f"Spot ID not found for spot {spot_name}") + continue + offset = 0 + has_more = True + + while has_more: + logger.info( + f"Retrieving items from spot {spot_name}, offset {offset}" + ) + response = self.client.get_spot_items( + spot_id=spot_id, offset=offset, page_size=self.batch_size + ) + items = response.get("collection", []) + logger.info(f"Received Items: {items}") + if not items: + has_more = False + continue + + for item in items: + try: + item_id = item.get("id") + if not item_id: + logger.warning("Item without ID found, skipping") + continue + + item_details = self.client.get_item(item_id) + if not item_details: + logger.warning( + f"Item {item_id} details not found, skipping" + ) + continue + # Apply time filter if specified + if start or end: + updated_at = item_details.get("date_updated") + if updated_at: + # Convert to datetime for comparison + try: + updated_time = datetime.fromisoformat( + updated_at.replace("Z", "+00:00") + ) + if ( + start and updated_time.timestamp() < start + ) or (end and updated_time.timestamp() > end): + continue + except (ValueError, TypeError): + # Skip if date cannot be parsed + logger.warning( + f"Invalid date format for item {item_id}: {updated_at}" + ) + continue + + content = self._get_item_content(item_details) + title = item_details.get("title", "") + + doc_batch.append( + Document( + id=f"HIGHSPOT_{item_id}", + sections=[ + TextSection( + 
link=item_details.get( + "url", + f"https://www.highspot.com/items/{item_id}", + ), + text=content, + ) + ], + source=DocumentSource.HIGHSPOT, + semantic_identifier=title, + metadata={ + "spot_name": spot_name, + "type": item_details.get("content_type", ""), + "created_at": item_details.get( + "date_added", "" + ), + "author": item_details.get("author", ""), + "language": item_details.get("language", ""), + "can_download": str( + item_details.get("can_download", False) + ), + }, + doc_updated_at=item_details.get("date_updated"), + ) + ) + + if len(doc_batch) >= self.batch_size: + yield doc_batch + doc_batch = [] + + except HighspotClientError as e: + item_id = "ID" if not item_id else item_id + logger.error(f"Error retrieving item {item_id}: {str(e)}") + + has_more = len(items) >= self.batch_size + offset += self.batch_size + + except (HighspotClientError, ValueError) as e: + logger.error(f"Error processing spot {spot_name}: {str(e)}") + + if doc_batch: + yield doc_batch + + def _get_item_content(self, item_details: Dict[str, Any]) -> str: + """ + Get the text content of an item. + + Args: + item_details: Item details from the API + + Returns: + Text content of the item + """ + item_id = item_details.get("id", "") + content_name = item_details.get("content_name", "") + is_valid_format = content_name and "." in content_name + file_extension = content_name.split(".")[-1].lower() if is_valid_format else "" + file_extension = "." + file_extension if file_extension else "" + can_download = item_details.get("can_download", False) + content_type = item_details.get("content_type", "") + + # Extract title and description once at the beginning + title, description = self._extract_title_and_description(item_details) + default_content = f"{title}\n{description}" + logger.info(f"Processing item {item_id} with extension {file_extension}") + + try: + if content_type == "WebLink": + url = item_details.get("url") + if not url: + return default_content + content = scrape_url_content(url, True) + return content if content else default_content + + elif ( + is_valid_format + and file_extension in VALID_FILE_EXTENSIONS + and can_download + ): + # For documents, try to get the text content + if not item_id: # Ensure item_id is defined + return default_content + + content_response = self.client.get_item_content(item_id) + # Process and extract text from binary content based on type + if content_response: + text_content = extract_file_text( + BytesIO(content_response), content_name + ) + return text_content + return default_content + + else: + return default_content + + except HighspotClientError as e: + # Use item_id safely in the warning message + error_context = f"item {item_id}" if item_id else "item" + logger.warning(f"Could not retrieve content for {error_context}: {str(e)}") + return "" + + def _extract_title_and_description( + self, item_details: Dict[str, Any] + ) -> tuple[str, str]: + """ + Extract the title and description from item details. + + Args: + item_details: Item details from the API + + Returns: + Tuple of title and description + """ + title = item_details.get("title", "") + description = item_details.get("description", "") + return title, description + + def retrieve_all_slim_documents( + self, + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, + callback: IndexingHeartbeatInterface | None = None, + ) -> GenerateSlimDocumentOutput: + """ + Retrieve all document IDs from the configured spots. + If no spots are configured, retrieves from all spots. 
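+        Only item IDs are collected here; item content is never downloaded.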
+ + Args: + start: Optional start time filter + end: Optional end time filter + callback: Optional indexing heartbeat callback + + Yields: + Batches of SlimDocument objects + """ + slim_doc_batch: list[SlimDocument] = [] + + # If no spots specified, get all spots + spot_names_to_process = self.spot_names + if not spot_names_to_process: + spot_names_to_process = self._get_all_spot_names() + logger.info( + f"No spots specified, using all {len(spot_names_to_process)} available spots for slim documents" + ) + + for spot_name in spot_names_to_process: + try: + spot_id = self._get_spot_id_from_name(spot_name) + offset = 0 + has_more = True + + while has_more: + logger.info( + f"Retrieving slim documents from spot {spot_name}, offset {offset}" + ) + response = self.client.get_spot_items( + spot_id=spot_id, offset=offset, page_size=self.batch_size + ) + + items = response.get("collection", []) + if not items: + has_more = False + continue + + for item in items: + item_id = item.get("id") + if not item_id: + continue + + slim_doc_batch.append(SlimDocument(id=f"HIGHSPOT_{item_id}")) + + if len(slim_doc_batch) >= _SLIM_BATCH_SIZE: + yield slim_doc_batch + slim_doc_batch = [] + + has_more = len(items) >= self.batch_size + offset += self.batch_size + + except (HighspotClientError, ValueError) as e: + logger.error( + f"Error retrieving slim documents from spot {spot_name}: {str(e)}" + ) + + if slim_doc_batch: + yield slim_doc_batch + + def validate_credentials(self) -> bool: + """ + Validate that the provided credentials can access the Highspot API. + + Returns: + True if credentials are valid, False otherwise + """ + try: + return self.client.health_check() + except Exception as e: + logger.error(f"Failed to validate credentials: {str(e)}") + return False + + +if __name__ == "__main__": + spot_names: List[str] = [] + connector = HighspotConnector(spot_names) + credentials = {"highspot_key": "", "highspot_secret": ""} + connector.load_credentials(credentials=credentials) + for doc in connector.load_from_state(): + print(doc) diff --git a/backend/onyx/connectors/highspot/utils.py b/backend/onyx/connectors/highspot/utils.py new file mode 100644 index 000000000..efc00dac1 --- /dev/null +++ b/backend/onyx/connectors/highspot/utils.py @@ -0,0 +1,122 @@ +from typing import Optional +from urllib.parse import urlparse + +from bs4 import BeautifulSoup +from playwright.sync_api import sync_playwright + +from onyx.file_processing.html_utils import web_html_cleanup +from onyx.utils.logger import setup_logger + +logger = setup_logger() + +# Constants +WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS = 20 +JAVASCRIPT_DISABLED_MESSAGE = "You have JavaScript disabled in your browser" +DEFAULT_TIMEOUT = 60000 # 60 seconds + + +def scrape_url_content( + url: str, scroll_before_scraping: bool = False, timeout_ms: int = DEFAULT_TIMEOUT +) -> Optional[str]: + """ + Scrapes content from a given URL and returns the cleaned text. 
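+    Renders the page in headless Chromium via Playwright (optionally scrolling to load lazy content)
+    and cleans the resulting HTML with BeautifulSoup.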
+ + Args: + url: The URL to scrape + scroll_before_scraping: Whether to scroll through the page to load lazy content + timeout_ms: Timeout in milliseconds for page navigation and loading + + Returns: + The cleaned text content of the page or None if scraping fails + """ + playwright = None + browser = None + try: + validate_url(url) + playwright = sync_playwright().start() + browser = playwright.chromium.launch(headless=True) + context = browser.new_context() + page = context.new_page() + + logger.info(f"Navigating to URL: {url}") + try: + page.goto(url, timeout=timeout_ms) + except Exception as e: + logger.error(f"Failed to navigate to {url}: {str(e)}") + return None + + if scroll_before_scraping: + logger.debug("Scrolling page to load lazy content") + scroll_attempts = 0 + previous_height = page.evaluate("document.body.scrollHeight") + while scroll_attempts < WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS: + page.evaluate("window.scrollTo(0, document.body.scrollHeight)") + try: + page.wait_for_load_state("networkidle", timeout=timeout_ms) + except Exception as e: + logger.warning(f"Network idle wait timed out: {str(e)}") + break + + new_height = page.evaluate("document.body.scrollHeight") + if new_height == previous_height: + break + previous_height = new_height + scroll_attempts += 1 + + content = page.content() + soup = BeautifulSoup(content, "html.parser") + + parsed_html = web_html_cleanup(soup) + + if JAVASCRIPT_DISABLED_MESSAGE in parsed_html.cleaned_text: + logger.debug("JavaScript disabled message detected, checking iframes") + try: + iframe_count = page.frame_locator("iframe").locator("html").count() + if iframe_count > 0: + iframe_texts = ( + page.frame_locator("iframe").locator("html").all_inner_texts() + ) + iframe_content = "\n".join(iframe_texts) + + if len(parsed_html.cleaned_text) < 700: + parsed_html.cleaned_text = iframe_content + else: + parsed_html.cleaned_text += "\n" + iframe_content + except Exception as e: + logger.warning(f"Error processing iframes: {str(e)}") + + return parsed_html.cleaned_text + + except Exception as e: + logger.error(f"Error scraping URL {url}: {str(e)}") + return None + + finally: + if browser: + try: + browser.close() + except Exception as e: + logger.debug(f"Error closing browser: {str(e)}") + if playwright: + try: + playwright.stop() + except Exception as e: + logger.debug(f"Error stopping playwright: {str(e)}") + + +def validate_url(url: str) -> None: + """ + Validates that a URL is properly formatted. 
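+    Only http and https URLs that include a hostname are accepted.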
+ + Args: + url: The URL to validate + + Raises: + ValueError: If URL is not valid + """ + parse = urlparse(url) + if parse.scheme != "http" and parse.scheme != "https": + raise ValueError("URL must be of scheme https?://") + + if not parse.hostname: + raise ValueError("URL must include a hostname") diff --git a/backend/tests/daily/connectors/highspot/test_highspot_connector.py b/backend/tests/daily/connectors/highspot/test_highspot_connector.py new file mode 100644 index 000000000..6da06b58c --- /dev/null +++ b/backend/tests/daily/connectors/highspot/test_highspot_connector.py @@ -0,0 +1,98 @@ +import json +import os +import time +from pathlib import Path + +import pytest + +from onyx.configs.constants import DocumentSource +from onyx.connectors.highspot.connector import HighspotConnector +from onyx.connectors.models import Document + + +def load_test_data(file_name: str = "test_highspot_data.json") -> dict: + """Load test data from JSON file.""" + current_dir = Path(__file__).parent + with open(current_dir / file_name, "r") as f: + return json.load(f) + + +@pytest.fixture +def highspot_connector() -> HighspotConnector: + """Create a Highspot connector with credentials from environment variables.""" + # Check if required environment variables are set + if not os.environ.get("HIGHSPOT_KEY") or not os.environ.get("HIGHSPOT_SECRET"): + pytest.fail("HIGHSPOT_KEY or HIGHSPOT_SECRET environment variables not set") + + connector = HighspotConnector( + spot_names=["Test content"], # Use specific spot name instead of empty list + batch_size=10, # Smaller batch size for testing + ) + connector.load_credentials( + { + "highspot_key": os.environ["HIGHSPOT_KEY"], + "highspot_secret": os.environ["HIGHSPOT_SECRET"], + "highspot_url": os.environ.get( + "HIGHSPOT_URL", "https://api-su2.highspot.com/v1.0/" + ), + } + ) + return connector + + +def test_highspot_connector_basic(highspot_connector: HighspotConnector) -> None: + """Test basic functionality of the Highspot connector.""" + all_docs: list[Document] = [] + test_data = load_test_data() + target_test_doc_id = test_data.get("target_doc_id") + target_test_doc: Document | None = None + + # Test loading documents + for doc_batch in highspot_connector.poll_source(0, time.time()): + for doc in doc_batch: + all_docs.append(doc) + if doc.id == f"HIGHSPOT_{target_test_doc_id}": + target_test_doc = doc + + # Verify documents were loaded + assert len(all_docs) > 0 + + # If we have a specific test document ID, validate it + if target_test_doc_id and target_test_doc is not None: + assert target_test_doc.semantic_identifier == test_data.get( + "semantic_identifier" + ) + assert target_test_doc.source == DocumentSource.HIGHSPOT + assert target_test_doc.metadata is not None + + assert len(target_test_doc.sections) == 1 + section = target_test_doc.sections[0] + assert section.link is not None + # Only check if content exists, as exact content might change + assert section.text is not None + assert len(section.text) > 0 + + +def test_highspot_connector_slim(highspot_connector: HighspotConnector) -> None: + """Test slim document retrieval.""" + # Get all doc IDs from the full connector + all_full_doc_ids = set() + for doc_batch in highspot_connector.load_from_state(): + all_full_doc_ids.update([doc.id for doc in doc_batch]) + + # Get all doc IDs from the slim connector + all_slim_doc_ids = set() + for slim_doc_batch in highspot_connector.retrieve_all_slim_documents(): + all_slim_doc_ids.update([doc.id for doc in slim_doc_batch]) + + # The set of full doc IDs should be 
a subset of the slim doc IDs + assert all_full_doc_ids.issubset(all_slim_doc_ids) + # Make sure we actually got some documents + assert len(all_slim_doc_ids) > 0 + + +def test_highspot_connector_validate_credentials( + highspot_connector: HighspotConnector, +) -> None: + """Test credential validation.""" + assert highspot_connector.validate_credentials() is True diff --git a/backend/tests/daily/connectors/highspot/test_highspot_data.json b/backend/tests/daily/connectors/highspot/test_highspot_data.json new file mode 100644 index 000000000..d796b3d60 --- /dev/null +++ b/backend/tests/daily/connectors/highspot/test_highspot_data.json @@ -0,0 +1,5 @@ +{ + "target_doc_id": "67cd8eb35d3ee0487de2e704", + "semantic_identifier": "Highspot in Action _ Salesforce Integration", + "link": "https://www.highspot.com/items/67cd8eb35d3ee0487de2e704" +} diff --git a/web/public/Highspot.png b/web/public/Highspot.png new file mode 100644 index 0000000000000000000000000000000000000000..7bf874f5bba2b461543c7934fd1631d16d7edaec GIT binary patch literal 17488 zcmbTcQ*>nS*RGvT(y?u$W81cER_s*lj&0jX$L`p+&5qfzlbzom@3#-$u@Al)qpH?= z*2F!pIiFewwZawUCE#IjU_d}X;H4x*l|ewh-Tm{10{dEdgJSso^@R--(*&y6n*rU7 zoB$v~ruN1FA}L!VbAU3y$kfAe6u=7t0v2MastMGTljSzCx1~4w$A;eB*5S(=1cX<> z-NDGj8UQ3R2AEsg@sV7$casoVn(~oou*osXIfwu(EG0di04kpHswSS+CS0Z@0{le0 z?%ZDiYym(cB6nLGJ7;cpK9YaM<^DSUXPJS7=wBv4Yd(^Hi_(-+BoeWA0uXV~Gtrqa zGBOfzaM5#_n3%Ad0=QUciI^FgnHd;a7?@b;m{_?PIk=gbiT?8<`AXWpOM5$_eIO|vi$0ck&URmiHj}3 z4k#tcNAh)r-qg~Rn}ykw%ZwGkL1)Hn%tps*#%fGw#LfkvV`4L6HD+exWMgGz`){28 z&0koUgH4!;kyA{BNr;I_OoWqFOq`3ASwu{nS(rtbjs3sAQg+TjBRdnoe{)-Y<^GQ^ z$N$rpTf_-q1hjWjwYRtV?;t2z*aPjIE$khLn3?FAiGIi#nONHWvqJSxm;PO}D8R|m z6<{jvWN%CKFU;JQ|BD1B4t6ds4h|+dGb2tWI#xzDRyt!2fC(M534qIp6Tr%7!fHhF zAAi&T3*Wyd^=sv;42S=!;WlA6=Hg-mn9%`@7}@BIm{`r|xY#)XbVkfEH&nfsQ70$R;Dq=t^v_GS3 z+oNp3sc5ce=}a)Fx1fJlEny_7ZPOra9M877W8q|G_pd|YtWEw*R?jxATRReE@tU)}i z|A$N4*6DrU%Gt`{4WnmANZpD|(?TvZk0;huv^*uSNjO;TDlfbIsLk0n%CHUy1p)b(HPmd z*f_aO>)8BiF|$_(xL)ralR5k8rDD?)qv?$f0Ybp zQu`$a2~hV%^{Wd+_AOtv{~OxZEPcU5GjAZWZ-y|a{vz{*7u&iCM6&`BP={yJgl^e@ zU{Z%~+w@iVH=U}lDgDBWVqQ=0(ecF}(X<{!;}?u^%@?SzIRVkGpmJ%0Hmqgx>SXio zBCv1f@a_7lXooS-;_D7m15}n*0qJerfS{zAfSEEgZAH^uO3xOzav;tZh?WZ+B1dv^ z6>VN(Scz3<;heKm#--!YuIcNToXKnhC4{QeO)U*gHWXqT!>HJD56DfVT8Q)n>U#(C zRv9C##Sb8_;4k=&FsX(st}PXJiHz^egM+Vi7uvM5!Cy^xr~8CHcb)lIoC zp=c_xe1y1X*|Kg|D=W@{+6>P}tu)KpVoh@7z0O8^zom3#%+-6OYDzT~YK#WTi$g)k)RWo4K^KNdhoEeH z;D^ljbb3?mu$MuRE%Rcb;iNtNlX3QYPI`o?Gbw8=u`$((k;`cC`V4bj?B|XaW2;?j zmkcNeUdz3uV|YZ&N}^JEk#yDSD#jIVJuby+w&psS)fFQaK1@265#)26zr&}ny;2PO9YxsB88l@cZ3bwRc1Y`%n$hy4o* zi1t?u+#2C$P_n_YqjsRH!V^Je!0tQJCDNPEGE2>?KSTsjWQ$5nuDL^M! zU+vIFkdAN@hR6nNr#j``=^MztQQ*MOGj3fHkqyLT@Oc;!89|Z~+m?0nOU(_Xzn?)T zLkhX49niu|?g>ZD*n~ts_NKz(uZKCbjapem8~sG=5zpk+DNHFngEi~|4eu=5)J}+y zs)^imW2wMrk5|uva~7TW`pC(~WH_1w$3vJkRCu#Dl;4Uk&30j+x>WY2#W15EuSj&k z2ctPMh*2am*iaL!k*7!~qLTA(g~XK;USG6cknLD3ZqbD3?<*SWq7@x>VjfgtxX<2W z$cbJnl>Dj@M4kfng(jzkW^&EcdN2cj^kL4t8hEQB7JLMa60BJW4?y`L6{fVgLUn!! z6wqf>dMCVX*ZVC}PW&u8Xad7|zwIjryvslj{RD(}&J zw$gPdUp7PozWE=a^Y%KYA!~iKb%CC9;{>UD^@}n$d5?TUgo9su&yGey`IaIW`1F9^ z`j@ls-TRX^Ec~BK_&-Nosx^+$GQ!ek`rc*H;tAzoxNwn>1CXtXgMiT@*fUCk^%&X&Q;0)gMWwb^)vVFQpgEsj=05ov-g0Q+= zKQ=tZBQ(e$rA|*^T$+6BBhAtlw=x?X$X-gPcn-H#<9<Zy*%5rHh4PB?*To(*6wV(^CHp-1f(YAWMS)tey;gu zn2EyUUrv_E<$S~^p>ehw(X)P%+N{^D5?LkKwZbJxj*X^si=_D-`NRhBrjPT0a z9Rp)4wEEUv9O_ag>6tB+kM9Hcp~++~*#Y7>BdMu1mAf^E*J_ALr%UU(P8%F3__Q~! 
z{8f0L?KsZx3Z0rHT%c~>clxz15hWl64eqSZ#qr5b<{D5WeU{{Sde92jrSJkdTk7lE ztu|kNoN>qLEB+BTYk}h`w=59;Qe{9sjOQPXUoc$lWRpq_2|*bpj{A|l-h99Q{8kX8 z_U)uMN8lI5PPXgq^XgdzyH98=b~I|>nz^xtR6cT&{Z80t1;5++(!m@Ayk2&Q-C3Dd z4F`4-PT<>QX7`7VeGx>oI7|Mrj(BK0d^UY^v<6HU_vq|L>|VonbsLJcy0T(EK4sne zLer~{8PXhRqq*{w1H9ARQ^XE^Y+GD_N}u;mueskL=VuFlE74qiT3g>-LXVKI!tR-~ z5u#4K8%#ZdL(q1`oj6^u@V--Yp3irQN53VpYBGgg8=Zap6x45cYoDcT*i~k1k+q0Y z_mBMoyT3o8e)622+vL+DXRg&BI?850Kf8L>MxswFzW3Qw+KCvM4n!oG(AU2zc+iej3LcJI6pjbF@|P|H*s>dzBNK1>9{=KzvpHYFR@>0o=$ZH znN7l+;S69wY^$#FVS+@|vxK(0r=}>rfzegV#h(lyEe|`?&des}%2wrZ_!LGOjsg|) zo3-$yFMVIfPmGQdyYwBoE|9zqFldP$vCJNmQSGdPMU8Q)EL$kVC4u#6(K4LAqx(Mr zECSR0SJrZrC|78ppRluQl77UWK32AN9M0nixkhWVuM-1=f==ta)S*j0QN(>zaiyAk z{>U?%JL$4L!Z4!!CWUD^oaIF8ehfo1*&yGboEqYK*5(fKZVvA|#z`?Y67D4nN87so z_m9s`rL+slXTD0h^?LEq7%4b!Arx3OwB0XGUel(9elGgr(WhR{-z|~u_Sv{x`lw4z zBrcnWXIt@sL3(x9y$Fvi-?M7J?lkxv!z|Pkk1y({#Op=dM6`LeTD0^#Sui%t!%vk& zSC4;&ldd!C(+`u0xst7mOzxIepL4l%cDGW9t&GlhQsMQRe0P5`73&k4s$^wD`nST* zr6nP&YE~0(-@O?PxAAYi5e$c>22u|ux8%HaV&|1p@n=hN8AB-NrfpS}{clV0m#%Yc`?gxH6$_=RWq!8zwCoM4X_w2#A4&(sLSFs3lv?&JwoXj*F5b{) zg5xdW_Goe=(CoRm{Gp2LWa;8dfqWL9|}_2|C=UnWMT9Eb;H4?h?g!600000 r000000000000000000000Qf6cQ1dQxVVZ~800000NkvXXu0mjf5mJ)2 literal 0 HcmV?d00001 diff --git a/web/src/components/icons/icons.tsx b/web/src/components/icons/icons.tsx index 55d2ea648..e7f0a7390 100644 --- a/web/src/components/icons/icons.tsx +++ b/web/src/components/icons/icons.tsx @@ -89,6 +89,7 @@ import cohereIcon from "../../../public/Cohere.svg"; import voyageIcon from "../../../public/Voyage.png"; import googleIcon from "../../../public/Google.webp"; import xenforoIcon from "../../../public/Xenforo.svg"; +import highspotIcon from "../../../public/Highspot.png"; import { FaGithub, FaRobot } from "react-icons/fa"; import { cn } from "@/lib/utils"; @@ -2912,6 +2913,13 @@ export const GitbookIcon = ({ ); +export const HighspotIcon = ({ + size = 16, + className = defaultTailwindCSS, +}: IconProps) => { + return ; +}; + export const PinnedIcon = ({ size = 16, className = defaultTailwindCSS, diff --git a/web/src/lib/connectors/connectors.tsx b/web/src/lib/connectors/connectors.tsx index e26631864..f644bc6a4 100644 --- a/web/src/lib/connectors/connectors.tsx +++ b/web/src/lib/connectors/connectors.tsx @@ -1249,6 +1249,47 @@ For example, specifying .*-support.* as a "channel" will cause the connector to ], overrideDefaultFreq: 60 * 60 * 24, }, + highspot: { + description: "Configure Highspot connector", + values: [ + { + type: "tab", + name: "highspot_scope", + label: "What should we index from Highspot?", + optional: true, + tabs: [ + { + value: "spots", + label: "Specific Spots", + fields: [ + { + type: "list", + query: "Enter the spot name(s):", + label: "Spot Name(s)", + name: "spot_names", + optional: false, + description: "For multiple spots, enter your spot one by one.", + }, + ], + }, + { + value: "everything", + label: "Everything", + fields: [ + { + type: "string_tab", + label: "Everything", + name: "everything", + description: + "This connector will index all spots the provided credentials have access to!", + }, + ], + }, + ], + }, + ], + advanced_values: [], + }, }; export function createConnectorInitialValues( connector: ConfigurableSources diff --git a/web/src/lib/connectors/credentials.ts b/web/src/lib/connectors/credentials.ts index ac35beb29..0b34d47cc 100644 --- a/web/src/lib/connectors/credentials.ts +++ b/web/src/lib/connectors/credentials.ts @@ -226,6 +226,12 @@ export interface AirtableCredentialJson { airtable_access_token: string; } 
+export interface HighspotCredentialJson { + highspot_url: string; + highspot_key: string; + highspot_secret: string; +} + export const credentialTemplates: Record = { github: { github_access_token: "" } as GithubCredentialJson, gitlab: { @@ -353,6 +359,11 @@ export const credentialTemplates: Record = { gitbook: { gitbook_api_key: "", } as GitbookCredentialJson, + highspot: { + highspot_url: "", + highspot_key: "", + highspot_secret: "", + } as HighspotCredentialJson, }; export const credentialDisplayNames: Record = { @@ -488,6 +499,11 @@ export const credentialDisplayNames: Record = { // GitBook gitbook_space_id: "GitBook Space ID", gitbook_api_key: "GitBook API Key", + + //Highspot + highspot_url: "Highspot URL", + highspot_key: "Highspot Key", + highspot_secret: "Highspot Secret", }; export function getDisplayNameForCredentialKey(key: string): string { diff --git a/web/src/lib/sources.ts b/web/src/lib/sources.ts index 7a3341256..6a1bd0ce5 100644 --- a/web/src/lib/sources.ts +++ b/web/src/lib/sources.ts @@ -44,6 +44,7 @@ import { GlobeIcon2, FileIcon2, GitbookIcon, + HighspotIcon, } from "@/components/icons/icons"; import { ValidSources } from "./types"; import { @@ -329,6 +330,12 @@ export const SOURCE_METADATA_MAP: SourceMap = { category: SourceCategory.Wiki, docs: "https://docs.onyx.app/connectors/gitbook", }, + highspot: { + icon: HighspotIcon, + displayName: "Highspot", + category: SourceCategory.Wiki, + docs: "https://docs.onyx.app/connectors/highspot", + }, // currently used for the Internet Search tool docs, which is why // a globe is used not_applicable: { diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index 901aa85d7..c2007f7c2 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -390,6 +390,7 @@ export enum ValidSources { Egnyte = "egnyte", Airtable = "airtable", Gitbook = "gitbook", + Highspot = "highspot", } export const validAutoSyncSources = [