Highspot connector (#4277)

This commit is contained in:
SubashMohan 2025-03-17 21:06:02 +05:30 committed by GitHub
parent f45798b5dd
commit ba514aaaa2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 1019 additions and 0 deletions

View File

@ -50,6 +50,9 @@ env:
GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}
# Notion
NOTION_INTEGRATION_TOKEN: ${{ secrets.NOTION_INTEGRATION_TOKEN }}
# Highspot
HIGHSPOT_KEY: ${{ secrets.HIGHSPOT_KEY }}
HIGHSPOT_SECRET: ${{ secrets.HIGHSPOT_SECRET }}
jobs:
connectors-check:

View File

@ -174,6 +174,7 @@ class DocumentSource(str, Enum):
FIREFLIES = "fireflies"
EGNYTE = "egnyte"
AIRTABLE = "airtable"
HIGHSPOT = "highspot"
# Special case just for integration tests
MOCK_CONNECTOR = "mock_connector"

View File

@ -30,6 +30,7 @@ from onyx.connectors.gong.connector import GongConnector
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_site.connector import GoogleSitesConnector
from onyx.connectors.guru.connector import GuruConnector
from onyx.connectors.highspot.connector import HighspotConnector
from onyx.connectors.hubspot.connector import HubSpotConnector
from onyx.connectors.interfaces import BaseConnector
from onyx.connectors.interfaces import CheckpointConnector
@ -117,6 +118,7 @@ def identify_connector_class(
DocumentSource.FIREFLIES: FirefliesConnector,
DocumentSource.EGNYTE: EgnyteConnector,
DocumentSource.AIRTABLE: AirtableConnector,
DocumentSource.HIGHSPOT: HighspotConnector,
# just for integration tests
DocumentSource.MOCK_CONNECTOR: MockConnector,
}

View File

@ -0,0 +1,4 @@
"""
Highspot connector package for Onyx.
Enables integration with Highspot's knowledge base.
"""

View File

@ -0,0 +1,280 @@
import base64
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from urllib.parse import urljoin
import requests
from requests.adapters import HTTPAdapter
from requests.exceptions import HTTPError
from requests.exceptions import RequestException
from requests.exceptions import Timeout
from urllib3.util.retry import Retry
from onyx.utils.logger import setup_logger
logger = setup_logger()
class HighspotClientError(Exception):
    """Root of the Highspot client exception hierarchy.

    Carries the human-readable message plus the HTTP status code (when one
    is available) so callers can branch on it.
    """

    def __init__(self, message: str, status_code: Optional[int] = None):
        super().__init__(message)
        self.message = message
        self.status_code = status_code


class HighspotAuthenticationError(HighspotClientError):
    """Raised when the API rejects the provided credentials."""


class HighspotRateLimitError(HighspotClientError):
    """Raised when the API rate limit is exceeded.

    ``retry_after`` holds the raw ``Retry-After`` header value, if any.
    """

    def __init__(self, message: str, retry_after: Optional[str] = None):
        super().__init__(message)
        self.retry_after = retry_after
class HighspotClient:
    """
    Client for interacting with the Highspot API.

    Uses basic authentication with the provided key (username) and secret
    (password). Implements retry logic, error handling, and connection
    pooling via a shared ``requests.Session``.
    """

    # Default regional endpoint; pass ``base_url`` to target another tenant.
    BASE_URL = "https://api-su2.highspot.com/v1.0/"

    def __init__(
        self,
        key: str,
        secret: str,
        base_url: str = BASE_URL,
        timeout: int = 30,
        max_retries: int = 3,
        backoff_factor: float = 0.5,
        status_forcelist: Optional[List[int]] = None,
    ):
        """
        Initialize the Highspot API client.

        Args:
            key: API key (used as username)
            secret: API secret (used as password)
            base_url: Base URL for the Highspot API
            timeout: Request timeout in seconds
            max_retries: Maximum number of retries for failed requests
            backoff_factor: Backoff factor for retries
            status_forcelist: HTTP status codes to retry on

        Raises:
            ValueError: If key or secret is empty.
        """
        if not key or not secret:
            raise ValueError("API key and secret are required")
        self.key = key
        self.secret = secret
        self.base_url = base_url
        self.timeout = timeout

        # Pooled session that transparently retries transient failures
        # (429 and common 5xx responses by default).
        self.session = requests.Session()
        retry_strategy = Retry(
            total=max_retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist or [429, 500, 502, 503, 504],
            allowed_methods=["GET", "POST", "PUT", "DELETE"],
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

        # Attach Basic-auth headers once; they apply to every request.
        self._setup_auth()

    def _setup_auth(self) -> None:
        """Set Basic-auth and JSON content headers on the session."""
        auth = f"{self.key}:{self.secret}"
        encoded_auth = base64.b64encode(auth.encode()).decode()
        self.session.headers.update(
            {
                "Authorization": f"Basic {encoded_auth}",
                "Content-Type": "application/json",
                "Accept": "application/json",
            }
        )

    def _make_request(
        self,
        method: str,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        data: Optional[Dict[str, Any]] = None,
        json_data: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> Dict[str, Any]:
        """
        Make a request to the Highspot API and map errors to client exceptions.

        Args:
            method: HTTP method (GET, POST, etc.)
            endpoint: API endpoint, joined onto ``base_url``
            params: URL parameters
            data: Form data
            json_data: JSON body
            headers: Additional headers

        Returns:
            Parsed JSON response as a dictionary; empty dict for empty bodies.

        Raises:
            HighspotAuthenticationError: On HTTP 401.
            HighspotRateLimitError: On HTTP 429 (carries ``Retry-After``).
            HighspotClientError: On any other API error or timeout/failure.
        """
        url = urljoin(self.base_url, endpoint)
        request_headers = {}
        if headers:
            request_headers.update(headers)
        try:
            logger.debug(f"Making {method} request to {url}")
            response = self.session.request(
                method=method,
                url=url,
                params=params,
                data=data,
                json=json_data,
                headers=request_headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            # Some endpoints legitimately return an empty body.
            if response.content and response.content.strip():
                return response.json()
            return {}
        except HTTPError as e:
            status_code = e.response.status_code
            error_msg = str(e)
            # Prefer the API's own error message when the body is JSON.
            try:
                error_data = e.response.json()
                if isinstance(error_data, dict):
                    error_msg = error_data.get("message", str(e))
            except (ValueError, KeyError):
                pass
            if status_code == 401:
                raise HighspotAuthenticationError(f"Authentication failed: {error_msg}")
            elif status_code == 429:
                retry_after = e.response.headers.get("Retry-After")
                raise HighspotRateLimitError(
                    f"Rate limit exceeded: {error_msg}", retry_after=retry_after
                )
            else:
                raise HighspotClientError(
                    f"API error {status_code}: {error_msg}", status_code=status_code
                )
        except Timeout:
            raise HighspotClientError("Request timed out")
        except RequestException as e:
            raise HighspotClientError(f"Request failed: {str(e)}")

    def get_spots(self) -> List[Dict[str, Any]]:
        """
        Get all spots the credentials can view.

        Returns:
            List of spot dicts (each with name and ID), or an empty list
            when the API reports no spots.
        """
        params = {"right": "view"}
        response = self._make_request("GET", "spots", params=params)
        total_counts = response.get("counts_total")
        # Log the count, not the payload — the previous message dumped the
        # entire response body at INFO level while claiming it was a count.
        logger.info(f"Received {total_counts} spots")
        # counts_total may be absent; only trust a positive count.
        if total_counts is not None and total_counts > 0:
            return response.get("collection", [])
        return []

    def get_spot(self, spot_id: str) -> Dict[str, Any]:
        """
        Get details for a specific spot.

        Args:
            spot_id: ID of the spot

        Returns:
            Spot details

        Raises:
            ValueError: If spot_id is empty.
        """
        if not spot_id:
            raise ValueError("spot_id is required")
        return self._make_request("GET", f"spots/{spot_id}")

    def get_spot_items(
        self, spot_id: str, offset: int = 0, page_size: int = 100
    ) -> Dict[str, Any]:
        """
        Get one page of items in a specific spot.

        Args:
            spot_id: ID of the spot
            offset: Pagination offset (``start`` in the API)
            page_size: Number of items per page (``limit`` in the API)

        Returns:
            Paged response dict; items are under the ``collection`` key.

        Raises:
            ValueError: If spot_id is empty.
        """
        if not spot_id:
            raise ValueError("spot_id is required")
        params = {"spot": spot_id, "start": offset, "limit": page_size}
        return self._make_request("GET", "items", params=params)

    def get_item(self, item_id: str) -> Dict[str, Any]:
        """
        Get details for a specific item.

        Args:
            item_id: ID of the item

        Returns:
            Item details

        Raises:
            ValueError: If item_id is empty.
        """
        if not item_id:
            raise ValueError("item_id is required")
        return self._make_request("GET", f"items/{item_id}")

    def get_item_content(self, item_id: str) -> bytes:
        """
        Get the raw (binary) content of an item.

        Note: this bypasses ``_make_request`` because the response is not
        JSON; HTTP errors surface as ``requests.HTTPError`` rather than
        ``HighspotClientError``.

        Args:
            item_id: ID of the item

        Returns:
            Raw content bytes

        Raises:
            ValueError: If item_id is empty.
        """
        if not item_id:
            raise ValueError("item_id is required")
        url = urljoin(self.base_url, f"items/{item_id}/content")
        response = self.session.get(url, timeout=self.timeout)
        response.raise_for_status()
        return response.content

    def health_check(self) -> bool:
        """
        Check if the API is accessible and credentials are valid.

        Returns:
            True if a minimal spots query succeeds, False otherwise.
        """
        try:
            self._make_request("GET", "spots", params={"limit": 1})
            return True
        # HighspotAuthenticationError subclasses HighspotClientError, so the
        # tuple is redundant but kept for explicitness.
        except (HighspotClientError, HighspotAuthenticationError):
            return False

View File

@ -0,0 +1,431 @@
from datetime import datetime
from io import BytesIO
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.highspot.client import HighspotClient
from onyx.connectors.highspot.client import HighspotClientError
from onyx.connectors.highspot.utils import scrape_url_content
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import SlimDocument
from onyx.connectors.models import TextSection
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.extract_file_text import VALID_FILE_EXTENSIONS
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
logger = setup_logger()
_SLIM_BATCH_SIZE = 1000
class HighspotConnector(LoadConnector, PollConnector, SlimConnector):
    """
    Connector for loading data from Highspot.

    Retrieves content from the specified spots using the Highspot API.
    If no spots are specified, retrieves content from all available spots.
    """

    def __init__(
        self,
        spot_names: Optional[List[str]] = None,
        batch_size: int = INDEX_BATCH_SIZE,
    ):
        """
        Initialize the Highspot connector.

        Args:
            spot_names: Spot names to retrieve content from. ``None`` or an
                empty list means "all available spots". (The default was a
                shared mutable ``[]``; ``None`` is backward compatible and
                avoids that pitfall.)
            batch_size: Number of items to retrieve in each batch.
        """
        self.spot_names: List[str] = list(spot_names) if spot_names else []
        self.batch_size = batch_size
        self._client: Optional[HighspotClient] = None
        # Maps lowercased spot names to spot IDs for case-insensitive lookup.
        self._spot_id_map: Dict[str, str] = {}
        self._all_spots_fetched = False
        self.highspot_url: Optional[str] = None
        self.key: Optional[str] = None
        self.secret: Optional[str] = None

    @property
    def client(self) -> HighspotClient:
        """Lazily construct the API client once credentials are loaded."""
        if self._client is None:
            if not self.key or not self.secret:
                raise ConnectorMissingCredentialError("Highspot")
            # Fall back to the default base URL when none was provided.
            base_url = (
                self.highspot_url
                if self.highspot_url is not None
                else HighspotClient.BASE_URL
            )
            self._client = HighspotClient(self.key, self.secret, base_url=base_url)
        return self._client

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Store credentials; the client itself is built lazily on first use."""
        logger.info("Loading Highspot credentials")
        self.highspot_url = credentials.get("highspot_url")
        self.key = credentials.get("highspot_key")
        self.secret = credentials.get("highspot_secret")
        return None

    def _populate_spot_id_map(self) -> None:
        """
        Populate the spot ID map with all available spots.

        Keys are stored lowercased for case-insensitive lookups. Spots
        missing either "title" or "id" are skipped.
        """
        spots = self.client.get_spots()
        for spot in spots:
            if "title" in spot and "id" in spot:
                spot_name = spot["title"]
                self._spot_id_map[spot_name.lower()] = spot["id"]
        self._all_spots_fetched = True
        logger.info(f"Retrieved {len(self._spot_id_map)} spots from Highspot")

    def _get_all_spot_names(self) -> List[str]:
        """Return every known spot name (lowercased), fetching them if needed."""
        if not self._all_spots_fetched:
            self._populate_spot_id_map()
        return list(self._spot_id_map.keys())

    def _get_spot_id_from_name(self, spot_name: str) -> str:
        """
        Resolve a spot name (case-insensitive) to its ID.

        Args:
            spot_name: Name of the spot.

        Returns:
            ID of the spot.

        Raises:
            ValueError: If the spot name is not found.
        """
        if not self._all_spots_fetched:
            self._populate_spot_id_map()
        spot_name_lower = spot_name.lower()
        if spot_name_lower not in self._spot_id_map:
            raise ValueError(f"Spot '{spot_name}' not found")
        return self._spot_id_map[spot_name_lower]

    def load_from_state(self) -> GenerateDocumentsOutput:
        """
        Load all content (no time filter) from the configured spots.

        Yields:
            Batches of Document objects.
        """
        return self.poll_source(None, None)

    @staticmethod
    def _passes_time_filter(
        item_details: Dict[str, Any],
        item_id: str,
        start: SecondsSinceUnixEpoch | None,
        end: SecondsSinceUnixEpoch | None,
    ) -> bool:
        """
        Return True when the item's ``date_updated`` falls inside [start, end].

        Items without a ``date_updated`` are always included; items whose date
        cannot be parsed are excluded with a warning (matching the original
        behavior). Uses ``is not None`` checks so that ``start=0`` (the Unix
        epoch) is honored instead of being treated as "no filter" — the old
        truthiness test silently dropped a 0 bound.
        """
        if start is None and end is None:
            return True
        updated_at = item_details.get("date_updated")
        if not updated_at:
            return True
        try:
            updated_time = datetime.fromisoformat(updated_at.replace("Z", "+00:00"))
        except (ValueError, TypeError):
            logger.warning(f"Invalid date format for item {item_id}: {updated_at}")
            return False
        timestamp = updated_time.timestamp()
        if start is not None and timestamp < start:
            return False
        if end is not None and timestamp > end:
            return False
        return True

    def poll_source(
        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
    ) -> GenerateDocumentsOutput:
        """
        Poll Highspot for content, optionally restricted to a time window.

        Args:
            start: Lower bound as seconds since the Unix epoch, or None.
            end: Upper bound as seconds since the Unix epoch, or None.

        Yields:
            Batches of Document objects.
        """
        doc_batch: list[Document] = []
        # If no spots were configured, index every spot we can see.
        spot_names_to_process = self.spot_names
        if not spot_names_to_process:
            spot_names_to_process = self._get_all_spot_names()
            logger.info(
                f"No spots specified, using all {len(spot_names_to_process)} available spots"
            )
        for spot_name in spot_names_to_process:
            try:
                # Raises ValueError when unknown; never returns None, so the
                # old "spot_id is None" check was dead code and is removed.
                spot_id = self._get_spot_id_from_name(spot_name)
                offset = 0
                has_more = True
                while has_more:
                    logger.info(
                        f"Retrieving items from spot {spot_name}, offset {offset}"
                    )
                    response = self.client.get_spot_items(
                        spot_id=spot_id, offset=offset, page_size=self.batch_size
                    )
                    items = response.get("collection", [])
                    # Log a count, not the payload — full item dumps at INFO
                    # were noisy and could leak document content into logs.
                    logger.debug(
                        f"Received {len(items)} items from spot {spot_name}"
                    )
                    if not items:
                        has_more = False
                        continue
                    for item in items:
                        item_id = ""
                        try:
                            item_id = item.get("id")
                            if not item_id:
                                logger.warning("Item without ID found, skipping")
                                continue
                            item_details = self.client.get_item(item_id)
                            if not item_details:
                                logger.warning(
                                    f"Item {item_id} details not found, skipping"
                                )
                                continue
                            if not self._passes_time_filter(
                                item_details, item_id, start, end
                            ):
                                continue
                            content = self._get_item_content(item_details)
                            title = item_details.get("title", "")
                            doc_batch.append(
                                Document(
                                    id=f"HIGHSPOT_{item_id}",
                                    sections=[
                                        TextSection(
                                            link=item_details.get(
                                                "url",
                                                f"https://www.highspot.com/items/{item_id}",
                                            ),
                                            text=content,
                                        )
                                    ],
                                    source=DocumentSource.HIGHSPOT,
                                    semantic_identifier=title,
                                    metadata={
                                        "spot_name": spot_name,
                                        "type": item_details.get("content_type", ""),
                                        "created_at": item_details.get(
                                            "date_added", ""
                                        ),
                                        "author": item_details.get("author", ""),
                                        "language": item_details.get("language", ""),
                                        "can_download": str(
                                            item_details.get("can_download", False)
                                        ),
                                    },
                                    # NOTE(review): the API returns an ISO-8601
                                    # string here — confirm the Document model
                                    # coerces it to a datetime.
                                    doc_updated_at=item_details.get("date_updated"),
                                )
                            )
                            if len(doc_batch) >= self.batch_size:
                                yield doc_batch
                                doc_batch = []
                        except HighspotClientError as e:
                            # Keep going: one bad item must not abort the spot.
                            logger.error(
                                f"Error retrieving item {item_id or 'unknown'}: {str(e)}"
                            )
                    # A short page means we have drained the spot.
                    has_more = len(items) >= self.batch_size
                    offset += self.batch_size
            except (HighspotClientError, ValueError) as e:
                logger.error(f"Error processing spot {spot_name}: {str(e)}")
        if doc_batch:
            yield doc_batch

    def _get_item_content(self, item_details: Dict[str, Any]) -> str:
        """
        Get the text content of an item.

        WebLinks are scraped; downloadable files with a recognized extension
        are fetched and text-extracted; everything else falls back to
        "title\\ndescription".

        Args:
            item_details: Item details from the API.

        Returns:
            Text content of the item ("" when a download/scrape API call fails).
        """
        item_id = item_details.get("id", "")
        content_name = item_details.get("content_name", "")
        is_valid_format = content_name and "." in content_name
        file_extension = content_name.split(".")[-1].lower() if is_valid_format else ""
        # VALID_FILE_EXTENSIONS is assumed to contain dotted extensions
        # (".pdf", ...) — TODO confirm against file_processing.
        file_extension = "." + file_extension if file_extension else ""
        can_download = item_details.get("can_download", False)
        content_type = item_details.get("content_type", "")
        # Extract title and description once; used as the fallback content.
        title, description = self._extract_title_and_description(item_details)
        default_content = f"{title}\n{description}"
        logger.info(f"Processing item {item_id} with extension {file_extension}")
        try:
            if content_type == "WebLink":
                url = item_details.get("url")
                if not url:
                    return default_content
                content = scrape_url_content(url, True)
                return content if content else default_content
            elif (
                is_valid_format
                and file_extension in VALID_FILE_EXTENSIONS
                and can_download
            ):
                if not item_id:
                    return default_content
                content_response = self.client.get_item_content(item_id)
                if content_response:
                    text_content = extract_file_text(
                        BytesIO(content_response), content_name
                    )
                    return text_content
                return default_content
            else:
                return default_content
        except HighspotClientError as e:
            error_context = f"item {item_id}" if item_id else "item"
            logger.warning(f"Could not retrieve content for {error_context}: {str(e)}")
            # NOTE(review): returns "" here rather than default_content —
            # intentional? Other failure paths fall back to title/description.
            return ""

    def _extract_title_and_description(
        self, item_details: Dict[str, Any]
    ) -> tuple[str, str]:
        """
        Extract the title and description from item details.

        Args:
            item_details: Item details from the API.

        Returns:
            Tuple of (title, description), each defaulting to "".
        """
        title = item_details.get("title", "")
        description = item_details.get("description", "")
        return title, description

    def retrieve_all_slim_documents(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
        """
        Retrieve IDs of all documents in the configured spots.

        If no spots are configured, retrieves from all spots.

        Args:
            start: Accepted for interface compatibility; currently unused.
            end: Accepted for interface compatibility; currently unused.
            callback: Accepted for interface compatibility; currently unused.

        Yields:
            Batches of SlimDocument objects.
        """
        slim_doc_batch: list[SlimDocument] = []
        spot_names_to_process = self.spot_names
        if not spot_names_to_process:
            spot_names_to_process = self._get_all_spot_names()
            logger.info(
                f"No spots specified, using all {len(spot_names_to_process)} available spots for slim documents"
            )
        for spot_name in spot_names_to_process:
            try:
                spot_id = self._get_spot_id_from_name(spot_name)
                offset = 0
                has_more = True
                while has_more:
                    logger.info(
                        f"Retrieving slim documents from spot {spot_name}, offset {offset}"
                    )
                    response = self.client.get_spot_items(
                        spot_id=spot_id, offset=offset, page_size=self.batch_size
                    )
                    items = response.get("collection", [])
                    if not items:
                        has_more = False
                        continue
                    for item in items:
                        item_id = item.get("id")
                        if not item_id:
                            continue
                        slim_doc_batch.append(SlimDocument(id=f"HIGHSPOT_{item_id}"))
                        if len(slim_doc_batch) >= _SLIM_BATCH_SIZE:
                            yield slim_doc_batch
                            slim_doc_batch = []
                    has_more = len(items) >= self.batch_size
                    offset += self.batch_size
            except (HighspotClientError, ValueError) as e:
                logger.error(
                    f"Error retrieving slim documents from spot {spot_name}: {str(e)}"
                )
        if slim_doc_batch:
            yield slim_doc_batch

    def validate_credentials(self) -> bool:
        """
        Validate that the provided credentials can access the Highspot API.

        Returns:
            True if credentials are valid, False otherwise.
        """
        try:
            return self.client.health_check()
        except Exception as e:
            logger.error(f"Failed to validate credentials: {str(e)}")
            return False
if __name__ == "__main__":
    # Ad-hoc manual run: index every spot. Fill in real credentials before
    # running; empty strings will fail lazily when the client is built.
    connector = HighspotConnector(spot_names=[])
    connector.load_credentials(
        credentials={"highspot_key": "", "highspot_secret": ""}
    )
    for doc_batch in connector.load_from_state():
        print(doc_batch)

View File

@ -0,0 +1,122 @@
from typing import Optional
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright
from onyx.file_processing.html_utils import web_html_cleanup
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Constants
WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS = 20
JAVASCRIPT_DISABLED_MESSAGE = "You have JavaScript disabled in your browser"
DEFAULT_TIMEOUT = 60000 # 60 seconds
def scrape_url_content(
    url: str, scroll_before_scraping: bool = False, timeout_ms: int = DEFAULT_TIMEOUT
) -> Optional[str]:
    """
    Scrapes content from a given URL and returns the cleaned text.

    Launches a headless Chromium via Playwright, navigates to the page,
    optionally scrolls to trigger lazy loading, and cleans the resulting
    HTML with ``web_html_cleanup``. All failures are logged and reported
    as ``None`` rather than raised.

    Args:
        url: The URL to scrape (must be http/https with a hostname)
        scroll_before_scraping: Whether to scroll through the page to load lazy content
        timeout_ms: Timeout in milliseconds for page navigation and loading

    Returns:
        The cleaned text content of the page or None if scraping fails
    """
    playwright = None
    browser = None
    try:
        # Raises ValueError for non-http(s) or hostname-less URLs; caught by
        # the outer except and reported as None.
        validate_url(url)
        playwright = sync_playwright().start()
        browser = playwright.chromium.launch(headless=True)
        context = browser.new_context()
        page = context.new_page()
        logger.info(f"Navigating to URL: {url}")
        try:
            page.goto(url, timeout=timeout_ms)
        except Exception as e:
            logger.error(f"Failed to navigate to {url}: {str(e)}")
            return None
        if scroll_before_scraping:
            logger.debug("Scrolling page to load lazy content")
            scroll_attempts = 0
            previous_height = page.evaluate("document.body.scrollHeight")
            # Keep scrolling until the page height stops growing, the network
            # never goes idle, or the attempt cap is hit.
            while scroll_attempts < WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS:
                page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                try:
                    page.wait_for_load_state("networkidle", timeout=timeout_ms)
                except Exception as e:
                    logger.warning(f"Network idle wait timed out: {str(e)}")
                    break
                new_height = page.evaluate("document.body.scrollHeight")
                if new_height == previous_height:
                    break
                previous_height = new_height
                scroll_attempts += 1
        content = page.content()
        soup = BeautifulSoup(content, "html.parser")
        parsed_html = web_html_cleanup(soup)
        # Pages that render only a "JavaScript disabled" shell may keep the
        # real content inside iframes — pull their inner text instead.
        if JAVASCRIPT_DISABLED_MESSAGE in parsed_html.cleaned_text:
            logger.debug("JavaScript disabled message detected, checking iframes")
            try:
                iframe_count = page.frame_locator("iframe").locator("html").count()
                if iframe_count > 0:
                    iframe_texts = (
                        page.frame_locator("iframe").locator("html").all_inner_texts()
                    )
                    iframe_content = "\n".join(iframe_texts)
                    # Short page text: assume it is just the shell and replace
                    # it; otherwise append the iframe text to what we have.
                    if len(parsed_html.cleaned_text) < 700:
                        parsed_html.cleaned_text = iframe_content
                    else:
                        parsed_html.cleaned_text += "\n" + iframe_content
            except Exception as e:
                logger.warning(f"Error processing iframes: {str(e)}")
        return parsed_html.cleaned_text
    except Exception as e:
        logger.error(f"Error scraping URL {url}: {str(e)}")
        return None
    finally:
        # Best-effort teardown; never let cleanup mask the real result.
        if browser:
            try:
                browser.close()
            except Exception as e:
                logger.debug(f"Error closing browser: {str(e)}")
        if playwright:
            try:
                playwright.stop()
            except Exception as e:
                logger.debug(f"Error stopping playwright: {str(e)}")
def validate_url(url: str) -> None:
    """
    Ensure ``url`` is an absolute http(s) URL with a hostname.

    Args:
        url: The URL to validate

    Raises:
        ValueError: If the scheme is not http/https or no hostname is present
    """
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        raise ValueError("URL must be of scheme https?://")
    if not parsed.hostname:
        raise ValueError("URL must include a hostname")

View File

@ -0,0 +1,98 @@
import json
import os
import time
from pathlib import Path
import pytest
from onyx.configs.constants import DocumentSource
from onyx.connectors.highspot.connector import HighspotConnector
from onyx.connectors.models import Document
def load_test_data(file_name: str = "test_highspot_data.json") -> dict:
    """Read a JSON fixture that lives next to this test module."""
    fixture_path = Path(__file__).parent / file_name
    with open(fixture_path, "r") as handle:
        return json.load(handle)
@pytest.fixture
def highspot_connector() -> HighspotConnector:
    """Create a Highspot connector with credentials from environment variables.

    Fails the test run outright (rather than skipping) when the required
    HIGHSPOT_KEY / HIGHSPOT_SECRET variables are missing. The base URL can
    be overridden via HIGHSPOT_URL.
    """
    # Check if required environment variables are set
    if not os.environ.get("HIGHSPOT_KEY") or not os.environ.get("HIGHSPOT_SECRET"):
        pytest.fail("HIGHSPOT_KEY or HIGHSPOT_SECRET environment variables not set")
    connector = HighspotConnector(
        spot_names=["Test content"],  # Use specific spot name instead of empty list
        batch_size=10,  # Smaller batch size for testing
    )
    connector.load_credentials(
        {
            "highspot_key": os.environ["HIGHSPOT_KEY"],
            "highspot_secret": os.environ["HIGHSPOT_SECRET"],
            "highspot_url": os.environ.get(
                "HIGHSPOT_URL", "https://api-su2.highspot.com/v1.0/"
            ),
        }
    )
    return connector
def test_highspot_connector_basic(highspot_connector: HighspotConnector) -> None:
    """Test basic functionality of the Highspot connector.

    Polls the full history (epoch through now) and, when the fixture file
    names a target document, validates that document's fields against the
    fixture's expected values.
    """
    all_docs: list[Document] = []
    test_data = load_test_data()
    target_test_doc_id = test_data.get("target_doc_id")
    target_test_doc: Document | None = None
    # Test loading documents
    for doc_batch in highspot_connector.poll_source(0, time.time()):
        for doc in doc_batch:
            all_docs.append(doc)
            # Document IDs are prefixed with "HIGHSPOT_" by the connector.
            if doc.id == f"HIGHSPOT_{target_test_doc_id}":
                target_test_doc = doc
    # Verify documents were loaded
    assert len(all_docs) > 0
    # If we have a specific test document ID, validate it
    if target_test_doc_id and target_test_doc is not None:
        assert target_test_doc.semantic_identifier == test_data.get(
            "semantic_identifier"
        )
        assert target_test_doc.source == DocumentSource.HIGHSPOT
        assert target_test_doc.metadata is not None
        assert len(target_test_doc.sections) == 1
        section = target_test_doc.sections[0]
        assert section.link is not None
        # Only check if content exists, as exact content might change
        assert section.text is not None
        assert len(section.text) > 0
def test_highspot_connector_slim(highspot_connector: HighspotConnector) -> None:
    """Test slim document retrieval.

    Slim retrieval must return at least every ID the full load returns
    (slim results are used for pruning, so missing IDs would cause
    incorrect deletions).
    """
    # Get all doc IDs from the full connector
    all_full_doc_ids = set()
    for doc_batch in highspot_connector.load_from_state():
        all_full_doc_ids.update([doc.id for doc in doc_batch])
    # Get all doc IDs from the slim connector
    all_slim_doc_ids = set()
    for slim_doc_batch in highspot_connector.retrieve_all_slim_documents():
        all_slim_doc_ids.update([doc.id for doc in slim_doc_batch])
    # The set of full doc IDs should be a subset of the slim doc IDs
    assert all_full_doc_ids.issubset(all_slim_doc_ids)
    # Make sure we actually got some documents
    assert len(all_slim_doc_ids) > 0
def test_highspot_connector_validate_credentials(
    highspot_connector: HighspotConnector,
) -> None:
    """Test credential validation.

    Exercises the live health-check endpoint with the fixture's credentials.
    """
    assert highspot_connector.validate_credentials() is True

View File

@ -0,0 +1,5 @@
{
"target_doc_id": "67cd8eb35d3ee0487de2e704",
"semantic_identifier": "Highspot in Action _ Salesforce Integration",
"link": "https://www.highspot.com/items/67cd8eb35d3ee0487de2e704"
}

BIN
web/public/Highspot.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

View File

@ -89,6 +89,7 @@ import cohereIcon from "../../../public/Cohere.svg";
import voyageIcon from "../../../public/Voyage.png";
import googleIcon from "../../../public/Google.webp";
import xenforoIcon from "../../../public/Xenforo.svg";
import highspotIcon from "../../../public/Highspot.png";
import { FaGithub, FaRobot } from "react-icons/fa";
import { cn } from "@/lib/utils";
@ -2912,6 +2913,13 @@ export const GitbookIcon = ({
</div>
);
export const HighspotIcon = ({
size = 16,
className = defaultTailwindCSS,
}: IconProps) => {
return <LogoIcon size={size} className={className} src={highspotIcon} />;
};
export const PinnedIcon = ({
size = 16,
className = defaultTailwindCSS,

View File

@ -1249,6 +1249,47 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
],
overrideDefaultFreq: 60 * 60 * 24,
},
highspot: {
description: "Configure Highspot connector",
values: [
{
type: "tab",
name: "highspot_scope",
label: "What should we index from Highspot?",
optional: true,
tabs: [
{
value: "spots",
label: "Specific Spots",
fields: [
{
type: "list",
query: "Enter the spot name(s):",
label: "Spot Name(s)",
name: "spot_names",
optional: false,
description: "For multiple spots, enter your spot one by one.",
},
],
},
{
value: "everything",
label: "Everything",
fields: [
{
type: "string_tab",
label: "Everything",
name: "everything",
description:
"This connector will index all spots the provided credentials have access to!",
},
],
},
],
},
],
advanced_values: [],
},
};
export function createConnectorInitialValues(
connector: ConfigurableSources

View File

@ -226,6 +226,12 @@ export interface AirtableCredentialJson {
airtable_access_token: string;
}
export interface HighspotCredentialJson {
highspot_url: string;
highspot_key: string;
highspot_secret: string;
}
export const credentialTemplates: Record<ValidSources, any> = {
github: { github_access_token: "" } as GithubCredentialJson,
gitlab: {
@ -353,6 +359,11 @@ export const credentialTemplates: Record<ValidSources, any> = {
gitbook: {
gitbook_api_key: "",
} as GitbookCredentialJson,
highspot: {
highspot_url: "",
highspot_key: "",
highspot_secret: "",
} as HighspotCredentialJson,
};
export const credentialDisplayNames: Record<string, string> = {
@ -488,6 +499,11 @@ export const credentialDisplayNames: Record<string, string> = {
// GitBook
gitbook_space_id: "GitBook Space ID",
gitbook_api_key: "GitBook API Key",
//Highspot
highspot_url: "Highspot URL",
highspot_key: "Highspot Key",
highspot_secret: "Highspot Secret",
};
export function getDisplayNameForCredentialKey(key: string): string {

View File

@ -44,6 +44,7 @@ import {
GlobeIcon2,
FileIcon2,
GitbookIcon,
HighspotIcon,
} from "@/components/icons/icons";
import { ValidSources } from "./types";
import {
@ -329,6 +330,12 @@ export const SOURCE_METADATA_MAP: SourceMap = {
category: SourceCategory.Wiki,
docs: "https://docs.onyx.app/connectors/gitbook",
},
highspot: {
icon: HighspotIcon,
displayName: "Highspot",
category: SourceCategory.Wiki,
docs: "https://docs.onyx.app/connectors/highspot",
},
// currently used for the Internet Search tool docs, which is why
// a globe is used
not_applicable: {

View File

@ -390,6 +390,7 @@ export enum ValidSources {
Egnyte = "egnyte",
Airtable = "airtable",
Gitbook = "gitbook",
Highspot = "highspot",
}
export const validAutoSyncSources = [