Reworked chunking to support mega chunks (#2032)
@@ -17,14 +17,17 @@ This Python script automates the process of running search quality tests for a b
 1. Ensure you have the required dependencies installed.
 2. Configure the `search_test_config.yaml` file based on the `search_test_config.yaml.template` file.
 3. Configure the `.env_eval` file in `deployment/docker_compose` with the correct environment variables.
-4. Navigate to Danswer repo:
+4. Set up the PYTHONPATH permanently:
+   Add the following line to your shell configuration file (e.g., `~/.bashrc`, `~/.zshrc`, or `~/.bash_profile`):
+   ```
+   export PYTHONPATH=$PYTHONPATH:/path/to/danswer/backend
+   ```
+   Replace `/path/to/danswer` with the actual path to your Danswer repository.
+   After adding this line, restart your terminal or run `source ~/.bashrc` (or the appropriate config file) to apply the changes.
+5. Navigate to Danswer repo:
    ```
    cd path/to/danswer
    ```
-5. Set Python Path variable:
-   ```
-   export PYTHONPATH=$PYTHONPATH:$PWD/backend
-   ```
 6. Navigate to the answer_quality folder:
    ```
    cd backend/tests/regression/answer_quality
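Not part of the diff: once the PYTHONPATH step above is in place, a quick sanity check can confirm that the backend directory is actually on the path and that the `danswer` package resolves. This is a minimal sketch, assuming the layout described in the steps (the `danswer` package living under `<repo>/backend`):

```
# sanity_check_pythonpath.py -- illustrative only, not part of the repository.
# Assumes the danswer package lives under <repo>/backend, as described above.
import importlib.util
import os

pythonpath = os.environ.get("PYTHONPATH", "")
entries = [p for p in pythonpath.split(os.pathsep) if p]

# The export from step 4 should have added a path ending in "backend".
print("backend entry on PYTHONPATH:", any(p.rstrip("/").endswith("backend") for p in entries))

# If the path is correct, the danswer package should be importable.
print("danswer package found:", importlib.util.find_spec("danswer") is not None)
```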
@@ -66,7 +66,6 @@ def get_answer_from_query(
    except Exception as e:
        print("Failed to answer the questions:")
        print(f"\t {str(e)}")
        print("Try restarting vespa container and trying again")
        raise e

    return context_data_list, answer
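The message printed above suggests that failures here are often transient (e.g., the Vespa container needing a restart). Purely as an illustration, and not code from the repository, a caller could wrap the call in a small retry loop; the signature of `get_answer_from_query` is assumed here for the sketch:

```
import time

def answer_with_retries(query: str, retries: int = 3, backoff_s: float = 5.0):
    """Hypothetical helper (not in the diff): retry transient failures from
    get_answer_from_query, e.g. while the Vespa container recovers."""
    for attempt in range(1, retries + 1):
        try:
            return get_answer_from_query(query)  # assumed signature, for illustration
        except Exception:
            if attempt == retries:
                raise  # give up after the last attempt
            time.sleep(backoff_s * attempt)  # simple linear backoff between attempts
```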
@@ -52,6 +52,7 @@ def upload_test_files(zip_file_path: str, env_name: str) -> None:


 def manage_file_upload(zip_file_path: str, env_name: str) -> None:
+    start_time = time.time()
     unzipped_file_paths = unzip_and_get_file_paths(zip_file_path)
     total_file_count = len(unzipped_file_paths)
     problem_file_list: list[str] = []
@@ -84,15 +85,17 @@ def manage_file_upload(zip_file_path: str, env_name: str) -> None:

         time.sleep(10)

-    problem_file_csv_path = os.path.join(current_dir, "problem_files.csv")
-    with open(problem_file_csv_path, "w", newline="") as csvfile:
-        csvwriter = csv.writer(csvfile)
-        csvwriter.writerow(["Problematic File Paths"])
-        for problem_file in problem_file_list:
-            csvwriter.writerow([problem_file])
+    if problem_file_list:
+        problem_file_csv_path = os.path.join(current_dir, "problem_files.csv")
+        with open(problem_file_csv_path, "w", newline="") as csvfile:
+            csvwriter = csv.writer(csvfile)
+            csvwriter.writerow(["Problematic File Paths"])
+            for problem_file in problem_file_list:
+                csvwriter.writerow([problem_file])

     for file in unzipped_file_paths:
         os.unlink(file)
+    print(f"Total time taken: {(time.time() - start_time)/60} minutes")


 if __name__ == "__main__":
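The change above guards the report so that `problem_files.csv` is only written when at least one upload actually failed. As a standalone illustration of the same pattern (a sketch, not the repository's helper), the guarded write can be factored out like this:

```
import csv
import os

def write_problem_files_report(problem_files: list[str], out_dir: str) -> str | None:
    """Illustrative sketch of the guarded-write pattern from the hunk above."""
    if not problem_files:
        return None  # nothing failed, so skip creating an empty report
    path = os.path.join(out_dir, "problem_files.csv")
    with open(path, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Problematic File Paths"])
        for problem_file in problem_files:
            writer.writerow([problem_file])
    return path
```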