mirror of
https://github.com/lnbits/lnbits.git
synced 2025-12-04 17:51:07 +01:00
feat: Add trimmed mean filtering for exchange rate outlier detection (#3206)
Co-authored-by: Vlad Stan <stan.v.vlad@gmail.com>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import statistics
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
@@ -187,6 +188,54 @@ def allowed_currencies() -> list[str]:
|
||||
return list(currencies.keys())
|
||||
|
||||
|
||||
def apply_trimmed_mean_filter(
    rates: list[tuple[str, float]], threshold_percentage: float = 0.01
) -> list[tuple[str, float]]:
    """
    Remove outliers from a list of exchange rates using their median.

    Despite the name, no mean is computed: each rate is compared with the
    median of all rates and dropped when its relative deviation from that
    median exceeds ``threshold_percentage``.

    Args:
        rates: List of (provider_name, rate_value) tuples
        threshold_percentage: Maximum allowed relative deviation from the
            median, expressed as a fraction (default 0.01, i.e. 1%)

    Returns:
        - ``rates`` unchanged when it holds fewer than 3 entries, or when the
          median is zero (a relative deviation cannot be computed then).
        - The rates within the threshold, in their original order, when at
          least 2 of them remain.
        - Otherwise a single-element list holding the rate closest to the
          median (the first such rate if several are equally close).
    """
    if len(rates) < 3:
        # Need at least 3 rates to apply filtering
        return rates

    rates_values = [r[1] for r in rates]
    median_value = statistics.median(rates_values)

    if median_value == 0:
        # Dividing by a zero median would raise ZeroDivisionError; without a
        # usable reference value the rates are passed through unfiltered.
        return rates

    # Use the magnitude of the median as the reference so that the relative
    # deviation is never negative (a negative median would otherwise make
    # every rate pass the threshold check).
    reference = abs(median_value)

    # Filter out values that are more than threshold_percentage away from median
    filtered_rates = []
    for rate in rates:
        provider_name, value = rate
        deviation = abs(value - median_value) / reference
        if deviation <= threshold_percentage:
            logger.debug(
                f"Keeping {provider_name}: {value} (deviation: {deviation:.4f})"
            )
            filtered_rates.append(rate)
        else:
            logger.debug(
                f"Removing outlier {provider_name}: {value} "
                f"(deviation: {deviation:.4f})"
            )

    # If we still have at least 2 rates after filtering, use them
    if len(filtered_rates) >= 2:
        logger.debug(f"Filtered rates: {filtered_rates}")
        return filtered_rates

    # Fall back to median if filtering removed too many values
    logger.debug("Filtering removed too many values, using median instead")
    # Find the rate closest to median
    closest_rate = min(rates, key=lambda x: abs(x[1] - median_value))
    return [closest_rate]
|
||||
|
||||
|
||||
async def btc_rates(currency: str) -> list[tuple[str, float]]:
|
||||
if currency.upper() not in allowed_currencies():
|
||||
raise ValueError(f"Currency '{currency}' not allowed.")
|
||||
@@ -236,7 +285,9 @@ async def btc_rates(currency: str) -> list[tuple[str, float]]:
|
||||
]
|
||||
results = await asyncio.gather(*calls)
|
||||
|
||||
return [r for r in results if r is not None]
|
||||
all_rates = [r for r in results if r is not None]
|
||||
|
||||
return apply_trimmed_mean_filter(all_rates)
|
||||
|
||||
|
||||
async def btc_price(currency: str) -> float:
|
||||
|
||||
125
tests/unit/test_exchange_rates.py
Normal file
125
tests/unit/test_exchange_rates.py
Normal file
@@ -0,0 +1,125 @@
|
||||
from lnbits.utils.exchange_rates import (
|
||||
apply_trimmed_mean_filter,
|
||||
)
|
||||
|
||||
|
||||
class TestApplyTrimmedMeanFilter:
    """Test the trimmed mean filtering function"""

    def test_trimmed_mean_filter_with_outliers(self):
        """Test filtering removes outliers that deviate more than threshold"""
        # The median of these rates is 51500, so with a 1% threshold only
        # values between 50985 and 52015 are kept.
        rates = [
            ("Binance", 50000.0),  # ~2.9% below the median
            ("Coinbase", 51000.0),  # ~0.97% below the median
            ("Kraken", 52000.0),  # ~0.97% above the median
            ("Outlier", 60000.0),  # ~16.5% above the median
        ]

        result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        # Should remove the outliers (binance and outlier)
        assert len(result) == 2
        assert ("Outlier", 60000.0) not in result
        assert ("Binance", 50000.0) not in result
        assert ("Coinbase", 51000.0) in result
        assert ("Kraken", 52000.0) in result

    def test_trimmed_mean_filter_no_outliers(self):
        """Test filtering keeps all rates when none are outliers"""
        rates = [
            ("Binance", 50000.0),
            ("Coinbase", 50100.0),
            ("Kraken", 50200.0),
        ]

        result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        # Should keep all rates
        assert len(result) == 3
        assert result == rates

    def test_trimmed_mean_filter_insufficient_data(self):
        """Test filtering returns original data when less than 3 rates"""
        rates = [
            ("Binance", 50000.0),
            ("Coinbase", 51000.0),
        ]

        result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        # Should return original rates unchanged
        assert result == rates

    def test_trimmed_mean_filter_single_rate(self):
        """Test filtering with single rate"""
        rates = [("Binance", 50000.0)]

        result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        # Should return original rate unchanged
        assert result == rates

    def test_trimmed_mean_filter_empty_list(self):
        """Test filtering with empty list"""
        rates: list[tuple[str, float]] = []

        result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        # Should return empty list
        assert result == []

    def test_trimmed_mean_filter_too_many_outliers(self):
        """Test fallback to median when filtering removes too many values"""
        rates = [
            ("Provider1", 50000.0),
            ("Provider2", 60000.0),  # 20% higher
            ("Provider3", 40000.0),  # 20% lower
        ]

        result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        # Should fall back to rate closest to median (Provider1)
        assert len(result) == 1
        assert result[0] == ("Provider1", 50000.0)

    def test_trimmed_mean_filter_different_thresholds(self):
        """Test filtering with different threshold percentages"""
        rates = [
            ("Binance", 50000.0),
            ("Coinbase", 51000.0),
            ("Kraken", 53000.0),
            ("Outlier", 55000.0),
        ]

        # The median of these rates is 52000, so 1% either side of it is
        # 51480 and 52520. Every rate lies outside that window, which
        # triggers the fallback to the single rate closest to the median.
        # Coinbase and Kraken are both 1000 away; Coinbase comes first in the
        # list and is therefore the one returned.
        result_1pct = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        assert len(result_1pct) == 1
        assert ("Binance", 50000.0) not in result_1pct
        assert ("Coinbase", 51000.0) in result_1pct
        assert ("Kraken", 53000.0) not in result_1pct
        assert ("Outlier", 55000.0) not in result_1pct

        # With 5% threshold the window is 49400 to 54600, so only the
        # 55000 rate (~5.8% above the median) is dropped.
        result_5pct = apply_trimmed_mean_filter(rates, threshold_percentage=0.05)
        assert len(result_5pct) == 3
        assert ("Binance", 50000.0) in result_5pct
        assert ("Coinbase", 51000.0) in result_5pct
        assert ("Kraken", 53000.0) in result_5pct
        assert ("Outlier", 55000.0) not in result_5pct

    def test_trimmed_mean_filter_edge_case_exact_threshold(self):
        """Test filtering with rates exactly at the threshold"""
        # Three rates are required, otherwise the function returns early
        # without ever comparing a deviation against the threshold.
        rates = [
            ("Binance", 50000.0),  # The median
            ("Coinbase", 50500.0),  # Exactly 1% higher
            ("Kraken", 49500.0),  # Exactly 1% lower
        ]

        result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        # Should keep the rates at exactly 1% deviation
        assert len(result) == 3
        assert result == rates
|
||||
Reference in New Issue
Block a user