feat: Add trimmed mean filtering for exchange rate outlier detection (#3206)

Co-authored-by: Vlad Stan <stan.v.vlad@gmail.com>
This commit is contained in:
blackcoffeexbt
2025-06-25 11:16:00 +01:00
committed by GitHub
parent efcc5e2148
commit edb7d66efc
2 changed files with 177 additions and 1 deletions

View File

@@ -1,4 +1,5 @@
import asyncio
import statistics
from typing import Optional
import httpx
@@ -187,6 +188,54 @@ def allowed_currencies() -> list[str]:
return list(currencies.keys())
def apply_trimmed_mean_filter(
    rates: list[tuple[str, float]], threshold_percentage: float = 0.01
) -> list[tuple[str, float]]:
    """
    Remove outliers from exchange rates by comparing each rate to the median.

    Despite the name, no mean is computed: a rate is kept when its relative
    deviation from the median of all rates is at most `threshold_percentage`.

    Args:
        rates: List of (provider_name, rate_value) tuples
        threshold_percentage: Maximum allowed relative deviation from the
            median, expressed as a fraction (default 0.01, i.e. 1%)

    Returns:
        - `rates` unchanged when fewer than 3 rates are given
        - the rates within the threshold, in their original order, when at
          least 2 of them survive the filtering
        - otherwise a single-element list holding the rate closest to the
          median (the first such rate on ties)
    """
    if len(rates) < 3:
        # Need at least 3 rates to apply filtering
        return rates

    median_value = statistics.median([value for _, value in rates])
    # Normalise by the magnitude of the median: dividing by a negative median
    # would yield negative "deviations" that always pass the threshold check.
    scale = abs(median_value)

    # Filter out values that are more than threshold_percentage away from median
    filtered_rates = []
    for rate in rates:
        provider_name, value = rate
        distance = abs(value - median_value)
        if scale:
            deviation = distance / scale
        else:
            # Relative deviation is undefined for a zero median; only rates
            # equal to the median count as "within threshold" in that case.
            deviation = 0.0 if distance == 0 else float("inf")

        if deviation <= threshold_percentage:
            logger.debug(
                f"Keeping {provider_name}: {value} (deviation: {deviation:.4f})"
            )
            filtered_rates.append(rate)
        else:
            logger.debug(
                f"Removing outlier {provider_name}: {value} "
                f"(deviation: {deviation:.4f})"
            )

    # If we still have at least 2 rates after filtering, use them
    if len(filtered_rates) >= 2:
        logger.debug(f"Filtered rates: {filtered_rates}")
        return filtered_rates

    # Fall back to the single rate closest to the median if filtering
    # removed too many values
    logger.debug("Filtering removed too many values, using median instead")
    closest_rate = min(rates, key=lambda x: abs(x[1] - median_value))
    return [closest_rate]
async def btc_rates(currency: str) -> list[tuple[str, float]]:
if currency.upper() not in allowed_currencies():
raise ValueError(f"Currency '{currency}' not allowed.")
@@ -236,7 +285,9 @@ async def btc_rates(currency: str) -> list[tuple[str, float]]:
]
results = await asyncio.gather(*calls)
return [r for r in results if r is not None]
all_rates = [r for r in results if r is not None]
return apply_trimmed_mean_filter(all_rates)
async def btc_price(currency: str) -> float:

View File

@@ -0,0 +1,125 @@
from lnbits.utils.exchange_rates import (
apply_trimmed_mean_filter,
)
class TestApplyTrimmedMeanFilter:
    """Tests for apply_trimmed_mean_filter (median-based outlier filtering)."""

    def test_trimmed_mean_filter_with_outliers(self):
        """Test filtering removes outliers that deviate more than threshold"""
        rates = [
            ("Binance", 50000.0),
            ("Coinbase", 51000.0),
            ("Kraken", 52000.0),
            ("Outlier", 60000.0),  # ~16.5% above the median
        ]
        # Median is 51500, so the 1% band is [50985, 52015]:
        # Binance (~2.9% below) and Outlier fall outside it.
        result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        assert len(result) == 2
        assert ("Outlier", 60000.0) not in result
        assert ("Binance", 50000.0) not in result
        assert ("Coinbase", 51000.0) in result
        assert ("Kraken", 52000.0) in result

    def test_trimmed_mean_filter_no_outliers(self):
        """Test filtering keeps all rates when none are outliers"""
        rates = [
            ("Binance", 50000.0),
            ("Coinbase", 50100.0),
            ("Kraken", 50200.0),
        ]
        result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        # Should keep all rates
        assert len(result) == 3
        assert result == rates

    def test_trimmed_mean_filter_insufficient_data(self):
        """Test filtering returns original data when less than 3 rates"""
        rates = [
            ("Binance", 50000.0),
            ("Coinbase", 51000.0),
        ]
        result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        # Should return original rates unchanged
        assert result == rates

    def test_trimmed_mean_filter_single_rate(self):
        """Test filtering with single rate"""
        rates = [("Binance", 50000.0)]
        result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        # Should return original rate unchanged
        assert result == rates

    def test_trimmed_mean_filter_empty_list(self):
        """Test filtering with empty list"""
        rates = []
        result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        # Should return empty list
        assert result == []

    def test_trimmed_mean_filter_too_many_outliers(self):
        """Test fallback to median when filtering removes too many values"""
        rates = [
            ("Provider1", 50000.0),
            ("Provider2", 60000.0),  # 20% higher
            ("Provider3", 40000.0),  # 20% lower
        ]
        result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        # Only Provider1 survives, which is fewer than 2 rates, so the
        # function falls back to the rate closest to the median (Provider1)
        assert len(result) == 1
        assert result[0] == ("Provider1", 50000.0)

    def test_trimmed_mean_filter_different_thresholds(self):
        """Test filtering with different threshold percentages"""
        rates = [
            ("Binance", 50000.0),
            ("Coinbase", 51000.0),
            ("Kraken", 53000.0),
            ("Outlier", 55000.0),
        ]

        # The median of the values is 52000, so the 1% band is [51480, 52520].
        # Every rate falls outside it, which triggers the fallback to the
        # single rate closest to the median. Coinbase and Kraken are both
        # 1000 away; Coinbase wins because it comes first in the list.
        result_1pct = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)
        assert len(result_1pct) == 1
        assert ("Binance", 50000.0) not in result_1pct
        assert ("Coinbase", 51000.0) in result_1pct
        assert ("Kraken", 53000.0) not in result_1pct
        assert ("Outlier", 55000.0) not in result_1pct

        # With a 5% threshold the band is [49400, 54600]: only Outlier
        # (~5.8% above the median) is removed, leaving three rates.
        result_5pct = apply_trimmed_mean_filter(rates, threshold_percentage=0.05)
        assert len(result_5pct) == 3
        assert ("Binance", 50000.0) in result_5pct
        assert ("Coinbase", 51000.0) in result_5pct
        assert ("Kraken", 53000.0) in result_5pct
        assert ("Outlier", 55000.0) not in result_5pct

    def test_trimmed_mean_filter_edge_case_exact_threshold(self):
        """Test filtering with rates exactly at the threshold"""
        # Three rates are required, otherwise the function returns early
        # without ever comparing a deviation against the threshold.
        rates = [
            ("Binance", 50000.0),  # the median
            ("Coinbase", 50500.0),  # Exactly 1% higher
            ("Kraken", 49500.0),  # Exactly 1% lower
        ]
        result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01)

        # The comparison is inclusive, so rates at exactly 1% are kept
        assert len(result) == 3
        assert result == rates