From edb7d66efc3614b3e55711cf574f567c91d91c09 Mon Sep 17 00:00:00 2001 From: blackcoffeexbt <87530449+blackcoffeexbt@users.noreply.github.com> Date: Wed, 25 Jun 2025 11:16:00 +0100 Subject: [PATCH] feat: Add trimmed mean filtering for exchange rate outlier detection (#3206) Co-authored-by: Vlad Stan --- lnbits/utils/exchange_rates.py | 53 ++++++++++++- tests/unit/test_exchange_rates.py | 125 ++++++++++++++++++++++++++++++ 2 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 tests/unit/test_exchange_rates.py diff --git a/lnbits/utils/exchange_rates.py b/lnbits/utils/exchange_rates.py index 0d14488c1..2c2023d08 100644 --- a/lnbits/utils/exchange_rates.py +++ b/lnbits/utils/exchange_rates.py @@ -1,4 +1,5 @@ import asyncio +import statistics from typing import Optional import httpx @@ -187,6 +188,54 @@ def allowed_currencies() -> list[str]: return list(currencies.keys()) +def apply_trimmed_mean_filter( + rates: list[tuple[str, float]], threshold_percentage: float = 0.01 +) -> list[tuple[str, float]]: + """ + Apply trimmed mean filtering to remove outliers from exchange rates. 
+ + Args: + rates: List of (provider_name, rate_value) tuples + threshold_percentage: Percentage threshold for outlier removal (default 1%) + + Returns: + Filtered list of rates with outliers removed + """ + if len(rates) < 3: + # Need at least 3 rates to apply filtering + return rates + + rates_values = [r[1] for r in rates] + median_value = statistics.median(rates_values) + + # Filter out values that are more than threshold_percentage away from median + filtered_rates = [] + for rate in rates: + provider_name, value = rate + deviation = abs(value - median_value) / median_value + if deviation <= threshold_percentage: + logger.debug( + f"Keeping {provider_name}: {value} (deviation: {deviation:.4f})" + ) + filtered_rates.append(rate) + else: + logger.debug( + f"Removing outlier {provider_name}: {value} " + f"(deviation: {deviation:.4f})" + ) + + # If we still have at least 2 rates after filtering, use them + if len(filtered_rates) >= 2: + logger.debug(f"Filtered rates: {filtered_rates}") + return filtered_rates + else: + # Fall back to median if filtering removed too many values + logger.debug("Filtering removed too many values, using median instead") + # Find the rate closest to median + closest_rate = min(rates, key=lambda x: abs(x[1] - median_value)) + return [closest_rate] + + async def btc_rates(currency: str) -> list[tuple[str, float]]: if currency.upper() not in allowed_currencies(): raise ValueError(f"Currency '{currency}' not allowed.") @@ -236,7 +285,9 @@ async def btc_rates(currency: str) -> list[tuple[str, float]]: ] results = await asyncio.gather(*calls) - return [r for r in results if r is not None] + all_rates = [r for r in results if r is not None] + + return apply_trimmed_mean_filter(all_rates) async def btc_price(currency: str) -> float: diff --git a/tests/unit/test_exchange_rates.py b/tests/unit/test_exchange_rates.py new file mode 100644 index 000000000..9b3263b49 --- /dev/null +++ b/tests/unit/test_exchange_rates.py @@ -0,0 +1,125 @@ +from 
lnbits.utils.exchange_rates import ( + apply_trimmed_mean_filter, +) + + +class TestApplyTrimmedMeanFilter: + """Test the trimmed mean filtering function""" + + def test_trimmed_mean_filter_with_outliers(self): + """Test filtering removes outliers that deviate more than threshold""" + # Mock rates with one outlier (20% deviation) + rates = [ + ("Binance", 50000.0), + ("Coinbase", 51000.0), + ("Kraken", 52000.0), + ("Outlier", 60000.0), # 20% higher than others + ] + + result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01) + + # Median is 51500, so Binance (2.9%) and Outlier (16.5%) are both removed + assert len(result) == 2 + assert ("Outlier", 60000.0) not in result + assert ("Binance", 50000.0) not in result + assert ("Coinbase", 51000.0) in result + assert ("Kraken", 52000.0) in result + + def test_trimmed_mean_filter_no_outliers(self): + """Test filtering keeps all rates when none are outliers""" + rates = [ + ("Binance", 50000.0), + ("Coinbase", 50100.0), + ("Kraken", 50200.0), + ] + + result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01) + + # Should keep all rates + assert len(result) == 3 + assert result == rates + + def test_trimmed_mean_filter_insufficient_data(self): + """Test filtering returns original data when less than 3 rates""" + rates = [ + ("Binance", 50000.0), + ("Coinbase", 51000.0), + ] + + result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01) + + # Should return original rates unchanged + assert result == rates + + def test_trimmed_mean_filter_single_rate(self): + """Test filtering with single rate""" + rates = [("Binance", 50000.0)] + + result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01) + + # Should return original rate unchanged + assert result == rates + + def test_trimmed_mean_filter_empty_list(self): + """Test filtering with empty list""" + rates = [] + + result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01) + + # Should return empty list + assert result == [] + + def 
test_trimmed_mean_filter_too_many_outliers(self): + """Test fallback to median when filtering removes too many values""" + rates = [ + ("Provider1", 50000.0), + ("Provider2", 60000.0), # 20% higher + ("Provider3", 40000.0), # 20% lower + ] + + result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01) + + # Should fall back to rate closest to median (Provider1) + assert len(result) == 1 + assert result[0] == ("Provider1", 50000.0) + + def test_trimmed_mean_filter_different_thresholds(self): + """Test filtering with different threshold percentages""" + rates = [ + ("Binance", 50000.0), + ("Coinbase", 51000.0), + ("Kraken", 53000.0), + ("Outlier", 55000.0), + ] + + # The filter uses the median (52000), not the average (52250) + # 1% either side of the median is 51480 and 52520; every rate falls outside + # Fallback is the rate closest to the median (Coinbase wins the tie with Kraken) + result_1pct = apply_trimmed_mean_filter(rates, threshold_percentage=0.01) + + assert len(result_1pct) == 1 + assert ("Binance", 50000.0) not in result_1pct + assert ("Coinbase", 51000.0) in result_1pct + assert ("Kraken", 53000.0) not in result_1pct + assert ("Outlier", 55000.0) not in result_1pct + + # With 5% threshold, should keep just three + result_5pct = apply_trimmed_mean_filter(rates, threshold_percentage=0.05) + assert len(result_5pct) == 3 + assert ("Binance", 50000.0) in result_5pct + assert ("Coinbase", 51000.0) in result_5pct + assert ("Kraken", 53000.0) in result_5pct + assert ("Outlier", 55000.0) not in result_5pct + + def test_trimmed_mean_filter_edge_case_exact_threshold(self): + """Test filtering with rates exactly at the threshold""" + rates = [ + ("Binance", 50000.0), + ("Coinbase", 50500.0), # Exactly 1% higher + ] + + result = apply_trimmed_mean_filter(rates, threshold_percentage=0.01) + + # Should keep the rate at exactly 1% deviation + assert len(result) == 2 + assert result == rates