Skip to content

Commit a8f0632

Browse files
committed
Add data completeness check
1 parent 11f1111 commit a8f0632

File tree

4 files changed

+89
-5
lines changed

4 files changed

+89
-5
lines changed

investing_algorithm_framework/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
get_ohlcv_data_completeness_chart, get_equity_curve_chart
1111
from .domain import ApiException, combine_backtests, PositionSize, \
1212
OrderType, OperationalException, OrderStatus, OrderSide, \
13-
TimeUnit, TimeInterval, Order, Portfolio, Backtest, \
13+
TimeUnit, TimeInterval, Order, Portfolio, Backtest, DataError, \
1414
Position, TimeFrame, INDEX_DATETIME, MarketCredential, \
1515
PortfolioConfiguration, RESOURCE_DIRECTORY, AWS_LAMBDA_LOGGING_CONFIG, \
1616
Trade, SYMBOLS, RESERVED_BALANCES, APP_MODE, AppMode, DATETIME_FORMAT, \
@@ -191,5 +191,6 @@
191191
"get_number_of_trades",
192192
"BacktestRun",
193193
"load_backtests_from_directory",
194-
"save_backtests_to_directory"
194+
"save_backtests_to_directory",
195+
"DataError"
195196
]

investing_algorithm_framework/app/app.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from datetime import datetime, timezone
66
from typing import List, Optional, Any, Dict, Tuple
77

8+
import pandas as pd
89
from flask import Flask
910

1011
from investing_algorithm_framework.app.algorithm import Algorithm
@@ -16,7 +17,7 @@
1617
SQLALCHEMY_DATABASE_URI, OperationalException, StateHandler, \
1718
BACKTESTING_START_DATE, BACKTESTING_END_DATE, APP_MODE, MarketCredential, \
1819
AppMode, BacktestDateRange, DATABASE_DIRECTORY_NAME, DataSource, \
19-
BACKTESTING_INITIAL_AMOUNT, SNAPSHOT_INTERVAL, Backtest, \
20+
BACKTESTING_INITIAL_AMOUNT, SNAPSHOT_INTERVAL, Backtest, DataError, \
2021
PortfolioConfiguration, SnapshotInterval, DataType, combine_backtests, \
2122
PortfolioProvider, OrderExecutor, ImproperlyConfigured, \
2223
DataProvider, INDEX_DATETIME, tqdm, BacktestPermutationTest, \
@@ -32,6 +33,7 @@
3233
from .eventloop import EventLoopService
3334
from .analysis import create_ohlcv_permutation
3435

36+
3537
logger = logging.getLogger("investing_algorithm_framework")
3638
COLOR_RESET = '\033[0m'
3739
COLOR_GREEN = '\033[92m'
@@ -789,6 +791,69 @@ def get_market_credentials(self) -> List[MarketCredential]:
789791
.market_credential_service()
790792
return market_credential_service.get_all()
791793

794+
def check_data_completeness(
    self,
    strategies: List[TradingStrategy],
    backtest_date_range: BacktestDateRange
) -> None:
    """
    Check that every OHLCV data source used by the given strategies
    has gap-free data for the backtest date range.

    The data sources of all strategies are first initialized for the
    backtest date range. For each OHLCV data source, the last
    ``window_size`` rows (ordered by datetime) of the retrieved data
    are compared against an evenly spaced datetime grid that runs
    from the first to the last of those rows, using the time frame of
    the data source as step size. Data sources with another data type
    are not checked.

    Args:
        strategies (List[TradingStrategy]): List of strategy objects
            to check data completeness for.
        backtest_date_range (BacktestDateRange): The date range to
            check data completeness for.

    Returns:
        None

    Raises:
        DataError: If no OHLCV data is available for a data source,
            or if one or more expected data points are missing.
    """
    data_sources = []

    for strategy in strategies:
        data_sources.extend(strategy.data_sources)

    self.initialize_data_sources_backtest(
        data_sources,
        backtest_date_range,
        show_progress=True
    )
    data_provider_service = self.container.data_provider_service()

    for strategy in strategies:

        for data_source in strategy.data_sources:

            # Only OHLCV data has a regular grid that can be verified
            if not DataType.OHLCV.equals(data_source.data_type):
                continue

            df = data_provider_service.get_ohlcv_data(
                symbol=data_source.symbol,
                start_date=backtest_date_range.start_date,
                end_date=backtest_date_range.end_date,
                pandas=True
            )

            # Report absent data as a DataError instead of letting
            # the positional lookups below fail with an IndexError
            if df is None or df.empty:
                raise DataError(
                    f"No OHLCV data available for data source "
                    f"{data_source.data_provider_identifier} "
                    f"({data_source.symbol}) between "
                    f"{backtest_date_range.start_date} and "
                    f"{backtest_date_range.end_date}"
                )

            # Work on a copy so the provider's frame is not mutated
            df = df.copy()
            df['Datetime'] = pd.to_datetime(df['Datetime'])
            df = df.sort_values('Datetime')

            # NOTE(review): assumes a data source may be defined
            # without a window size (None); in that case all
            # retrieved rows are checked - confirm against DataSource
            if data_source.window_size is not None:
                df = df.tail(data_source.window_size)

            start = df['Datetime'].iloc[0]
            end = df['Datetime'].iloc[-1]
            freq = pd.to_timedelta(data_source.time_frame.value)
            expected = pd.date_range(start, end, freq=freq)
            actual = df['Datetime']
            missing = expected.difference(actual)

            if len(missing) > 0:
                # Derive the percentage from the missing grid points
                # rather than from the raw row count, so duplicate or
                # off-grid timestamps cannot hide a gap
                completeness = \
                    (len(expected) - len(missing)) / len(expected) * 100
                raise DataError(
                    f"Data completeness for data source "
                    f"{data_source.data_provider_identifier} "
                    f"({data_source.symbol}) is {completeness:.2f}% "
                    f"complete. Missing data points: {len(missing)}"
                )
856+
792857
def run_vector_backtests(
793858
self,
794859
initial_amount,

investing_algorithm_framework/domain/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from .data_provider import DataProvider
1616
from .data_structures import PeekableQueue
1717
from .decimal_parsing import parse_decimal_to_string, parse_string_to_decimal
18-
from .exceptions import OperationalException, ApiException, \
18+
from .exceptions import OperationalException, ApiException, DataError, \
1919
PermissionDeniedApiException, ImproperlyConfigured, NetworkError
2020
from .models import OrderStatus, OrderSide, OrderType, TimeInterval, \
2121
TimeUnit, TimeFrame, PortfolioConfiguration, Portfolio, Position, \
@@ -143,5 +143,6 @@
143143
"BacktestEvaluationFocus",
144144
'combine_backtests',
145145
'PositionSize',
146-
'generate_backtest_summary_metrics'
146+
'generate_backtest_summary_metrics',
147+
'DataError'
147148
]

investing_algorithm_framework/domain/exceptions.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,20 @@ def to_response(self):
8484
"status": "error",
8585
"message": self.error_message
8686
}
87+
88+
89+
class DataError(Exception):
    """
    Exception raised when a problem occurs during data retrieval
    or data processing.

    The message is passed to the base ``Exception`` and is also kept
    on the instance as ``error_message``.
    """

    def __init__(self, message) -> None:
        self.error_message = message
        super().__init__(message)

    def to_response(self):
        """
        Return the error as a dictionary with the keys "status"
        and "message".
        """
        response = {"status": "error"}
        response["message"] = self.error_message
        return response

0 commit comments

Comments
 (0)