|
5 | 5 | from datetime import datetime, timezone |
6 | 6 | from typing import List, Optional, Any, Dict, Tuple |
7 | 7 |
|
| 8 | +import pandas as pd |
8 | 9 | from flask import Flask |
9 | 10 |
|
10 | 11 | from investing_algorithm_framework.app.algorithm import Algorithm |
|
16 | 17 | SQLALCHEMY_DATABASE_URI, OperationalException, StateHandler, \ |
17 | 18 | BACKTESTING_START_DATE, BACKTESTING_END_DATE, APP_MODE, MarketCredential, \ |
18 | 19 | AppMode, BacktestDateRange, DATABASE_DIRECTORY_NAME, DataSource, \ |
19 | | - BACKTESTING_INITIAL_AMOUNT, SNAPSHOT_INTERVAL, Backtest, \ |
| 20 | + BACKTESTING_INITIAL_AMOUNT, SNAPSHOT_INTERVAL, Backtest, DataError, \ |
20 | 21 | PortfolioConfiguration, SnapshotInterval, DataType, combine_backtests, \ |
21 | 22 | PortfolioProvider, OrderExecutor, ImproperlyConfigured, \ |
22 | 23 | DataProvider, INDEX_DATETIME, tqdm, BacktestPermutationTest, \ |
|
32 | 33 | from .eventloop import EventLoopService |
33 | 34 | from .analysis import create_ohlcv_permutation |
34 | 35 |
|
| 36 | + |
35 | 37 | logger = logging.getLogger("investing_algorithm_framework") |
36 | 38 | COLOR_RESET = '\033[0m' |
37 | 39 | COLOR_GREEN = '\033[92m' |
@@ -789,6 +791,69 @@ def get_market_credentials(self) -> List[MarketCredential]: |
789 | 791 | .market_credential_service() |
790 | 792 | return market_credential_service.get_all() |
791 | 793 |
|
def check_data_completeness(
    self,
    strategies: List[TradingStrategy],
    backtest_date_range: BacktestDateRange
) -> None:
    """
    Function to check the data completeness for a set of strategies
    over a given backtest date range. The data sources of all
    strategies are first initialized for backtesting. Then, for every
    OHLCV data source, the most recent `window_size` rows of the data
    returned for the date range are compared against a regular
    datetime grid spaced by the data source's time frame. Data sources
    of any other data type are skipped.

    Args:
        strategies (List[TradingStrategy]): List of strategy objects
            to check data completeness for.
        backtest_date_range (BacktestDateRange): The date range to
            check data completeness for.

    Returns:
        None

    Raises:
        DataError: If an OHLCV data source has no data at all for the
            date range, or if one or more expected datetimes are
            missing from the inspected rows.
    """
    data_sources = []

    for strategy in strategies:
        data_sources.extend(strategy.data_sources)

    self.initialize_data_sources_backtest(
        data_sources,
        backtest_date_range,
        show_progress=True
    )
    data_provider_service = self.container.data_provider_service()

    for strategy in strategies:

        for data_source in strategy.data_sources:

            if not DataType.OHLCV.equals(data_source.data_type):
                continue

            df = data_provider_service.get_ohlcv_data(
                symbol=data_source.symbol,
                start_date=backtest_date_range.start_date,
                end_date=backtest_date_range.end_date,
                pandas=True
            )

            # Without this guard an empty result would surface as an
            # opaque IndexError from `.iloc[0]` further down.
            if df is None or df.empty:
                raise DataError(
                    f"No OHLCV data available for data source "
                    f"{data_source.data_provider_identifier} "
                    f"({data_source.symbol}) between "
                    f"{backtest_date_range.start_date} and "
                    f"{backtest_date_range.end_date}"
                )

            # Only the datetime column is needed. pd.to_datetime
            # returns a new Series, so the provider's frame is never
            # mutated and no full copy is required.
            actual = pd.to_datetime(df['Datetime']) \
                .sort_values() \
                .tail(data_source.window_size)
            freq = pd.to_timedelta(data_source.time_frame.value)
            expected = pd.date_range(
                actual.iloc[0], actual.iloc[-1], freq=freq
            )
            missing = expected.difference(actual)

            if len(missing) == 0:
                continue

            # Derive the percentage from the missing grid points
            # rather than the raw row count, so duplicate or off-grid
            # timestamps cannot mask real gaps.
            completeness = \
                (len(expected) - len(missing)) / len(expected) * 100

            raise DataError(
                f"Data completeness for data source "
                f"{data_source.data_provider_identifier} "
                f"({data_source.symbol}) is {completeness:.2f}% "
                f"complete. Missing data points: {len(missing)}"
            )
| 856 | + |
792 | 857 | def run_vector_backtests( |
793 | 858 | self, |
794 | 859 | initial_amount, |
|
0 commit comments