|
43 | 43 | _DatasetT = TypeVar('_DatasetT')
44 | 44 |
|
45 | 45 | DATA_KEY = 'data'
| 46 | +ASYNC_LOAD_SLEEP_TIME = 0.05
46 | 47 |
|
47 | 48 | # def orjson_dumps(v, *, default):
48 | 49 | # # orjson.dumps returns bytes, to match standard json.dumps we need to decode
@@ -588,19 +589,22 @@ def _load_http_urls(self, http_url_dataset: 'HttpUrlDataset') -> list[asyncio.Task]:
588 | 589 |
|
589 | 590 | async def load_all(): |
590 | 591 | tasks = [] |
591 | | - client_sessions = {} |
| 592 | + |
592 | 593 | for host in hosts: |
593 | | - client_sessions[host] = RateLimitingClientSession( |
594 | | - self.config.http_config_for_host[host].requests_per_time_period, |
595 | | - self.config.http_config_for_host[host].time_period_in_secs) |
596 | | - |
597 | | - for host, indices in hosts.items(): |
598 | | - task = ( |
599 | | - get_json_from_api_endpoint.refine(output_dataset_param='output_dataset').run( |
600 | | - http_url_dataset[indices], |
601 | | - client_session=client_sessions[host], |
602 | | - output_dataset=self)) |
603 | | - tasks.append(task) |
| 594 | + async with RateLimitingClientSession( |
| 595 | + self.config.http_config_for_host[host].requests_per_time_period, |
| 596 | + self.config.http_config_for_host[host].time_period_in_secs |
| 597 | + ) as client_session: |
| 598 | + indices = hosts[host] |
| 599 | + task = ( |
| 600 | + get_json_from_api_endpoint.refine( |
| 601 | + output_dataset_param='output_dataset').run( |
| 602 | + http_url_dataset[indices], |
| 603 | + client_session=client_session, |
| 604 | + output_dataset=self)) |
| 605 | + tasks.append(task) |
| 606 | + while not task.done(): |
| 607 | + await asyncio.sleep(ASYNC_LOAD_SLEEP_TIME) |
604 | 608 |
|
605 | 609 | await asyncio.gather(*tasks) |
606 | 610 | return self |
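
For context on the new pattern: each per-host RateLimitingClientSession is now opened with async with, so the underlying session is closed as soon as the block exits; that is why the code polls task.done() (sleeping ASYNC_LOAD_SLEEP_TIME between checks) before leaving the block rather than relying only on the final asyncio.gather. The actual RateLimitingClientSession implementation is not part of this diff; purely as a hedged sketch of what a session of that shape could look like, assuming aiohttp and a simple sliding-window limiter (only the constructor arguments requests_per_time_period and time_period_in_secs are taken from the code above, every other name here is illustrative):

import asyncio
import time
from typing import Optional

import aiohttp


class RateLimitingClientSessionSketch:
    """Illustrative sketch only, not the repo's RateLimitingClientSession:
    an aiohttp-backed session that caps how many requests it issues per
    time window, mirroring the constructor arguments used in the diff."""

    def __init__(self, requests_per_time_period: int, time_period_in_secs: float):
        self._max_requests = requests_per_time_period
        self._period = time_period_in_secs
        self._request_times: list[float] = []  # monotonic timestamps of recent requests
        self._lock = asyncio.Lock()
        self._session: Optional[aiohttp.ClientSession] = None

    async def __aenter__(self):
        self._session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        # The session is closed when the async-with block exits, which is why
        # the caller above waits for its task to finish inside the block.
        if self._session is not None:
            await self._session.close()

    async def _wait_for_slot(self):
        async with self._lock:
            while True:
                now = time.monotonic()
                # Drop timestamps that have fallen out of the current window.
                self._request_times = [
                    t for t in self._request_times if now - t < self._period]
                if len(self._request_times) < self._max_requests:
                    self._request_times.append(now)
                    return
                # Sleep until the oldest request leaves the window.
                await asyncio.sleep(self._period - (now - self._request_times[0]))

    async def get(self, url: str, **kwargs) -> aiohttp.ClientResponse:
        await self._wait_for_slot()
        assert self._session is not None, 'use inside "async with"'
        return await self._session.get(url, **kwargs)

Usage mirrors the diff: async with RateLimitingClientSessionSketch(requests_per_time_period, time_period_in_secs) as client_session: keeps the session alive exactly as long as the work that depends on it, with get() blocking whenever the per-window request budget is exhausted.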
|