Skip to content

Commit 537a6fc

Browse files
Merge pull request #1930 from kili-technology/feature/lab-3811-aakd-i-want-to-know-the-number-of-annotations-per-asset
feat(LAB-3811): add count asset annotations for batching in list asse…
2 parents 22c3408 + a04d6fc commit 537a6fc

File tree

2 files changed

+28
-5
lines changed

2 files changed

+28
-5
lines changed

src/kili/adapters/kili_api_gateway/asset/operations.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,9 @@ def get_assets_query(fragment: str) -> str:
3030
external_ids: filterExistingAssets(projectID: $projectID, externalIDs: $externalIDs)
3131
}
3232
"""
33+
34+
# GraphQL query that calls countAssetAnnotations with an AssetWhere filter; the result is exposed under the `data` alias.
GQL_COUNT_ASSET_ANNOTATIONS = """
35+
query countAssetAnnotations($where: AssetWhere!) {
36+
data: countAssetAnnotations(where: $where)
37+
}
38+
"""

src/kili/adapters/kili_api_gateway/asset/operations_mixin.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
)
1212
from kili.adapters.kili_api_gateway.asset.mappers import asset_where_mapper
1313
from kili.adapters.kili_api_gateway.asset.operations import (
14+
GQL_COUNT_ASSET_ANNOTATIONS,
1415
GQL_COUNT_ASSETS,
1516
GQL_CREATE_UPLOAD_BUCKET_SIGNED_URLS,
1617
GQL_FILTER_EXISTING_ASSETS,
@@ -27,6 +28,12 @@
2728
from kili.domain.asset import AssetFilters
2829
from kili.domain.types import ListOrTuple
2930

31+
# Threshold on the ratio (annotation count / asset batch size) used by list_assets_split.
32+
# When the annotation count returned by count_assets_annotations, divided by the
33+
# capped asset batch size, exceeds this value, assets are fetched one at a time
34+
# (batch size 1) instead of with the capped batch size, to avoid performance issues.
35+
THRESHOLD_FOR_BATCHING = 200
36+
3037

3138
class AssetOperationMixin(BaseOperationMixin):
3239
"""Mixin extending Kili API Gateway class with Assets related operations."""
@@ -74,13 +81,15 @@ def list_assets_split(
7481
self, filters: AssetFilters, fields: ListOrTuple[str], options: QueryOptions, project_info
7582
) -> Generator[Dict, None, None]:
7683
"""List assets with given options."""
77-
options = QueryOptions(
78-
options.disable_tqdm,
79-
options.first,
80-
options.skip,
81-
min(options.batch_size, 10 if project_info["inputType"] == "VIDEO" else 50),
84+
nb_annotations = self.count_assets_annotations(filters)
85+
assets_batch_max_amount = 10 if project_info["inputType"] == "VIDEO" else 50
86+
batch_size_to_use = min(options.batch_size, assets_batch_max_amount)
87+
batch_size = (
88+
1 if nb_annotations / batch_size_to_use > THRESHOLD_FOR_BATCHING else batch_size_to_use
8289
)
8390

91+
options = QueryOptions(options.disable_tqdm, options.first, options.skip, batch_size)
92+
8493
inner_annotation_fragment = get_annotation_fragment()
8594
annotation_fragment = f"""
8695
annotations {{
@@ -149,3 +158,11 @@ def filter_existing_assets(self, project_id: str, assets_external_ids: ListOrTup
149158
}
150159
external_id_response = self.graphql_client.execute(GQL_FILTER_EXISTING_ASSETS, payload)
151160
return external_id_response["external_ids"]
161+
162+
def count_assets_annotations(self, filters: AssetFilters) -> int:
163+
"""Count the number of annotations for assets matching the filters."""
164+
# Translate the filters into the `where` variable with asset_where_mapper.
where = asset_where_mapper(filters)
165+
payload = {"where": where}
166+
count_result = self.graphql_client.execute(GQL_COUNT_ASSET_ANNOTATIONS, payload)
167+
# The query aliases its result as `data`, so the count is read from that key.
count: int = count_result["data"]
168+
return count

0 commit comments

Comments
 (0)