From 5ab170aa414f67fe0207a043fb65f4e3665326d6 Mon Sep 17 00:00:00 2001 From: Vaghinak Basentsyan Date: Wed, 3 Jul 2024 11:05:14 +0400 Subject: [PATCH 1/6] GenAI csv upload --- .../lib/app/interface/sdk_interface.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/superannotate/lib/app/interface/sdk_interface.py b/src/superannotate/lib/app/interface/sdk_interface.py index c86cd48a5..b04cf6a41 100644 --- a/src/superannotate/lib/app/interface/sdk_interface.py +++ b/src/superannotate/lib/app/interface/sdk_interface.py @@ -2657,7 +2657,7 @@ def attach_items( :return: uploaded, failed and duplicated item names :rtype: tuple of list of strs - Example: + Example for Vector, Video, Document, PointCloud projects: :: client = SAClient() @@ -2666,6 +2666,23 @@ def attach_items( attachments=[{"name": "item", "url": "https://..."}] ) + Example for GenAI projects: + :: + + client = SAClient() + client.attach_items( + project="Medical Annotations", + attachments=[ + { + "_item_name": "item", + "_folder": "QA1", + "_item_category": "karyology", + "component_id_0": "val", + ... + } + ] + ) + Example of attaching items from custom integration: :: From a144dbd6175a279a9651eb492db6b9e9d5bb53a4 Mon Sep 17 00:00:00 2001 From: Vaghinak Basentsyan Date: Thu, 11 Jul 2024 16:24:42 +0400 Subject: [PATCH 2/6] added GenAI csv upload --- pytest.ini | 2 +- src/superannotate/__init__.py | 2 +- src/superannotate/lib/app/helpers.py | 44 +++ .../lib/app/interface/sdk_interface.py | 187 ++++++---- .../lib/core/entities/__init__.py | 2 + .../lib/core/entities/project.py | 23 ++ .../lib/core/serviceproviders.py | 25 +- .../lib/core/usecases/annotations.py | 5 + src/superannotate/lib/core/usecases/items.py | 250 +++++++++++++- src/superannotate/lib/core/utils.py | 4 + .../lib/infrastructure/controller.py | 22 ++ .../lib/infrastructure/services/annotation.py | 3 + .../lib/infrastructure/services/item.py | 3 +- .../lib/infrastructure/services/project.py | 76 +++++ tests/data_set/attach_genai.csv | 26 ++ tests/integration/items/test_attach_items.py | 318 ++++++++++++++++++ 16 files changed, 914 insertions(+), 78 deletions(-) create mode 100644 src/superannotate/lib/core/utils.py create mode 100644 tests/data_set/attach_genai.csv diff --git a/pytest.ini b/pytest.ini index c33efcaae..3b63ad975 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,5 +3,5 @@ minversion = 3.7 log_cli=true python_files = test_*.py ;pytest_plugins = ['pytest_profiling'] -addopts = -n auto --dist=loadscope +;addopts = -n auto --dist=loadscope diff --git a/src/superannotate/__init__.py b/src/superannotate/__init__.py index e24178021..47c703583 100644 --- a/src/superannotate/__init__.py +++ b/src/superannotate/__init__.py @@ -3,7 +3,7 @@ import sys -__version__ = "4.4.22" +__version__ = "4.4.23dev" sys.path.append(os.path.split(os.path.realpath(__file__))[0]) diff --git a/src/superannotate/lib/app/helpers.py b/src/superannotate/lib/app/helpers.py index 25c7529a2..6d0d42745 100644 --- a/src/superannotate/lib/app/helpers.py +++ b/src/superannotate/lib/app/helpers.py @@ -77,6 +77,50 @@ def get_s3_annotation_paths(folder_path, s3_bucket, annotation_paths, recursive) return list(set(annotation_paths)) +def convert_column_to_lowercase(df, column_name): + actual_column_name = next( + (col for col in df.columns if col.lower() == column_name.lower()), None + ) + if actual_column_name: + df = df.rename(columns={actual_column_name: column_name}) + else: + raise Exception(f"Column '{column_name}' not found.") + return df + + +def 
truncate_long_names(name, length=120): + if len(name) > length: + return name[:length] + else: + return name + + +def get_gen_ai_csv_data(csv_path): + def serializer_name(val): + if not str(val).strip(): + val = str(uuid.uuid4()) + val = truncate_long_names(val) + return val + + def df_preprocessing(df): + """ + Convert the name column to lowercase + Fill all empty cells with empty strings + Truncating the name column or generating UUID for empties + :param df: + :return: df + """ + df = convert_column_to_lowercase(df, "_item_name") + df = df.fillna("") + df["_item_name"] = df["_item_name"].apply(serializer_name) + return df + + df = pd.read_csv(csv_path, engine="python", quotechar='"', dtype=str) + df = df.drop(columns=["_folder"], errors="ignore") + df = df_preprocessing(df) + return df.to_dict(orient="records") + + def get_name_url_duplicated_from_csv(csv_path): image_data = pd.read_csv(csv_path, dtype=str) image_data.replace({pd.NA: None}, inplace=True) diff --git a/src/superannotate/lib/app/interface/sdk_interface.py b/src/superannotate/lib/app/interface/sdk_interface.py index b04cf6a41..d5fdbd52b 100644 --- a/src/superannotate/lib/app/interface/sdk_interface.py +++ b/src/superannotate/lib/app/interface/sdk_interface.py @@ -29,6 +29,7 @@ import lib.core as constants from lib.app.helpers import get_annotation_paths from lib.app.helpers import get_name_url_duplicated_from_csv +from lib.app.helpers import get_gen_ai_csv_data from lib.app.helpers import wrap_error as wrap_validation_errors from lib.app.interface.base_interface import BaseInterfaceFacade from lib.app.interface.base_interface import TrackableMeta @@ -45,6 +46,7 @@ from lib.core.conditions import Condition from lib.core.conditions import EmptyCondition from lib.core.entities import AttachmentEntity +from lib.core.entities import GenAIAttachmentEntity from lib.core.entities import WorkflowEntity from lib.core.entities import SettingEntity from lib.core.entities.classes import AnnotationClassEntity @@ -112,6 +114,12 @@ class Attachment(TypedDict, total=False): integration: NotRequired[str] # noqa +class GenAIAttachment(TypedDict, total=False): + _item_name: Optional[str] + _item_category: Optional[str] + # compoenmt id value map + + class SAClient(BaseInterfaceFacade, metaclass=TrackableMeta): """Create SAClient instance to authorize SDK in a team scope. In case of no argument has been provided, SA_TOKEN environmental variable @@ -1187,8 +1195,22 @@ def prepare_export( :param only_pinned: enable only pinned output in export. This option disables all other types of output. 
:type only_pinned: bool - :param kwargs: Arbitrary kwarg ``integration_name`` - can be provided which will be used as a storage to store export file + :param kwargs: + Arbitrary kwargs: + * integration_name: can be provided which will be used as a storage to store export file + * format: can be CSV for the Gen AI projects + + Request Example: + :: + client = SAClient() + + export = client.prepare_export( + project = "Project Name", + folder_names = ["Folder 1", "Folder 2"], + annotation_statuses = ["Completed","QualityCheck"], + export_type = "CSV") + + client.download_export("Project Name", export, "path_to_download") :return: metadata object of the prepared export :rtype: dict @@ -1216,6 +1238,12 @@ def prepare_export( break else: raise AppException("Integration not found.") + _export_type = None + export_type = kwargs.get("format") + if export_type: + export_type = export_type.lower() + if export_type == "csv": + _export_type = 3 response = self.controller.prepare_export( project_name=project_name, folder_names=folders, @@ -1223,6 +1251,7 @@ def prepare_export( only_pinned=only_pinned, annotation_statuses=annotation_statuses, integration_id=integration_id, + export_type=_export_type, ) if response.errors: raise AppException(response.errors) @@ -2632,7 +2661,7 @@ def search_items( def attach_items( self, project: Union[NotEmptyStr, dict], - attachments: Union[NotEmptyStr, Path, conlist(Attachment, min_items=1)], + attachments: Union[NotEmptyStr, Path, List[dict]], annotation_status: Optional[ANNOTATION_STATUS] = "NotStarted", ): """Link items from external storage to SuperAnnotate using URLs. @@ -2657,7 +2686,7 @@ def attach_items( :return: uploaded, failed and duplicated item names :rtype: tuple of list of strs - Example for Vector, Video, Document, PointCloud projects: + Example: :: client = SAClient() @@ -2666,7 +2695,7 @@ def attach_items( attachments=[{"name": "item", "url": "https://..."}] ) - Example for GenAI projects: + Example of attaching items from custom integration: :: client = SAClient() @@ -2674,16 +2703,14 @@ def attach_items( project="Medical Annotations", attachments=[ { - "_item_name": "item", - "_folder": "QA1", - "_item_category": "karyology", - "component_id_0": "val", - ... 
- } - ] - ) + "name": "item", + "url": "https://bucket-name.s3…/example.png" + "integration": "custom-integration-name" + } + ] + ) - Example of attaching items from custom integration: + Example of attaching items for GenAI projects: :: client = SAClient() @@ -2691,71 +2718,97 @@ def attach_items( project="Medical Annotations", attachments=[ { - "name": "item", - "url": "https://bucket-name.s3…/example.png" - "integration": "custom-integration-name" + "_item_name": "item", + "_category": "heart", + "category_text_input": "value1", + "category_numeric_input": "value1", + "category_approve_input": 0, + "category_rating_input": 4, + "category_slider_input": 23, + "category_multiselect": ["Option 1"] + "category_checkbox_input": ["Option 1","Option 3"], } ] ) """ project_name, folder_name = extract_project_folder(project) - try: - attachments = parse_obj_as(List[AttachmentEntity], attachments) - unique_attachments = set(attachments) - duplicate_attachments = [ - item - for item, count in collections.Counter(attachments).items() - if count > 1 - ] - except ValidationError: - ( - unique_attachments, - duplicate_attachments, - ) = get_name_url_duplicated_from_csv(attachments) - if duplicate_attachments: - logger.info("Dropping duplicates.") - unique_attachments = parse_obj_as(List[AttachmentEntity], unique_attachments) + project, folder = self.controller.get_project_folder(project_name, folder_name) uploaded, fails, duplicated = [], [], [] - _unique_attachments = [] - if any(i.integration for i in unique_attachments): - integtation_item_map = { - i.name: i - for i in self.controller.integrations.list().data - if i.type == IntegrationTypeEnum.CUSTOM - } - invalid_integrations = set() - for attachment in unique_attachments: - if attachment.integration: - if attachment.integration in integtation_item_map: - attachment.integration_id = integtation_item_map[ - attachment.integration - ].id - else: - invalid_integrations.add(attachment.integration) - continue - _unique_attachments.append(attachment) - if invalid_integrations: - logger.error( - f"The ['{','.join(invalid_integrations)}'] integrations specified for the items doesn't exist in the " - "list of integrations on the platform. Any associated items will be skipped." + if project.type == ProjectType.GEN_AI.value: + if isinstance(attachments, (str, Path)): + attachments = parse_obj_as( + List[GenAIAttachmentEntity], + get_gen_ai_csv_data(csv_path=attachments), ) - else: - _unique_attachments = unique_attachments - - if _unique_attachments: - logger.info( - f"Attaching {len(_unique_attachments)} file(s) to project {project}." 
- ) - project, folder = self.controller.get_project_folder( - project_name, folder_name - ) - response = self.controller.items.attach( + else: + attachments = parse_obj_as(List[GenAIAttachmentEntity], attachments) + response = self.controller.items.attach_gen_ai_data( project=project, folder=folder, - attachments=_unique_attachments, + attachments=attachments, annotation_status=annotation_status, + user=self.controller.current_user, ) + uploaded, duplicated, failed = response.data + else: + try: + attachments = parse_obj_as(List[AttachmentEntity], attachments) + unique_attachments = set(attachments) + duplicate_attachments = [ + item + for item, count in collections.Counter(attachments).items() + if count > 1 + ] + except ValidationError: + ( + unique_attachments, + duplicate_attachments, + ) = get_name_url_duplicated_from_csv(attachments) + if duplicate_attachments: + logger.info("Dropping duplicates.") + unique_attachments = parse_obj_as( + List[AttachmentEntity], unique_attachments + ) + _unique_attachments = [] + if any(i.integration for i in unique_attachments): + integtation_item_map = { + i.name: i + for i in self.controller.integrations.list().data + if i.type == IntegrationTypeEnum.CUSTOM + } + invalid_integrations = set() + for attachment in unique_attachments: + if attachment.integration: + if attachment.integration in integtation_item_map: + attachment.integration_id = integtation_item_map[ + attachment.integration + ].id + else: + invalid_integrations.add(attachment.integration) + continue + _unique_attachments.append(attachment) + if invalid_integrations: + logger.error( + f"The ['{','.join(invalid_integrations)}'] integrations specified for the items doesn't exist in the " + "list of integrations on the platform. Any associated items will be skipped." + ) + else: + _unique_attachments = unique_attachments + + if _unique_attachments: + logger.info( + f"Attaching {len(_unique_attachments)} file(s) to project {project}." 
+ ) + project, folder = self.controller.get_project_folder( + project_name, folder_name + ) + response = self.controller.items.attach( + project=project, + folder=folder, + attachments=_unique_attachments, + annotation_status=annotation_status, + ) if response.errors: raise AppException(response.errors) uploaded, duplicated = response.data diff --git a/src/superannotate/lib/core/entities/__init__.py b/src/superannotate/lib/core/entities/__init__.py index 84d280b5c..bc7a07acd 100644 --- a/src/superannotate/lib/core/entities/__init__.py +++ b/src/superannotate/lib/core/entities/__init__.py @@ -12,6 +12,7 @@ from lib.core.entities.items import VideoEntity from lib.core.entities.project import AttachmentEntity from lib.core.entities.project import ContributorEntity +from lib.core.entities.project import GenAIAttachmentEntity from lib.core.entities.project import MLModelEntity from lib.core.entities.project import ProjectEntity from lib.core.entities.project import SettingEntity @@ -37,6 +38,7 @@ "DocumentEntity", # Utils "AttachmentEntity", + "GenAIAttachmentEntity", # project "ProjectEntity", "ContributorEntity", diff --git a/src/superannotate/lib/core/entities/project.py b/src/superannotate/lib/core/entities/project.py index e6bc65cb8..30c228edd 100644 --- a/src/superannotate/lib/core/entities/project.py +++ b/src/superannotate/lib/core/entities/project.py @@ -50,6 +50,29 @@ def __hash__(self): return hash(self.name) +class GenAIAttachmentEntity(BaseModel): + _item_name: Optional[str] = Field(default_factory=lambda: str(uuid.uuid4())) + _item_category: Optional[str] = Field(default=None) + + integration: Optional[str] = None + integration_id: Optional[int] = None + + @property + def name(self): + return self._item_name + + @property + def item_categoty(self): + return self._item_category + + class Config: + include_private_fields = True + extra = Extra.allow + + def __hash__(self): + return hash(self.name) + + class WorkflowEntity(BaseModel): id: Optional[int] project_id: Optional[int] diff --git a/src/superannotate/lib/core/serviceproviders.py b/src/superannotate/lib/core/serviceproviders.py index fe5cc0e5b..398e65598 100644 --- a/src/superannotate/lib/core/serviceproviders.py +++ b/src/superannotate/lib/core/serviceproviders.py @@ -28,7 +28,6 @@ from lib.core.service_types import UserLimitsResponse from lib.core.service_types import UserResponse from lib.core.types import Attachment -from lib.core.types import AttachmentMeta class BaseClient(ABC): @@ -154,6 +153,26 @@ def upload_priority_scores( ) -> ServiceResponse: raise NotImplementedError + @abstractmethod + def list_categories( + self, + project_id: int, + ): + raise NotImplementedError + + @abstractmethod + def create_categories(self, project_id: int, categories: List[str]): + raise NotImplementedError + + @abstractmethod + def attach_categories( + self, + project_id: int, + folder_id: int, + item_id_category_id_map: Dict[int, dict], + ): + raise NotImplementedError + class BaseFolderService(SuperannotateServiceProvider): @abstractmethod @@ -262,7 +281,7 @@ def attach( attachments: List[Attachment], annotation_status_code, upload_state_code, - meta: Dict[str, AttachmentMeta], + meta: Dict[str, dict], ) -> ServiceResponse: raise NotImplementedError @@ -376,6 +395,7 @@ async def upload_small_annotations( project: entities.ProjectEntity, folder: entities.FolderEntity, items_name_data_map: Dict[str, dict], + transform_version: str = None, ) -> UploadAnnotationsResponse: raise NotImplementedError @@ -571,6 +591,7 @@ def prepare_export( 
include_fuse: bool, only_pinned: bool, integration_id: int, + export_type: int = None, ) -> ServiceResponse: raise NotImplementedError diff --git a/src/superannotate/lib/core/usecases/annotations.py b/src/superannotate/lib/core/usecases/annotations.py index 25cbd3b56..c12edf9e3 100644 --- a/src/superannotate/lib/core/usecases/annotations.py +++ b/src/superannotate/lib/core/usecases/annotations.py @@ -183,6 +183,7 @@ async def upload_small_annotations( reporter: Reporter, report: Report, callback: Callable = None, + transform_version: str = None, ): async def upload(_chunk: List[ItemToUpload]): failed_annotations, missing_classes, missing_attr_groups, missing_attrs = ( @@ -199,6 +200,7 @@ async def upload(_chunk: List[ItemToUpload]): project=project, folder=folder, items_name_data_map=items_name_data_map, + transform_version=transform_version, ) if response.ok: if response.data.failed_items: # noqa @@ -299,6 +301,7 @@ def __init__( service_provider: BaseServiceProvider, user: UserEntity, keep_status: bool = False, + transform_version: str = None, ): super().__init__(reporter) self._project = project @@ -308,6 +311,7 @@ def __init__( self._keep_status = keep_status self._report = Report([], [], [], []) self._user = user + self._transform_version = transform_version def validate_project_type(self): if self._project.type == constants.ProjectType.PIXEL.value: @@ -413,6 +417,7 @@ async def run_workers(self, items_to_upload: List[ItemToUpload]): service_provider=self._service_provider, reporter=self.reporter, report=self._report, + transform_version=self._transform_version, ) ) diff --git a/src/superannotate/lib/core/usecases/items.py b/src/superannotate/lib/core/usecases/items.py index 2c4c67397..c89693e92 100644 --- a/src/superannotate/lib/core/usecases/items.py +++ b/src/superannotate/lib/core/usecases/items.py @@ -1,13 +1,17 @@ import copy +import json import logging import traceback from collections import defaultdict from concurrent.futures import as_completed from concurrent.futures import ThreadPoolExecutor +from contextlib import suppress from typing import Dict from typing import List from typing import Optional +from typing import TypedDict +import requests import superannotate.lib.core as constants from lib.core.conditions import Condition from lib.core.conditions import CONDITION_EQ as EQ @@ -15,9 +19,11 @@ from lib.core.entities import BaseItemEntity from lib.core.entities import DocumentEntity from lib.core.entities import FolderEntity +from lib.core.entities import GenAIAttachmentEntity from lib.core.entities import ImageEntity from lib.core.entities import ProjectEntity from lib.core.entities import SubSetEntity +from lib.core.entities import UserEntity from lib.core.entities import VideoEntity from lib.core.exceptions import AppException from lib.core.exceptions import AppValidationException @@ -27,11 +33,14 @@ from lib.core.serviceproviders import BaseServiceProvider from lib.core.types import Attachment from lib.core.types import AttachmentMeta +from lib.core.usecases import UploadAnnotationsUseCase from lib.core.usecases.base import BaseReportableUseCase from lib.core.usecases.base import BaseUseCase from lib.core.usecases.folders import SearchFoldersUseCase +from lib.core.utils import chunkify from lib.infrastructure.utils import extract_project_folder + logger = logging.getLogger("sa") @@ -418,12 +427,11 @@ def execute(self) -> Response: duplications = [] attached = [] self.reporter.start_progress(self.attachments_count, "Attaching URLs") - for i in range(0, 
self.attachments_count, self.CHUNK_SIZE): - attachments = self._attachments[i : i + self.CHUNK_SIZE] # noqa: E203 + for chunk in chunkify(self._attachments, self.CHUNK_SIZE): response = self._service_provider.items.list_by_names( project=self._project, folder=self._folder, - names=[attachment.name for attachment in attachments], + names=[attachment.name for attachment in chunk], ) if not response.ok: raise AppException(response.error) @@ -431,7 +439,7 @@ def execute(self) -> Response: duplications.extend([image.name for image in response.data]) to_upload: List[Attachment] = [] to_upload_meta: Dict[str, AttachmentMeta] = {} - for attachment in attachments: + for attachment in chunk: if attachment.name not in duplications: to_upload.append( Attachment(name=attachment.name, path=attachment.url) @@ -452,12 +460,244 @@ def execute(self) -> Response: self._response.errors = AppException(backend_response.error) else: attached.extend([i.name for i in to_upload]) - self.reporter.update_progress(len(attachments)) + self.reporter.update_progress(len(chunk)) self.reporter.finish_progress() self._response.data = attached, duplications return self._response +class ItemPayload(TypedDict): + category: Optional["str"] + data: dict + + +class AttachGenAIItems(BaseReportableUseCase): + CHUNK_SIZE = 500 + + def __init__( + self, + reporter: Reporter, + project: ProjectEntity, + folder: FolderEntity, + attachments: List[GenAIAttachmentEntity], + annotation_status: str, + service_provider: BaseServiceProvider, + user: UserEntity, + upload_state_code: int = constants.UploadState.EXTERNAL.value, + ): + super().__init__(reporter) + self._project = project + self._folder = folder + self._attachments = attachments + self._user = user + self._annotation_status_code = constants.AnnotationStatus.get_value( + annotation_status + ) + self._upload_state_code = upload_state_code + self._service_provider = service_provider + self._attachments_count = None + + @property + def attachments_count(self): + if not self._attachments_count: + self._attachments_count = len(self._attachments) + return self._attachments_count + + def validate_limitations(self): + attachments_count = self.attachments_count + response = self._service_provider.get_limitations( + project=self._project, folder=self._folder + ) + if not response.ok: + raise AppValidationException(response.error) + if attachments_count > response.data.folder_limit.remaining_image_count: + raise AppValidationException(constants.ATTACH_FOLDER_LIMIT_ERROR_MESSAGE) + elif attachments_count > response.data.project_limit.remaining_image_count: + raise AppValidationException(constants.ATTACH_PROJECT_LIMIT_ERROR_MESSAGE) + elif ( + response.data.user_limit + and attachments_count > response.data.user_limit.remaining_image_count + ): + raise AppValidationException(constants.ATTACH_USER_LIMIT_ERROR_MESSAGE) + + def validate_upload_state(self): + if self._project.upload_state == constants.UploadState.BASIC.value: + raise AppValidationException(constants.ATTACHING_UPLOAD_STATE_ERROR) + + @staticmethod + def get_dummy_meta(): + return { + "width": None, + "height": None, + "annotation_json_path": None, + } + + @staticmethod + def generate_annotation_json(row_data: GenAIAttachmentEntity) -> dict: + def serialzie_value(val): + if isinstance(val, str) and val.startswith("[") and val.endswith("]"): + try: + val = json.loads(val) + except json.JSONDecodeError: + pass + return val + + data = {"data": {}} + for class_name, attr_value in row_data.dict().items(): + if class_name == "_item_name": 
+ data["metadata"] = {"name": attr_value} + elif class_name in ["_folder", "_item_category"]: + continue + data["data"][class_name] = {"value": serialzie_value(attr_value)} + return data + + def get_name_item_map(self, data: List[GenAIAttachmentEntity]) -> Dict[str, dict]: + name_item_map = {} + for item_data in data: + name_item_map[item_data.name] = { + "category": item_data.item_categoty, + "data": self.generate_annotation_json(item_data), + } + return name_item_map + + def execute(self) -> Response: + category_name_id_map: Dict[str, int] = {} + existing_categories = self._service_provider.projects.list_categories( + project_id=self._project.id + ).data + category_name_id_map.update( + {i["name"]: i["id"] for i in existing_categories["data"]} + ) + if self.is_valid(): + duplicated_item_names: List[str] = [] + attached_item_names: List[str] = [] + self.reporter.start_progress(self.attachments_count, "Attaching URLs") + for chunk in chunkify(self._attachments, self.CHUNK_SIZE): + response = self._service_provider.items.list_by_names( + project=self._project, + folder=self._folder, + names=[attachment.name for attachment in chunk], + ) + if not response.ok: + raise AppException(response.error) + + duplicated_item_names.extend([image.name for image in response.data]) + to_upload: List[Attachment] = [] + to_upload_meta: Dict[str, dict] = {} + for attachment in chunk: + if attachment.name not in duplicated_item_names: + to_upload.append( + Attachment( + name=attachment._item_name, path="custom_llm" + ) # noqa + ) + to_upload_meta[attachment.name] = self.get_dummy_meta() + if to_upload: + attach_response = self._service_provider.items.attach( + project=self._project, + folder=self._folder, + attachments=to_upload, + annotation_status_code=self._annotation_status_code, + upload_state_code=self._upload_state_code, + meta=to_upload_meta, + ) + + if not attach_response.ok: + raise AppException(attach_response.error) + else: + attached_items = attach_response.data + attached_item_names.extend([i["name"] for i in attached_items]) + name_item_map = self.get_name_item_map(chunk) + id_annotation_map = {} + for _item in attached_items: + _annotation = name_item_map[_item["name"]]["data"] + if _annotation: + id_annotation_map[_item["id"]] = { + "annotation": _annotation, + "item_name": _item["name"], + } + upload_use_case = UploadAnnotationsUseCase( + project=self._project, + folder=self._folder, + annotations=list( + [i["annotation"] for i in id_annotation_map.values()] + ), + service_provider=self._service_provider, + reporter=Reporter(log_info=False), + user=self._user, + keep_status=True, + transform_version="llmJson", + ) + annotations_response = upload_use_case.execute() + if annotations_response.errors: + raise AppException(annotations_response.errors) + failed_annotations = annotations_response.data["failed"] + if failed_annotations: + self._service_provider.items.delete_multiple( + project=self._project, + item_ids=list(map(int, failed_annotations)), + ) + logger.warning( + f"Failed annotations [{','.join(failed_annotations)}]" + ) + item_id_category_id_map = {} + item_id_category_name_map = {} + for item in attached_items: + _category = name_item_map[item["name"]]["category"] + if _category: + item_id_category_name_map[item["id"]] = _category + for _id, category in item_id_category_name_map.items(): + try: + item_id_category_id_map[_id] = category_name_id_map[ + category + ] + except KeyError: + continue + categories_to_create_item_id_map = {} + for item_id, category in 
item_id_category_name_map.items(): + if item_id not in item_id_category_id_map: + categories_to_create_item_id_map[category] = item_id + if categories_to_create_item_id_map: + with suppress(requests.HTTPError): + _categories = ( + self._service_provider.projects.create_categories( + project_id=self._project.id, + categories=list( + categories_to_create_item_id_map.keys() + ), + )["data"] + ) + for data in _categories: + item_id_category_id_map[ + categories_to_create_item_id_map[data["name"]] + ] = data["id"] + category_name_id_map[data["name"]] = data["id"] + for item_id in item_id_category_name_map: + with suppress(KeyError): + item_id_category_id_map[item_id] = category_name_id_map[ + item_id_category_name_map[item_id] + ] + if item_id_category_id_map: + self._service_provider.projects.attach_categories( + project_id=self._project.id, + folder_id=self._folder.id, + item_id_category_id_map=item_id_category_id_map, + ) + self.reporter.update_progress(len(chunk)) + self.reporter.finish_progress() + failed_item_names = ( + {i._item_name for i in self._attachments} + - set(attached_item_names) + - set(duplicated_item_names) + ) + self._response.data = ( + attached_item_names, + duplicated_item_names, + failed_item_names, + ) + return self._response + + class CopyItems(BaseReportableUseCase): """ Copy items in bulk between folders in a project. diff --git a/src/superannotate/lib/core/utils.py b/src/superannotate/lib/core/utils.py new file mode 100644 index 000000000..63b92a5a3 --- /dev/null +++ b/src/superannotate/lib/core/utils.py @@ -0,0 +1,4 @@ +def chunkify(lst, n): + """Divide the list `lst` into chunks of size `n`.""" + for i in range(0, len(lst), n): + yield lst[i : i + n] diff --git a/src/superannotate/lib/infrastructure/controller.py b/src/superannotate/lib/infrastructure/controller.py index d75d81ef3..4d695ebf4 100644 --- a/src/superannotate/lib/infrastructure/controller.py +++ b/src/superannotate/lib/infrastructure/controller.py @@ -18,6 +18,7 @@ from lib.core.entities import ConfigEntity from lib.core.entities import ContributorEntity from lib.core.entities import FolderEntity +from lib.core.entities import GenAIAttachmentEntity from lib.core.entities import ImageEntity from lib.core.entities import MLModelEntity from lib.core.entities import ProjectEntity @@ -397,6 +398,25 @@ def attach( ) return use_case.execute() + def attach_gen_ai_data( + self, + project: ProjectEntity, + folder: FolderEntity, + attachments: List[GenAIAttachmentEntity], + annotation_status: str, + user: UserEntity, + ): + use_case = usecases.AttachGenAIItems( + reporter=Reporter(), + project=project, + folder=folder, + attachments=attachments, + annotation_status=annotation_status, + service_provider=self.service_provider, + user=user, + ) + return use_case.execute() + def delete( self, project: ProjectEntity, @@ -1013,6 +1033,7 @@ def prepare_export( only_pinned: bool, annotation_statuses: List[str] = None, integration_id: int = None, + export_type: int = None, ): project = self.get_project(project_name) use_case = usecases.PrepareExportUseCase( @@ -1023,6 +1044,7 @@ def prepare_export( only_pinned=only_pinned, annotation_statuses=annotation_statuses, integration_id=integration_id, + export_type=export_type, ) return use_case.execute() diff --git a/src/superannotate/lib/infrastructure/services/annotation.py b/src/superannotate/lib/infrastructure/services/annotation.py index f27aa45ac..82fe3cd64 100644 --- a/src/superannotate/lib/infrastructure/services/annotation.py +++ 
b/src/superannotate/lib/infrastructure/services/annotation.py @@ -252,6 +252,7 @@ async def upload_small_annotations( project: entities.ProjectEntity, folder: entities.FolderEntity, items_name_data_map: Dict[str, dict], + transform_version: str = None, ) -> UploadAnnotationsResponse: params = [ ("team_id", project.team_id), @@ -259,6 +260,8 @@ async def upload_small_annotations( ("folder_id", folder.id), *[("image_names[]", item_name) for item_name in items_name_data_map.keys()], ] + if transform_version: + params.append(("transform_version", transform_version)) url = urljoin(self.assets_provider_url, f"{self.URL_UPLOAD_ANNOTATIONS}") headers = copy.copy(self.client.default_headers) del headers["Content-Type"] diff --git a/src/superannotate/lib/infrastructure/services/item.py b/src/superannotate/lib/infrastructure/services/item.py index af9cf143d..a06c1cdf7 100644 --- a/src/superannotate/lib/infrastructure/services/item.py +++ b/src/superannotate/lib/infrastructure/services/item.py @@ -18,7 +18,6 @@ from lib.core.service_types import VideoResponse from lib.core.serviceproviders import BaseItemService from lib.core.types import Attachment -from lib.core.types import AttachmentMeta class ItemService(BaseItemService): @@ -136,7 +135,7 @@ def attach( attachments: List[Attachment], annotation_status_code, upload_state_code, - meta: Dict[str, AttachmentMeta], + meta: Dict[str, dict], ): data = { "project_id": project.id, diff --git a/src/superannotate/lib/infrastructure/services/project.py b/src/superannotate/lib/infrastructure/services/project.py index 277a2710a..9ea2fbed1 100644 --- a/src/superannotate/lib/infrastructure/services/project.py +++ b/src/superannotate/lib/infrastructure/services/project.py @@ -1,5 +1,10 @@ +import base64 +import json +from typing import Dict from typing import List +from urllib.parse import urljoin +import lib.core as constants from lib.core import entities from lib.core.conditions import Condition from lib.core.service_types import ProjectResponse @@ -9,6 +14,7 @@ class ProjectService(BaseProjectService): + WORK_MANAGAMENT_VERSION = "v1" URL = "project" URL_LIST = "projects" URL_GET = "project/{}" @@ -20,6 +26,15 @@ class ProjectService(BaseProjectService): URL_UPLOAD_PRIORITY_SCORES = "images/updateEntropy" URL_ASSIGN_ITEMS = "images/editAssignment/" URL_GET_BY_ID = "project/{project_id}" + URL_ATTACH_CATEGORIES = "items/bulk/setcategory" + URL_LIST_CATEGORIES = "categories" + URL_CREATE_CATEGORIES = "categories/bulk" + + @property + def assets_work_management_url(self): + if self.client.api_url != constants.BACKEND_URL: + return f"https://work-management-api.devsuperannotate.com/api/{self.WORK_MANAGAMENT_VERSION}/" + return f"https://work-management-api.superannotate.com/api/{self.WORK_MANAGAMENT_VERSION}/" def get_by_id(self, project_id: int): params = {} @@ -172,3 +187,64 @@ def upload_priority_scores( }, data={"image_entropies": priorities}, ) + + def get_entitiy_context(self, project_id: int): + return base64.b64encode( + json.dumps( + { + "team_id": self.client.team_id, + "project_id": project_id, + } + ).encode() + ) + + def list_categories( + self, + project_id: int, + ): + params = [ + ("project_id", project_id), + ] + return self.client.request( + urljoin(self.assets_work_management_url, self.URL_LIST_CATEGORIES), + "get", + params=params, + headers={"x-sa-entity-context": self.get_entitiy_context(project_id)}, + ) + + def create_categories(self, project_id: int, categories: List[str]): + params = [ + ("project_id", project_id), + ] + res = 
self.client.request( + urljoin(self.assets_work_management_url, self.URL_CREATE_CATEGORIES), + "post", + params=params, + data={"bulk": [{"name": i} for i in categories]}, + headers={"x-sa-entity-context": self.get_entitiy_context(project_id)}, + ) + return res.data + + def attach_categories( + self, + project_id: int, + folder_id: int, + item_id_category_id_map: Dict[int, dict], + ): + params = [ + ("project_id", project_id), + ("folder_id", folder_id), + ] + + res = self.client.request( + self.URL_ATTACH_CATEGORIES, + "post", + params=params, + data={ + "bulk": [ + {"item_id": item_id, "categories": [category]} + for item_id, category in item_id_category_id_map.items() + ] + }, + ) + return res diff --git a/tests/data_set/attach_genai.csv b/tests/data_set/attach_genai.csv new file mode 100644 index 000000000..d18192caf --- /dev/null +++ b/tests/data_set/attach_genai.csv @@ -0,0 +1,26 @@ +_item_name,_folder,_item_category,slider,checkbox,radio,approve,rating,component_id_0,component_id_1,component_id_2 +i_000017,f1,_item_category1,,,,,,,, +i_00008,f1,_item_category1,,,,,,,, +i_00004,f1,_item_category1,,,,,,,, +i_000013,f1,_item_category1,67,"[""Option 2"",""Option 3""]","[""Option 2""]",0,5,"[""Option 1""]",6,"[""Option 2""]" +i_000022,f1,,,,,,,,, +i_00002,f1,_item_category1,,,,,,,, +i_000011,f1,_item_category2,,,,,,,6, +i_000020,f1,,,,,,,,, +i_000019,f1,,,,,,,,, +i_000015,f1,_item_category2,,,,,,,6, +i_00006,f1,_item_category2,,,,,,,, +i_000024,f1,_item_category2,,,,,,,, +i_00001,f1,_item_category2,,,,,,,6, +i_000010,f1,_item_category2,,,,,,,4, +i_00009,f1,_item_category3,,,,,,,, +i_000018,f1,_item_category3,,,,,,,, +i_000023,f1,_item_category3,,,,,,,, +i_000014,f1,_item_category3,,,,,,,6, +i_00005,f1,_item_category3,,,,,,,, +i_000021,f1,,,,,,,,, +i_000012,f1,_item_category3,,,,,,,6, +i_00003,f1,_item_category3,,,,,,,, +i_000025,f1,,,,,,,,, +i_00007,f1,,,,,,,,, +i_000016,f1,,,,,,,,, \ No newline at end of file diff --git a/tests/integration/items/test_attach_items.py b/tests/integration/items/test_attach_items.py index 3ec51de58..62dbfbac8 100644 --- a/tests/integration/items/test_attach_items.py +++ b/tests/integration/items/test_attach_items.py @@ -102,3 +102,321 @@ def test_attach_items_invalid_payload(self): pattern = r"(\s+)" + r"(\s+)".join(error_msg) with self.assertRaisesRegexp(AppException, pattern): sa.attach_items(self.PROJECT_NAME, [{"name": "name"}]) + + +def test_attach_gen_ai(): + try: + sa.delete_folders("TEST_LLM_SCV_CATEGORIES_UPLOAD", ["f1", "f2"]) + except Exception as e: + print(e) + sa.create_folder("TEST_LLM_SCV_CATEGORIES_UPLOAD", "f1") + sa.create_folder("TEST_LLM_SCV_CATEGORIES_UPLOAD", "f2") + csv_path = os.path.join( + Path(__file__).parent.parent.parent, "data_set/attach_genai.csv" + ) + sa.attach_items("TEST_LLM_SCV_CATEGORIES_UPLOAD/f1", csv_path) + sa.attach_items( + "TEST_LLM_SCV_CATEGORIES_UPLOAD/f2", + [ + { + "_item_name": "i_000017", + "_item_category": "_item_category1", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_00008", + "_item_category": "_item_category1", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_00004", + "_item_category": "_item_category1", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + 
"component_id_2": "", + }, + { + "_item_name": "i_000013", + "_item_category": "_item_category1", + "slider": 23, + "checkbox": ["Option 1", "Option 3"], + "radio": ["Option 1"], + "approve": 1, + "rating": 4, + "component_id_0": ["Option 2"], + "component_id_1": "6", + "component_id_2": ["Option 1"], + }, + { + "_item_name": "i_000022", + "_item_category": "", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_00002", + "_item_category": "_item_category1", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_000011", + "_item_category": "_item_category2", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": '["Option 2"]', + "component_id_1": "6", + "component_id_2": '["Option 1"]', + }, + { + "_item_name": "i_000020", + "_item_category": "", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_000019", + "_item_category": "", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_000015", + "_item_category": "_item_category2", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": '["Option 1"]', + "component_id_1": "6", + "component_id_2": '["Option 1"]', + }, + { + "_item_name": "i_00006", + "_item_category": "_item_category2", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_000024", + "_item_category": "_item_category2", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_00001", + "_item_category": "_item_category2", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": '["Option 2"]', + "component_id_1": "6", + "component_id_2": '["Option 2"]', + }, + { + "_item_name": "i_000010", + "_item_category": "_item_category2", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": '["Option 1"]', + "component_id_1": "4", + "component_id_2": '["Option 2"]', + }, + { + "_item_name": "i_00009", + "_item_category": "_item_category3", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_000018", + "_item_category": "_item_category3", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_000023", + "_item_category": "_item_category3", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_000014", + "_item_category": "_item_category3", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": '["Option 1"]', + 
"component_id_1": "6", + "component_id_2": '["Option 3"]', + }, + { + "_item_name": "i_00005", + "_item_category": "_item_category3", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_000021", + "_item_category": "", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_000012", + "_item_category": "_item_category3", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": '["Option 1"]', + "component_id_1": "6", + "component_id_2": '["Option 3"]', + }, + { + "_item_name": "i_00003", + "_item_category": "_item_category3", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_000025", + "_item_category": "", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_00007", + "_item_category": "", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + { + "_item_name": "i_000016", + "_item_category": "", + "slider": "", + "checkbox": "", + "radio": "", + "approve": "", + "rating": "", + "component_id_0": "", + "component_id_1": "", + "component_id_2": "", + }, + ], + ) From f590cdf5518e607a365d4efca050ed499aa0ad70 Mon Sep 17 00:00:00 2001 From: Vaghinak Basentsyan Date: Thu, 11 Jul 2024 17:12:20 +0400 Subject: [PATCH 3/6] tod --- src/superannotate/lib/core/reporter.py | 1 + src/superannotate/lib/core/usecases/items.py | 2 +- tests/integration/items/test_attach_items.py | 16 ++++++++-------- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/superannotate/lib/core/reporter.py b/src/superannotate/lib/core/reporter.py index 32cbea5c2..0ad46fb16 100644 --- a/src/superannotate/lib/core/reporter.py +++ b/src/superannotate/lib/core/reporter.py @@ -107,6 +107,7 @@ def start_progress( description: str = "Processing", disable=False, ): + disable = disable or not self._log_info self.progress_bar = self.get_progress_bar(iterations, description, disable) @staticmethod diff --git a/src/superannotate/lib/core/usecases/items.py b/src/superannotate/lib/core/usecases/items.py index c89693e92..10a56c8c0 100644 --- a/src/superannotate/lib/core/usecases/items.py +++ b/src/superannotate/lib/core/usecases/items.py @@ -623,7 +623,7 @@ def execute(self) -> Response: [i["annotation"] for i in id_annotation_map.values()] ), service_provider=self._service_provider, - reporter=Reporter(log_info=False), + reporter=Reporter(log_info=False, log_debug=False), user=self._user, keep_status=True, transform_version="llmJson", diff --git a/tests/integration/items/test_attach_items.py b/tests/integration/items/test_attach_items.py index 62dbfbac8..c991f9cca 100644 --- a/tests/integration/items/test_attach_items.py +++ b/tests/integration/items/test_attach_items.py @@ -408,15 +408,15 @@ def test_attach_gen_ai(): }, { "_item_name": "i_000016", - "_item_category": "", - "slider": "", - "checkbox": "", + "_item_category": None, + "slider": None, + "checkbox": None, "radio": "", - "approve": "", - "rating": "", - "component_id_0": "", - "component_id_1": "", - 
"component_id_2": "", + "approve": None, + "rating": None, + "component_id_0": None, + "component_id_1": None, + "component_id_2": None, }, ], ) From 77e755b75e277552c6129e5bc2cdfbb2d0444fcb Mon Sep 17 00:00:00 2001 From: Vaghinak Basentsyan Date: Mon, 15 Jul 2024 10:23:32 +0400 Subject: [PATCH 4/6] Doc string update --- src/superannotate/lib/app/interface/sdk_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/superannotate/lib/app/interface/sdk_interface.py b/src/superannotate/lib/app/interface/sdk_interface.py index d5fdbd52b..f6db1039a 100644 --- a/src/superannotate/lib/app/interface/sdk_interface.py +++ b/src/superannotate/lib/app/interface/sdk_interface.py @@ -2721,7 +2721,7 @@ def attach_items( "_item_name": "item", "_category": "heart", "category_text_input": "value1", - "category_numeric_input": "value1", + "category_numeric_input": 5, "category_approve_input": 0, "category_rating_input": 4, "category_slider_input": 23, From b4cf6a9b9f694979c88dce86ffb3264137055068 Mon Sep 17 00:00:00 2001 From: Vaghinak Basentsyan Date: Tue, 3 Sep 2024 16:26:10 +0400 Subject: [PATCH 5/6] GenAIAttachmentEntity update --- src/superannotate/lib/core/entities/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/superannotate/lib/core/entities/project.py b/src/superannotate/lib/core/entities/project.py index 30c228edd..8a46cf7d7 100644 --- a/src/superannotate/lib/core/entities/project.py +++ b/src/superannotate/lib/core/entities/project.py @@ -52,7 +52,7 @@ def __hash__(self): class GenAIAttachmentEntity(BaseModel): _item_name: Optional[str] = Field(default_factory=lambda: str(uuid.uuid4())) - _item_category: Optional[str] = Field(default=None) + _item_category: Optional[str] = None integration: Optional[str] = None integration_id: Optional[int] = None From 8433b83baddb271e15ae5ed7abf177bf8d545e6d Mon Sep 17 00:00:00 2001 From: Vaghinak Basentsyan <84702976+VaghinakDev@users.noreply.github.com> Date: Wed, 4 Sep 2024 14:23:55 +0400 Subject: [PATCH 6/6] Update __init__.py --- src/superannotate/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/superannotate/__init__.py b/src/superannotate/__init__.py index 1949b5b8b..9ab324301 100644 --- a/src/superannotate/__init__.py +++ b/src/superannotate/__init__.py @@ -3,7 +3,7 @@ import sys -__version__ = "4.4.24dev1" +__version__ = "4.4.24dev2" sys.path.append(os.path.split(os.path.realpath(__file__))[0])