From b2e94ceb6a2ffc6cb625d2ce2ba21f1a089b8245 Mon Sep 17 00:00:00 2001
From: Ayumu-Nono
Date: Mon, 22 Dec 2025 16:43:21 +0900
Subject: [PATCH 1/6] storage rework: first pass complete
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .env.local                                |   9 +
 .gitignore                                |   3 +-
 app/api/response_model.py                 |   2 +
 app/api/route/runs.py                     |  12 +
 app/api/route/storage.py                  | 582 ++++++++++++++++++++
 app/api/route/storage_v2.py               | 346 ++++++++++++
 app/api/route/users.py                    |   7 +-
 app/define_db/database.py                 |  20 +
 app/define_db/models.py                   |  79 ++-
 app/main.py                               |   5 +-
 app/migrate_storage_address.py            |  90 +++
 app/services/hal/__init__.py              | 217 ++++++++
 app/services/hal/db_backend.py            | 203 +++++++
 app/services/hal/hybrid_access_layer.py   | 632 ++++++++++++++++++++++
 app/services/hal/models.py                | 105 ++++
 app/services/s3_service.py                | 229 ++++++++
 app/services/storage/__init__.py          |  22 +
 app/services/storage/backends/__init__.py |  14 +
 app/services/storage/backends/base.py     | 138 +++++
 app/services/storage/backends/local.py    | 157 ++++++
 app/services/storage/backends/s3.py       | 173 ++++++
 app/services/storage/config.py            |  67 +++
 app/services/storage/exceptions.py        |  29 +
 app/services/storage/registry.py          |  65 +++
 app/services/storage/service.py           | 148 +++++
 app/services/storage_service.py           |  41 ++
 app/services/zip_service.py               | 237 ++++++++
 app/test_s3_connection.py                 | 188 +++++++
 playground_merlin/test_storage.py         | 304 +++++++++++
 requirements.txt                          |   2 +
 scripts/migrate_storage_address.py        | 102 ++++
 31 files changed, 4223 insertions(+), 5 deletions(-)
 create mode 100644 .env.local
 create mode 100644 app/api/route/storage.py
 create mode 100644 app/api/route/storage_v2.py
 create mode 100644 app/migrate_storage_address.py
 create mode 100644 app/services/hal/__init__.py
 create mode 100644 app/services/hal/db_backend.py
 create mode 100644 app/services/hal/hybrid_access_layer.py
 create mode 100644 app/services/hal/models.py
 create mode 100644 app/services/s3_service.py
 create mode 100644 app/services/storage/__init__.py
 create mode 100644 app/services/storage/backends/__init__.py
 create mode 100644 app/services/storage/backends/base.py
 create mode 100644 app/services/storage/backends/local.py
 create mode 100644 app/services/storage/backends/s3.py
 create mode 100644 app/services/storage/config.py
 create mode 100644 app/services/storage/exceptions.py
 create mode 100644 app/services/storage/registry.py
 create mode 100644 app/services/storage/service.py
 create mode 100644 app/services/storage_service.py
 create mode 100644 app/services/zip_service.py
 create mode 100644 app/test_s3_connection.py
 create mode 100644 playground_merlin/test_storage.py
 create mode 100644 scripts/migrate_storage_address.py

diff --git a/.env.local b/.env.local
new file mode 100644
index 0000000..9253e70
--- /dev/null
+++ b/.env.local
@@ -0,0 +1,9 @@
+# ローカルモード設定
+STORAGE_MODE=local
+LOCAL_STORAGE_PATH=/data/storage
+
+# ダミーS3設定(ローカルモードでは使用されない)
+AWS_ACCESS_KEY_ID=dummy
+AWS_SECRET_ACCESS_KEY=dummy
+AWS_DEFAULT_REGION=ap-northeast-1
+S3_BUCKET_NAME=labcode-dev-artifacts
diff --git a/.gitignore b/.gitignore
index 4d6813e..5596f89 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 .venv
 __pycache__
-*.db
\ No newline at end of file
+*.db
+.env
\ No newline at end of file
diff --git a/app/api/response_model.py b/app/api/response_model.py
index 49d39b2..5cc9004 100644
--- a/app/api/response_model.py
+++ b/app/api/response_model.py
@@ -34,6 +34,7 @@ class RunResponse(BaseModel):
finished_at: Optional[datetime] status: str storage_address: str + storage_mode: Optional[str] = None # ★追加: 's3' または 'local' deleted_at: datetime | None display_visible: bool # project: Optional[ProjectResponse] # リレーション @@ -55,6 +56,7 @@ class RunResponseWithProjectName(BaseModel): finished_at: Optional[datetime] status: str storage_address: str + storage_mode: Optional[str] = None # ★追加: 's3' または 'local' deleted_at: datetime | None display_visible: bool # project: Optional[ProjectResponse] # リレーション diff --git a/app/api/route/runs.py b/app/api/route/runs.py index 3058440..95fa1b2 100644 --- a/app/api/route/runs.py +++ b/app/api/route/runs.py @@ -3,6 +3,7 @@ from api.response_model import RunResponse, OperationResponseWithProcessStorageAddress, ProcessResponseEnhanced, ProcessDetailResponse from api.route.processes import load_port_info_from_db from services.port_auto_generator import auto_generate_ports_for_run +from services.hal import infer_storage_mode_for_run from fastapi import APIRouter from fastapi import Form from fastapi import HTTPException @@ -53,6 +54,10 @@ def read(id: int): run = session.query(Run).filter(Run.id == id, Run.deleted_at.is_(None)).first() if not run: raise HTTPException(status_code=404, detail="Run not found") + # storage_mode=nullの場合は推論して値を設定(DBに永続化) + # 2回目以降はキャッシュヒットでS3/DBアクセスなし + if run.storage_mode is None: + run.storage_mode = infer_storage_mode_for_run(session, run) return RunResponse.model_validate(run) @@ -204,6 +209,13 @@ def patch(id: int, attribute: str = Form(), new_value: str = Form()): detail="display_visible must be 'true' or 'false'" ) run.display_visible = (new_value.lower() == "true") + case "storage_mode": + if new_value not in ("s3", "local"): + raise HTTPException( + status_code=400, + detail="storage_mode must be 's3' or 'local'" + ) + run.storage_mode = new_value case _: raise HTTPException(status_code=400, detail="Invalid attribute") session.commit() diff --git a/app/api/route/storage.py b/app/api/route/storage.py new file mode 100644 index 0000000..b97c08e --- /dev/null +++ b/app/api/route/storage.py @@ -0,0 +1,582 @@ +"""ストレージAPI(S3連携) + +S3バケット内のファイル操作を行うAPIエンドポイント: +- GET /api/storage/list: ファイル一覧取得 +- GET /api/storage/preview: ファイルプレビュー +- GET /api/storage/download: ダウンロードURL生成 +- POST /api/storage/batch-download: 一括ダウンロード(ZIP形式) +""" + +from datetime import datetime, timedelta +from typing import Optional, List +from fastapi import APIRouter, HTTPException, Query, Depends +from fastapi.responses import StreamingResponse +from pydantic import BaseModel, Field +from sqlalchemy.orm import Session +from botocore.exceptions import ClientError +from services.s3_service import S3Service, get_content_type +from services.zip_service import ( + ZipStreamService, + SizeLimitExceededError, + RunNotFoundError +) +from define_db.database import get_db +from define_db.models import Run +from services.storage_service import get_storage +import logging +import os + +logger = logging.getLogger(__name__) + +router = APIRouter() + + +# ==================== Response Models ==================== + +class StorageInfoResponse(BaseModel): + """ストレージ情報レスポンス""" + mode: str # 's3' or 'local' + bucket_name: Optional[str] = None # S3バケット名(S3モードのみ) + local_path: Optional[str] = None # ローカルパス(ローカルモードのみ) + db_path: Optional[str] = None # SQLiteデータベースパス(ローカルモードのみ) + +class FileItem(BaseModel): + """ファイル情報""" + name: str + type: str # 'file' or 'directory' + path: str + size: Optional[int] = None + last_modified: Optional[str] = None + extension: Optional[str] = 
None + + +class DirectoryItem(BaseModel): + """ディレクトリ情報""" + name: str + type: str = 'directory' + path: str + + +class PaginationInfo(BaseModel): + """ページネーション情報""" + total: int + page: int + per_page: int + total_pages: int + + +class ListResponse(BaseModel): + """ファイル一覧レスポンス""" + files: List[FileItem] + directories: List[DirectoryItem] + pagination: PaginationInfo + + +class PreviewResponse(BaseModel): + """プレビューレスポンス""" + content: str + content_type: str + size: int + last_modified: str + truncated: bool + + +class DownloadResponse(BaseModel): + """ダウンロードURLレスポンス""" + download_url: str + expires_at: str + + +class BatchDownloadRequest(BaseModel): + """バッチダウンロードリクエスト""" + run_ids: List[int] = Field( + ..., + min_length=1, + max_length=100, + description="ダウンロード対象のランIDリスト" + ) + + +class BatchDownloadEstimate(BaseModel): + """バッチダウンロード推定サイズレスポンス""" + run_count: int + estimated_size: int + estimated_size_mb: float + can_download: bool + message: Optional[str] = None + + +# ==================== Utility Functions ==================== + +def sort_files(files: List[dict], sort_by: str, order: str) -> List[dict]: + """ + ファイルリストをソートする + + Args: + files: ファイルリスト + sort_by: ソート対象 ('name', 'size', 'last_modified') + order: ソート順 ('asc', 'desc') + + Returns: + ソート済みファイルリスト + """ + reverse = order == 'desc' + + if sort_by == 'name': + return sorted(files, key=lambda x: x.get('name', '').lower(), reverse=reverse) + elif sort_by == 'size': + return sorted(files, key=lambda x: x.get('size', 0) or 0, reverse=reverse) + elif sort_by == 'last_modified': + return sorted(files, key=lambda x: x.get('last_modified', ''), reverse=reverse) + else: + return files + + +# ==================== Endpoints ==================== + +@router.get("/storage/info", tags=["storage"], response_model=StorageInfoResponse) +async def get_storage_info(): + """ + ストレージモード情報を取得する + + Returns: + StorageInfoResponse: ストレージモード、バケット名またはローカルパス + """ + try: + storage = get_storage() + + if storage.mode == 's3': + return StorageInfoResponse( + mode='s3', + bucket_name=os.getenv('S3_BUCKET_NAME', 'labcode-dev-artifacts'), + local_path=None + ) + else: + return StorageInfoResponse( + mode='local', + bucket_name=None, + local_path=os.getenv('LOCAL_STORAGE_PATH', '/data/storage'), + db_path=os.getenv('DATABASE_URL', 'sqlite:////data/sql_app.db').replace('sqlite:///', '') + ) + except Exception as e: + logger.error(f"Error getting storage info: {e}") + raise HTTPException(status_code=500, detail="Failed to get storage info") + + +@router.get("/storage/list", tags=["storage"], response_model=ListResponse) +async def list_files( + prefix: str = Query(..., description="S3プレフィックス(例: runs/1/)"), + sort_by: str = Query("name", description="ソート対象: name, size, last_modified"), + order: str = Query("asc", description="ソート順: asc, desc"), + page: int = Query(1, ge=1, description="ページ番号"), + per_page: int = Query(50, ge=1, le=100, description="1ページあたりの件数") +): + """ + S3バケット内のファイル・フォルダ一覧を取得する + + Args: + prefix: S3プレフィックス + sort_by: ソート対象 + order: ソート順 + page: ページ番号 + per_page: 1ページあたりの件数 + + Returns: + ListResponse: ファイル一覧、ディレクトリ一覧、ページネーション情報 + """ + # パラメータバリデーション + if sort_by not in ['name', 'size', 'last_modified']: + raise HTTPException(status_code=400, detail="sort_by must be 'name', 'size', or 'last_modified'") + if order not in ['asc', 'desc']: + raise HTTPException(status_code=400, detail="order must be 'asc' or 'desc'") + + try: + s3 = S3Service() + response = s3.list_objects(prefix=prefix) + + # ファイル一覧の構築 + files = [] + for obj in 
response['contents']: + key = obj['Key'] + # prefixと同一のキーは除外(フォルダ自体) + if key != prefix and not key.endswith('/'): + name = key.split('/')[-1] + extension = name.split('.')[-1].lower() if '.' in name else '' + files.append({ + 'name': name, + 'type': 'file', + 'path': key, + 'size': obj['Size'], + 'last_modified': obj['LastModified'].isoformat(), + 'extension': extension + }) + + # ディレクトリ一覧の構築 + directories = [] + for prefix_info in response['common_prefixes']: + dir_path = prefix_info['Prefix'] + dir_name = dir_path.rstrip('/').split('/')[-1] + directories.append({ + 'name': dir_name, + 'type': 'directory', + 'path': dir_path + }) + + # ソート + files = sort_files(files, sort_by, order) + + # ページネーション + total = len(files) + start = (page - 1) * per_page + end = start + per_page + paginated_files = files[start:end] + + return ListResponse( + files=[FileItem(**f) for f in paginated_files], + directories=[DirectoryItem(**d) for d in directories], + pagination=PaginationInfo( + total=total, + page=page, + per_page=per_page, + total_pages=(total + per_page - 1) // per_page if total > 0 else 1 + ) + ) + + except ClientError as e: + error_code = e.response.get('Error', {}).get('Code', 'Unknown') + logger.error(f"S3 ClientError: {error_code} - {e}") + if error_code == 'NoSuchBucket': + raise HTTPException(status_code=500, detail="Bucket not found") + elif error_code == 'AccessDenied': + raise HTTPException(status_code=403, detail="Access denied to S3") + else: + raise HTTPException(status_code=500, detail=f"Failed to connect to S3: {error_code}") + except Exception as e: + logger.error(f"Unexpected error in list_files: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + + +@router.get("/storage/preview", tags=["storage"], response_model=PreviewResponse) +async def preview_file( + file_path: str = Query(..., description="S3キー(例: runs/1/output.json)"), + max_lines: int = Query(1000, ge=1, le=10000, description="最大行数") +): + """ + テキストファイルの内容を取得してプレビューする + + Args: + file_path: S3キー + max_lines: 最大行数(デフォルト: 1000) + + Returns: + PreviewResponse: ファイル内容、コンテンツタイプ、サイズ等 + """ + # ファイルタイプ判定 + extension = file_path.split('.')[-1].lower() if '.' 
in file_path else '' + content_type = get_content_type(extension) + + if content_type == 'binary': + raise HTTPException( + status_code=415, + detail="Binary files cannot be previewed" + ) + + try: + s3 = S3Service() + response = s3.get_object(key=file_path) + + # 内容をデコード + try: + content = response['body'].decode('utf-8') + except UnicodeDecodeError: + raise HTTPException( + status_code=415, + detail="File encoding is not UTF-8, cannot preview" + ) + + # 行数制限 + lines = content.split('\n') + truncated = len(lines) > max_lines + if truncated: + content = '\n'.join(lines[:max_lines]) + + return PreviewResponse( + content=content, + content_type=content_type, + size=response['content_length'], + last_modified=response['last_modified'].isoformat(), + truncated=truncated + ) + + except ClientError as e: + error_code = e.response.get('Error', {}).get('Code', 'Unknown') + logger.error(f"S3 ClientError: {error_code} - {e}") + if error_code == 'NoSuchKey': + raise HTTPException(status_code=404, detail="File not found") + elif error_code == 'AccessDenied': + raise HTTPException(status_code=403, detail="Access denied to S3") + else: + raise HTTPException(status_code=500, detail=f"Failed to fetch file: {error_code}") + except Exception as e: + logger.error(f"Unexpected error in preview_file: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + + +@router.get("/storage/download", tags=["storage"], response_model=DownloadResponse) +async def download_file( + file_path: str = Query(..., description="S3キー(例: runs/1/output.json)"), + expires_in: int = Query(3600, ge=60, le=86400, description="有効期限(秒)") +): + """ + ダウンロード用の事前署名URLを生成する + + Args: + file_path: S3キー + expires_in: 有効期限(秒)、デフォルト3600秒(1時間) + + Returns: + DownloadResponse: 事前署名URL、有効期限 + """ + try: + s3 = S3Service() + + # ファイル存在確認 + try: + s3.head_object(key=file_path) + except ClientError as e: + error_code = e.response.get('Error', {}).get('Code', 'Unknown') + if error_code == '404' or error_code == 'NoSuchKey': + raise HTTPException(status_code=404, detail="File not found") + raise + + # 事前署名URL生成 + url = s3.generate_presigned_url(key=file_path, expires_in=expires_in) + + # 有効期限計算 + expires_at = datetime.utcnow() + timedelta(seconds=expires_in) + + return DownloadResponse( + download_url=url, + expires_at=expires_at.isoformat() + 'Z' + ) + + except HTTPException: + raise + except ClientError as e: + error_code = e.response.get('Error', {}).get('Code', 'Unknown') + logger.error(f"S3 ClientError: {error_code} - {e}") + if error_code == 'AccessDenied': + raise HTTPException(status_code=403, detail="Access denied to S3") + else: + raise HTTPException(status_code=500, detail=f"Failed to generate download URL: {error_code}") + except Exception as e: + logger.error(f"Unexpected error in download_file: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + + +@router.get("/storage/download-direct", tags=["storage"]) +async def download_file_direct( + file_path: str = Query(..., description="ファイルパス(例: runs/1/protocol.yaml)") +): + """ + ファイルを直接ダウンロードする(ローカルモード用) + + Args: + file_path: ファイルパス + + Returns: + StreamingResponse: ファイルストリーム + """ + try: + s3 = S3Service() + + # ファイル存在確認 + try: + s3.head_object(key=file_path) + except ClientError as e: + error_code = e.response.get('Error', {}).get('Code', 'Unknown') + if error_code == '404' or error_code == 'NoSuchKey': + raise HTTPException(status_code=404, detail="File not found") + raise + + # ファイル名を抽出 + filename = file_path.split('/')[-1] + + # 
ストリーミングでファイルを返す + def file_generator(): + for chunk in s3.get_object_stream(file_path): + yield chunk + + return StreamingResponse( + file_generator(), + media_type='application/octet-stream', + headers={ + 'Content-Disposition': f'attachment; filename="{filename}"' + } + ) + + except HTTPException: + raise + except ClientError as e: + error_code = e.response.get('Error', {}).get('Code', 'Unknown') + logger.error(f"Storage ClientError in download_file_direct: {error_code} - {e}") + if error_code == 'AccessDenied': + raise HTTPException(status_code=403, detail="Access denied") + else: + raise HTTPException(status_code=500, detail=f"Failed to download file: {error_code}") + except Exception as e: + logger.error(f"Unexpected error in download_file_direct: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + + +@router.post("/storage/batch-download", tags=["storage"]) +async def batch_download( + request: BatchDownloadRequest, + db: Session = Depends(get_db) +): + """ + 複数ランのファイルをZIP形式で一括ダウンロードする + + Args: + request: バッチダウンロードリクエスト(run_ids) + db: データベースセッション + + Returns: + StreamingResponse: ZIPファイルストリーム + """ + try: + # ランIDのバリデーション + if not request.run_ids: + raise HTTPException( + status_code=400, + detail="run_ids is required and must not be empty" + ) + + # データベースからラン情報を取得 + runs = db.query(Run).filter(Run.id.in_(request.run_ids)).all() + + if not runs: + raise HTTPException( + status_code=404, + detail="No runs found for the specified IDs" + ) + + # ラン情報を辞書リストに変換 + runs_data = [ + { + 'id': run.id, + 'storage_address': run.storage_address, + 'file_name': run.file_name, + 'status': run.status + } + for run in runs + ] + + # 見つからなかったランIDを警告 + found_ids = {run.id for run in runs} + missing_ids = set(request.run_ids) - found_ids + if missing_ids: + logger.warning(f"Some runs not found: {missing_ids}") + + # ZIPストリームを生成 + zip_service = ZipStreamService() + zip_stream = zip_service.create_zip_stream(runs_data) + filename = zip_service.generate_filename() + + return StreamingResponse( + zip_stream, + media_type='application/zip', + headers={ + 'Content-Disposition': f'attachment; filename="{filename}"' + } + ) + + except SizeLimitExceededError as e: + raise HTTPException(status_code=413, detail=str(e)) + except HTTPException: + raise + except ClientError as e: + error_code = e.response.get('Error', {}).get('Code', 'Unknown') + logger.error(f"S3 ClientError in batch_download: {error_code} - {e}") + if error_code == 'AccessDenied': + raise HTTPException(status_code=403, detail="Access denied to S3") + else: + raise HTTPException( + status_code=503, + detail="Failed to connect to storage service" + ) + except Exception as e: + logger.error(f"Unexpected error in batch_download: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + + +@router.post("/storage/batch-download/estimate", tags=["storage"], response_model=BatchDownloadEstimate) +async def estimate_batch_download( + request: BatchDownloadRequest, + db: Session = Depends(get_db) +): + """ + バッチダウンロードの推定サイズを取得する + + Args: + request: バッチダウンロードリクエスト(run_ids) + db: データベースセッション + + Returns: + BatchDownloadEstimate: 推定サイズ情報 + """ + try: + # データベースからラン情報を取得 + runs = db.query(Run).filter(Run.id.in_(request.run_ids)).all() + + if not runs: + return BatchDownloadEstimate( + run_count=0, + estimated_size=0, + estimated_size_mb=0.0, + can_download=False, + message="No runs found for the specified IDs" + ) + + # ラン情報を辞書リストに変換 + runs_data = [ + { + 'id': run.id, + 'storage_address': run.storage_address + 
} + for run in runs + ] + + # サイズ推定 + zip_service = ZipStreamService() + estimated_size = zip_service.estimate_zip_size( + request.run_ids, + runs_data + ) + + # 500MB上限チェック + max_size = 500 * 1024 * 1024 + can_download = estimated_size <= max_size + + message = None + if not can_download: + message = f"Estimated size ({estimated_size // (1024*1024)}MB) exceeds limit (500MB)" + + return BatchDownloadEstimate( + run_count=len(runs), + estimated_size=estimated_size, + estimated_size_mb=round(estimated_size / (1024 * 1024), 2), + can_download=can_download, + message=message + ) + + except ClientError as e: + error_code = e.response.get('Error', {}).get('Code', 'Unknown') + logger.error(f"S3 ClientError in estimate_batch_download: {error_code} - {e}") + raise HTTPException( + status_code=503, + detail="Failed to connect to storage service" + ) + except Exception as e: + logger.error(f"Unexpected error in estimate_batch_download: {e}") + raise HTTPException(status_code=500, detail="Internal server error") diff --git a/app/api/route/storage_v2.py b/app/api/route/storage_v2.py new file mode 100644 index 0000000..10f1db7 --- /dev/null +++ b/app/api/route/storage_v2.py @@ -0,0 +1,346 @@ +"""Storage API v2 + +Hybrid Access Layer (HAL) を使用した新しいストレージAPI。 +Run IDベースのアクセスで、S3/ローカル/DBデータを統一的に扱う。 +""" + +import logging +import os +import tempfile +import sqlite3 +from typing import Optional +from fastapi import APIRouter, Depends, Query, HTTPException +from fastapi.responses import PlainTextResponse, FileResponse +from sqlalchemy.orm import Session + +from define_db.database import get_db +from define_db.models import Run, Process, Operation, Edge, Port +from services.hal import HybridAccessLayer + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/api/v2/storage", tags=["storage-v2"]) + + +@router.get("/list/{run_id}") +def list_run_contents( + run_id: int, + prefix: str = Query("", description="仮想パスプレフィックス"), + db: Session = Depends(get_db) +): + """ + Run内のコンテンツ一覧を取得 + + S3モード: S3ファイル一覧 + ローカルモード: DBデータを仮想ファイルとして表示 + """ + try: + hal = HybridAccessLayer(db) + items = hal.list_contents(run_id, prefix) + return { + "run_id": run_id, + "prefix": prefix, + "items": [item.to_dict() for item in items] + } + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + except RuntimeError as e: + logger.error(f"Runtime error in list_run_contents: {e}") + raise HTTPException(status_code=503, detail=str(e)) + except Exception as e: + logger.error(f"Unexpected error in list_run_contents: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + + +@router.get("/content/{run_id}") +def load_content( + run_id: int, + path: str = Query(..., description="仮想パス"), + db: Session = Depends(get_db) +): + """ + コンテンツを取得(プレビュー用) + + テキストファイルの場合は文字列として返却 + """ + try: + hal = HybridAccessLayer(db) + content = hal.load_content(run_id, path) + + if content is None: + raise HTTPException(status_code=404, detail="Content not found") + + # テキストとして返却 + try: + text = content.decode('utf-8') + return {"content": text, "encoding": "utf-8"} + except UnicodeDecodeError: + # バイナリの場合はBase64エンコード + import base64 + return {"content": base64.b64encode(content).decode(), "encoding": "base64"} + + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + except HTTPException: + raise + except Exception as e: + logger.error(f"Unexpected error in load_content: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + + 
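+# Usage sketch (illustrative only): how a client might walk the two
+# endpoints above. The base URL, run id 22, and the log path are assumed
+# values, and `requests` is not a declared dependency of this project.
+#
+#   import base64
+#   import requests
+#
+#   BASE = "http://localhost:8000"
+#
+#   # 1. List the run's virtual root (S3 keys, local files, or DB-backed
+#   #    virtual entries, depending on the run's storage_mode).
+#   listing = requests.get(f"{BASE}/api/v2/storage/list/22").json()
+#   for item in listing["items"]:
+#       print(item["type"], item["path"])
+#
+#   # 2. Fetch one entry for preview; text arrives as UTF-8, binary as
+#   #    Base64, mirroring the branches in load_content above.
+#   resp = requests.get(
+#       f"{BASE}/api/v2/storage/content/22",
+#       params={"path": "operations/172/log.txt"},
+#   ).json()
+#   text = (resp["content"] if resp["encoding"] == "utf-8"
+#           else base64.b64decode(resp["content"]).decode("utf-8", "replace"))
+#   print(text)
+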
+@router.get("/download/{run_id}") +def get_download_url( + run_id: int, + path: str = Query(..., description="仮想パス"), + db: Session = Depends(get_db) +): + """ダウンロードURLを取得""" + try: + hal = HybridAccessLayer(db) + url = hal.get_download_url(run_id, path) + return {"url": url, "run_id": run_id, "path": path} + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + except Exception as e: + logger.error(f"Unexpected error in get_download_url: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + + +@router.get("/info/{run_id}") +def get_storage_info( + run_id: int, + db: Session = Depends(get_db) +): + """Runのストレージ情報を取得""" + try: + hal = HybridAccessLayer(db) + info = hal.get_storage_info(run_id) + return info.to_dict() + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + except Exception as e: + logger.error(f"Unexpected error in get_storage_info: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + + +@router.get("/db-content/{run_id}") +def get_db_content( + run_id: int, + path: str = Query(..., description="仮想パス"), + op_id: Optional[int] = Query(None, description="Operation ID"), + db: Session = Depends(get_db) +): + """DBに保存されたコンテンツを直接取得""" + # オペレーションログの取得 + if "operations/" in path and path.endswith("log.txt") and op_id: + operation = db.query(Operation).filter(Operation.id == op_id).first() + if operation and operation.log: + return PlainTextResponse( + content=operation.log, + media_type="text/plain", + headers={"Content-Disposition": f"attachment; filename=log_{op_id}.txt"} + ) + + raise HTTPException(status_code=404, detail="Content not found in database") + + +@router.get("/dump/{run_id}") +def download_sql_dump( + run_id: int, + db: Session = Depends(get_db) +): + """ + Run関連データのSQLiteダンプをダウンロード + + ローカルモードのRunに対して、関連する全データを + 独立したSQLiteファイルとしてエクスポートする。 + + 含まれるデータ: + - runs: 該当Run + - processes: Run内のProcess + - operations: Process内のOperation + - edges: Run内のEdge + - ports: Process内のPort + """ + # Runの存在確認とモードチェック + run = db.query(Run).filter(Run.id == run_id).first() + if not run: + raise HTTPException(status_code=404, detail=f"Run {run_id} not found") + + if run.storage_mode != 'local': + raise HTTPException( + status_code=400, + detail=f"SQL dump is only available for local mode runs. This run uses '{run.storage_mode}' mode." 
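+            # Note: a NULL storage_mode also fails the check above, so only
+            # runs explicitly marked 'local' can be dumped.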
+ ) + + try: + # 一時ファイルを作成 + temp_file = tempfile.NamedTemporaryFile( + delete=False, + suffix='.db', + prefix=f'run_{run_id}_' + ) + temp_path = temp_file.name + temp_file.close() + + # 新しいSQLiteデータベースを作成 + conn = sqlite3.connect(temp_path) + cursor = conn.cursor() + + # テーブル作成 + cursor.execute(''' + CREATE TABLE runs ( + id INTEGER PRIMARY KEY, + project_id INTEGER, + file_name TEXT, + checksum TEXT, + user_id INTEGER, + added_at TEXT, + started_at TEXT, + finished_at TEXT, + status TEXT, + storage_address TEXT, + storage_mode TEXT, + deleted_at TEXT, + display_visible INTEGER + ) + ''') + + cursor.execute(''' + CREATE TABLE processes ( + id INTEGER PRIMARY KEY, + name TEXT, + run_id INTEGER, + storage_address TEXT, + process_type TEXT, + FOREIGN KEY (run_id) REFERENCES runs(id) + ) + ''') + + cursor.execute(''' + CREATE TABLE operations ( + id INTEGER PRIMARY KEY, + process_id INTEGER, + name TEXT, + parent_id INTEGER, + started_at TEXT, + finished_at TEXT, + status TEXT, + storage_address TEXT, + is_transport INTEGER, + is_data INTEGER, + log TEXT, + FOREIGN KEY (process_id) REFERENCES processes(id) + ) + ''') + + cursor.execute(''' + CREATE TABLE edges ( + id INTEGER PRIMARY KEY, + run_id INTEGER, + from_id INTEGER, + to_id INTEGER, + FOREIGN KEY (run_id) REFERENCES runs(id) + ) + ''') + + cursor.execute(''' + CREATE TABLE ports ( + id INTEGER PRIMARY KEY, + process_id INTEGER, + port_name TEXT, + port_type TEXT, + data_type TEXT, + position INTEGER, + is_required INTEGER, + default_value TEXT, + description TEXT, + FOREIGN KEY (process_id) REFERENCES processes(id) + ) + ''') + + # Runデータを挿入 + cursor.execute(''' + INSERT INTO runs VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ''', ( + run.id, run.project_id, run.file_name, run.checksum, run.user_id, + run.added_at.isoformat() if run.added_at else None, + run.started_at.isoformat() if run.started_at else None, + run.finished_at.isoformat() if run.finished_at else None, + run.status, run.storage_address, run.storage_mode, + run.deleted_at.isoformat() if run.deleted_at else None, + 1 if run.display_visible else 0 + )) + + # Processデータを取得・挿入 + processes = db.query(Process).filter(Process.run_id == run_id).all() + process_ids = [p.id for p in processes] + + for p in processes: + cursor.execute(''' + INSERT INTO processes VALUES (?, ?, ?, ?, ?) + ''', (p.id, p.name, p.run_id, p.storage_address, p.process_type)) + + # Operationデータを挿入 + if process_ids: + operations = db.query(Operation).filter( + Operation.process_id.in_(process_ids) + ).all() + + for op in operations: + cursor.execute(''' + INSERT INTO operations VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ''', ( + op.id, op.process_id, op.name, op.parent_id, + op.started_at.isoformat() if op.started_at else None, + op.finished_at.isoformat() if op.finished_at else None, + op.status, op.storage_address, + 1 if op.is_transport else 0, + 1 if op.is_data else 0, + op.log + )) + + # Portデータを挿入 + ports = db.query(Port).filter( + Port.process_id.in_(process_ids) + ).all() + + for port in ports: + cursor.execute(''' + INSERT INTO ports VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + ''', ( + port.id, port.process_id, port.port_name, port.port_type, + port.data_type, port.position, + 1 if port.is_required else 0, + getattr(port, 'default_value', None), + getattr(port, 'description', None) + )) + + # Edgeデータを挿入 + edges = db.query(Edge).filter(Edge.run_id == run_id).all() + for e in edges: + cursor.execute(''' + INSERT INTO edges VALUES (?, ?, ?, ?) 
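+                -- one row per (from_id -> to_id) edge in this run's graph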
+ ''', (e.id, e.run_id, e.from_id, e.to_id)) + + conn.commit() + conn.close() + + # ファイルサイズをログ + file_size = os.path.getsize(temp_path) + logger.info(f"Created SQL dump for run {run_id}: {file_size} bytes") + + # FileResponseで返却(cleanup後に自動削除) + return FileResponse( + path=temp_path, + filename=f"run_{run_id}_dump.db", + media_type="application/x-sqlite3", + background=None # 同期的に処理 + ) + + except Exception as e: + logger.error(f"Error creating SQL dump for run {run_id}: {e}") + # 一時ファイルがあれば削除 + if 'temp_path' in locals() and os.path.exists(temp_path): + os.unlink(temp_path) + raise HTTPException(status_code=500, detail=f"Failed to create SQL dump: {str(e)}") diff --git a/app/api/route/users.py b/app/api/route/users.py index acd6f23..af0ded1 100644 --- a/app/api/route/users.py +++ b/app/api/route/users.py @@ -1,6 +1,7 @@ from define_db.models import User, Run, Project from define_db.database import SessionLocal from api.response_model import UserResponse, RunResponseWithProjectName +from services.hal import batch_infer_storage_modes from fastapi import Form from fastapi import APIRouter from fastapi import HTTPException @@ -62,9 +63,13 @@ def read_runs(id: int, include_hidden: bool = False): query = query.filter(Run.display_visible == True) runs = query.all() + + # バッチ最適化: 未キャッシュのRunのstorage_modeを一括推論・永続化 + # 1回のS3リクエスト + 1回のDBクエリで全Run判定(N回→2回に削減) + batch_infer_storage_modes(session, runs) + for run in runs: run.project_name = run.project.name - # return [run for run, _ in runs] return runs diff --git a/app/define_db/database.py b/app/define_db/database.py index e021748..4974d93 100644 --- a/app/define_db/database.py +++ b/app/define_db/database.py @@ -29,3 +29,23 @@ def set_sqlite_pragma(dbapi_conn, connection_record): class Base(DeclarativeBase): pass + + +def get_db(): + """ + FastAPI Dependency Injection用のデータベースセッションジェネレータ + + 使用例: + @router.post("/endpoint") + async def endpoint(db: Session = Depends(get_db)): + # dbを使用してクエリを実行 + pass + + Yields: + Session: SQLAlchemyセッション + """ + db = SessionLocal() + try: + yield db + finally: + db.close() diff --git a/app/define_db/models.py b/app/define_db/models.py index ce174c3..334319d 100644 --- a/app/define_db/models.py +++ b/app/define_db/models.py @@ -5,8 +5,8 @@ from sqlalchemy import ForeignKey from sqlalchemy.types import String from sqlalchemy.types import Text -from sqlalchemy import CheckConstraint, UniqueConstraint -from typing import List +from sqlalchemy import CheckConstraint, UniqueConstraint, Index +from typing import List, Optional from define_db.database import Base, engine from datetime import datetime @@ -81,6 +81,13 @@ class Run(Base): ) status: Mapped[str] = mapped_column(String(10)) storage_address: Mapped[str] = mapped_column(String(256)) + # ★追加: ストレージモード + storage_mode: Mapped[str] = mapped_column( + String(10), + nullable=True, + default=None, + comment="ストレージモード: 's3' または 'local'" + ) deleted_at: Mapped[datetime] = mapped_column( DateTime(), nullable=True, @@ -115,6 +122,13 @@ class Process(Base): cascade="all, delete-orphan" ) + # ★追加: ProcessOperationへの逆参照(多対多リレーション用) + process_operations: Mapped[List["ProcessOperation"]] = relationship( + "ProcessOperation", + back_populates="process", + cascade="all, delete-orphan" + ) + class Operation(Base): __tablename__ = "operations" @@ -151,6 +165,13 @@ class Operation(Base): ) log: Mapped[str] = mapped_column(Text, nullable=True) + # ★追加: ProcessOperationへの逆参照(多対多リレーション用) + process_operations: Mapped[List["ProcessOperation"]] = relationship( + "ProcessOperation", + 
back_populates="operation", + cascade="all, delete-orphan" + ) + class Edge(Base): __tablename__ = "edges" @@ -248,6 +269,60 @@ class Port(Base): ) +class ProcessOperation(Base): + """プロセス-オペレーション間リレーションテーブル + + プロセスとオペレーションの多対多の関係を管理する。 + 2025年11月6日MTG決定事項に基づき作成。 + + 目的: + - プロセスに紐づくオペレーションを効率的にフィルタリング + - ランでフィルタリング → プロセスでフィルタリングの2段階検索を実現 + """ + __tablename__ = "process_operations" + + # 主キー + id: Mapped[int] = mapped_column( + primary_key=True, + autoincrement=True + ) + + # 外部キー: Process + process_id: Mapped[int] = mapped_column( + ForeignKey("processes.id", ondelete="CASCADE"), + nullable=False + ) + process: Mapped["Process"] = relationship( + "Process", + foreign_keys=[process_id], + back_populates="process_operations" + ) + + # 外部キー: Operation + operation_id: Mapped[int] = mapped_column( + ForeignKey("operations.id", ondelete="CASCADE"), + nullable=False + ) + operation: Mapped["Operation"] = relationship( + "Operation", + foreign_keys=[operation_id], + back_populates="process_operations" + ) + + # 作成日時 + created_at: Mapped[datetime] = mapped_column( + DateTime(), + default=datetime.utcnow + ) + + # インデックスとユニーク制約 + __table_args__ = ( + Index('ix_process_operations_process_id', 'process_id'), + Index('ix_process_operations_operation_id', 'operation_id'), + UniqueConstraint('process_id', 'operation_id', name='uq_process_operation'), + ) + + class PortConnection(Base): """ポート接続情報テーブル diff --git a/app/main.py b/app/main.py index 7a4a28f..7d9e6b7 100644 --- a/app/main.py +++ b/app/main.py @@ -1,5 +1,5 @@ from fastapi import FastAPI -from api.route import users, projects, runs, processes, operations, edges, ports +from api.route import users, projects, runs, processes, operations, edges, ports, storage, storage_v2 from fastapi.middleware.cors import CORSMiddleware app = FastAPI() @@ -23,3 +23,6 @@ app.include_router(operations.router, prefix="/api") app.include_router(edges.router, prefix="/api") app.include_router(ports.router, prefix="/api") +app.include_router(storage.router, prefix="/api") +# HAL (Hybrid Access Layer) を使用した新API +app.include_router(storage_v2.router) diff --git a/app/migrate_storage_address.py b/app/migrate_storage_address.py new file mode 100644 index 0000000..a74fb09 --- /dev/null +++ b/app/migrate_storage_address.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +""" +マイグレーションスクリプト: Google Drive URL → S3パス + +既存RunデータのGoogle Drive URLをS3パス形式に変換する。 + +使用方法: + Docker内で実行: + docker exec labcode_log_server python /app/migrate_storage_address.py [--dry-run] + +オプション: + --dry-run 実際に更新せず、対象レコードを表示するだけ + +作成日: 2025-12-21 +作成者: Astra エージェント +""" + +import sys +import argparse +from pathlib import Path + +# プロジェクトルートをパスに追加 +sys.path.insert(0, '/app') + +from define_db.database import SessionLocal +from define_db.models import Run + + +def migrate_storage_address(dry_run: bool = False): + """Google Drive URLをS3パスに移行""" + + print("=" * 60) + print("Storage Address Migration: Google Drive URL → S3 Path") + print("=" * 60) + + with SessionLocal() as session: + # Google Drive URLを持つRunを検索 + runs_with_url = session.query(Run).filter( + Run.storage_address.like('https://drive.google.com%') + ).all() + + print(f"\n対象レコード数: {len(runs_with_url)}") + + if not runs_with_url: + print("✅ 移行対象のレコードはありません。") + return + + print("\n移行対象:") + print("-" * 60) + + for run in runs_with_url: + old_value = run.storage_address + new_value = f"runs/{run.id}/" + + print(f" Run ID: {run.id}") + print(f" 旧: {old_value[:50]}...") + print(f" 新: {new_value}") + print() + + if not dry_run: + 
run.storage_address = new_value + + if dry_run: + print("-" * 60) + print("🔍 [DRY RUN] 実際の更新は行われませんでした。") + print(" 実行するには --dry-run オプションを外してください。") + else: + session.commit() + print("-" * 60) + print(f"✅ {len(runs_with_url)} 件のレコードを更新しました。") + + print("=" * 60) + + +def main(): + parser = argparse.ArgumentParser( + description="Google Drive URLをS3パスに移行" + ) + parser.add_argument( + '--dry-run', + action='store_true', + help='実際に更新せず、対象レコードを表示するだけ' + ) + + args = parser.parse_args() + migrate_storage_address(dry_run=args.dry_run) + + +if __name__ == "__main__": + main() diff --git a/app/services/hal/__init__.py b/app/services/hal/__init__.py new file mode 100644 index 0000000..00e3a8d --- /dev/null +++ b/app/services/hal/__init__.py @@ -0,0 +1,217 @@ +"""Hybrid Access Layer (HAL) + +S3/ローカルファイルとDBデータを統一的に扱うアクセス抽象化レイヤー。 +Run.storage_modeとデータ種別に基づいて適切なデータソースを選択。 + +使用例: + from services.hal import HybridAccessLayer + + hal = HybridAccessLayer(db_session) + items = hal.list_contents(run_id=22) + content = hal.load_content(run_id=22, path="operations/172/log.txt") + + # Runリストのstorage_mode推論(キャッシュ付き) + from services.hal import infer_storage_mode_for_run + inferred_mode = infer_storage_mode_for_run(db_session, run) + + # バッチ推論(Run List用最適化) + from services.hal import batch_infer_storage_modes + batch_infer_storage_modes(db_session, runs) +""" + +import logging +from typing import List, Set + +from .models import StorageMode, ContentType, DataSource, ContentItem, StorageInfo +from .hybrid_access_layer import HybridAccessLayer + +logger = logging.getLogger(__name__) + + +def infer_storage_mode_for_run(db_session, run, persist: bool = True) -> str: + """ + Runのstorage_modeを推論して文字列で返す(キャッシュ付き) + + storage_mode=nullの場合は、DBログの有無やS3ファイルの有無から推論。 + S3とDBの両方にデータがある場合は'hybrid'を返す。 + 明示的に設定されている場合はそのまま返す(キャッシュヒット)。 + + 推論結果はDBに永続化され、次回以降はS3/DBアクセスなしで取得可能。 + + Args: + db_session: SQLAlchemy Session + run: Runエンティティ + persist: 推論結果をDBに永続化するかどうか(デフォルト: True) + + Returns: + str: 's3', 'local', 'hybrid', or 'unknown' + """ + # キャッシュヒット: storage_modeが既に設定されている場合 + if run.storage_mode is not None: + return run.storage_mode + + # キャッシュミス: 推論を実行 + hal = HybridAccessLayer(db_session) + + # get_storage_infoでハイブリッド判定を含めた完全な情報を取得 + storage_info = hal.get_storage_info(run.id) + + # ハイブリッドの場合は'hybrid' + if storage_info.is_hybrid: + inferred_mode = 'hybrid' + else: + # それ以外は推論されたモードを取得 + mode = hal._infer_storage_mode(run) + inferred_mode = mode.value + + # DBに永続化(次回以降はキャッシュヒット) + if persist: + run.storage_mode = inferred_mode + db_session.commit() + logger.info(f"Persisted inferred storage_mode for Run {run.id}: {inferred_mode}") + + return inferred_mode + + +def batch_infer_storage_modes(db_session, runs: List) -> None: + """ + 複数Runのstorage_modeを一括推論・永続化 + + 最適化: + - S3にあるRun IDを一括取得(1回のS3リクエスト) + - DBにログがあるRun IDを一括取得(1回のDBクエリ) + - 未キャッシュのRunのみ処理 + + Args: + db_session: SQLAlchemy Session + runs: Runエンティティのリスト + """ + # 未キャッシュのRunのみ抽出 + uncached_runs = [r for r in runs if r.storage_mode is None] + + if not uncached_runs: + logger.debug("All runs have cached storage_mode, skipping batch inference") + return + + logger.info(f"Batch inferring storage_mode for {len(uncached_runs)} runs") + + run_ids = [r.id for r in uncached_runs] + + # 一括でS3データの有無を確認 + s3_run_ids = _batch_check_s3_presence(run_ids) + + # 一括でDBログの有無を確認 + db_run_ids = _batch_check_db_logs(db_session, run_ids) + + # 各Runにモードを設定 + updated_count = 0 + for run in uncached_runs: + has_s3 = run.id in s3_run_ids + has_db = run.id in db_run_ids + + if has_s3 and 
has_db: + run.storage_mode = 'hybrid' + elif has_s3: + run.storage_mode = 's3' + elif has_db: + run.storage_mode = 'local' + else: + run.storage_mode = 'unknown' + + updated_count += 1 + + db_session.commit() + logger.info(f"Batch persisted storage_mode for {updated_count} runs") + + +def _batch_check_s3_presence(run_ids: List[int]) -> Set[int]: + """ + S3にデータが存在するRun IDのセットを取得 + + S3のlist_objects_v2を使用して、runs/プレフィックス配下のフォルダを一括取得。 + + Args: + run_ids: チェック対象のRun IDリスト + + Returns: + S3にデータが存在するRun IDのセット + """ + try: + from services.storage_service import BackendRegistry, StorageConfig + + config = StorageConfig.from_env() + if not BackendRegistry.is_registered('s3'): + return set() + + backend_class = BackendRegistry.get('s3') + s3_backend = backend_class(config.s3) + + # runs/プレフィックス配下のディレクトリを一括取得 + result = s3_backend.list_objects_with_dirs("runs/") + common_prefixes = result.get('common_prefixes', []) + + # プレフィックスからRun IDを抽出 + s3_run_ids = set() + for prefix_info in common_prefixes: + prefix = prefix_info.get('Prefix', '') + # "runs/21/" -> 21 + parts = prefix.strip('/').split('/') + if len(parts) >= 2: + try: + run_id = int(parts[1]) + if run_id in run_ids: + s3_run_ids.add(run_id) + except ValueError: + pass + + logger.debug(f"S3 batch check: found {len(s3_run_ids)} runs with data") + return s3_run_ids + + except Exception as e: + logger.warning(f"S3 batch check failed: {e}") + return set() + + +def _batch_check_db_logs(db_session, run_ids: List[int]) -> Set[int]: + """ + DBにログが存在するRun IDのセットを取得 + + 1回のDBクエリで全Run IDのログ有無を確認。 + + Args: + db_session: SQLAlchemy Session + run_ids: チェック対象のRun IDリスト + + Returns: + DBにログが存在するRun IDのセット + """ + try: + from define_db.models import Operation, Process + from sqlalchemy import func + + # Run ID毎にOperation.logが存在するかを一括クエリ + result = db_session.query(Process.run_id).join(Operation).filter( + Process.run_id.in_(run_ids), + Operation.log.isnot(None), + Operation.log != '' + ).distinct().all() + + db_run_ids = {row[0] for row in result} + logger.debug(f"DB batch check: found {len(db_run_ids)} runs with logs") + return db_run_ids + + except Exception as e: + logger.warning(f"DB batch check failed: {e}") + return set() + + +__all__ = [ + 'StorageMode', + 'ContentType', + 'DataSource', + 'ContentItem', + 'StorageInfo', + 'HybridAccessLayer', + 'infer_storage_mode_for_run', + 'batch_infer_storage_modes', +] diff --git a/app/services/hal/db_backend.py b/app/services/hal/db_backend.py new file mode 100644 index 0000000..de23bf5 --- /dev/null +++ b/app/services/hal/db_backend.py @@ -0,0 +1,203 @@ +"""DBデータバックエンド + +SQLiteデータベースからデータを取得するバックエンド。 +ローカルモードでは、オペレーションログ等がDBに保存されている。 +""" + +import re +import logging +from typing import List, Optional, Dict, Any +from datetime import datetime +from sqlalchemy.orm import Session + +from .models import ContentItem, ContentType, DataSource + +logger = logging.getLogger(__name__) + + +class DBDataBackend: + """データベースからデータを取得するバックエンド""" + + def __init__(self, db_session: Session): + self._db = db_session + + def list_operation_logs(self, run_id: int, prefix: str = "") -> List[ContentItem]: + """ + Run内のオペレーションログを仮想ファイルとして一覧取得 + + 階層構造を尊重し、現在の階層に属するファイルのみを返す: + - prefix="": 何も返さない(ディレクトリのみ) + - prefix="operations/": 何も返さない(サブディレクトリのみ) + - prefix="operations/172/": log.txtを返す + + Args: + run_id: Run ID + prefix: フィルタリング用プレフィックス + + Returns: + ContentItemのリスト + """ + from define_db.models import Operation, Process + + items = [] + + # ルート階層または operations/ 階層の場合は、ファイルは返さない + # (ディレクトリは list_virtual_directories で返す) + if 
prefix == "" or prefix == "operations/" or prefix == "operations": + return items + + # operations/{op_id}/ 階層の場合のみ、log.txt を返す + op_id_match = re.match(r'^operations/(\d+)/?$', prefix) + if not op_id_match: + return items + + target_op_id = int(op_id_match.group(1)) + + # 該当オペレーションを取得 + operation = self._db.query(Operation).filter( + Operation.id == target_op_id + ).first() + + if operation and operation.log: + # プロセスがこのRunに属しているか確認 + process = self._db.query(Process).filter( + Process.id == operation.process_id, + Process.run_id == run_id + ).first() + + if process: + log_path = f"operations/{operation.id}/log.txt" + items.append(ContentItem( + name="log.txt", + path=log_path, + type="file", + size=len(operation.log.encode('utf-8')), + last_modified=operation.finished_at.isoformat() if operation.finished_at else None, + content_type=ContentType.OPERATION_LOG, + source=DataSource.DATABASE + )) + + return items + + def list_virtual_directories(self, run_id: int, prefix: str = "") -> List[ContentItem]: + """ + 仮想ディレクトリ一覧を生成 + + Args: + run_id: Run ID + prefix: フィルタリング用プレフィックス + + Returns: + 仮想ディレクトリのContentItemリスト + """ + from define_db.models import Operation, Process + + items = [] + + # 該当Runのオペレーションを取得 + operations = self._db.query(Operation).join(Process).filter( + Process.run_id == run_id + ).all() + + op_ids_with_log = [op.id for op in operations if op.log] + + # ルートレベルの場合: operations/ディレクトリを追加 + if prefix == "" and op_ids_with_log: + items.append(ContentItem( + name="operations", + path="operations/", + type="directory", + size=0, + last_modified=None, + content_type=ContentType.OTHER, + source=DataSource.VIRTUAL + )) + + # operations/レベルの場合: 各オペレーションのサブディレクトリを追加 + if prefix == "operations/" or prefix == "operations": + for op_id in op_ids_with_log: + items.append(ContentItem( + name=str(op_id), + path=f"operations/{op_id}/", + type="directory", + size=0, + last_modified=None, + content_type=ContentType.OTHER, + source=DataSource.VIRTUAL + )) + + return items + + def load_operation_log(self, operation_id: int) -> Optional[bytes]: + """ + オペレーションログを取得 + + Args: + operation_id: Operation ID + + Returns: + ログ内容のバイト列(なければNone) + """ + from define_db.models import Operation + + operation = self._db.query(Operation).filter( + Operation.id == operation_id + ).first() + + if operation and operation.log: + return operation.log.encode('utf-8') + return None + + def get_operation_log_info(self, operation_id: int) -> Optional[Dict[str, Any]]: + """ + オペレーションログの情報を取得 + + Args: + operation_id: Operation ID + + Returns: + ログ情報の辞書 + """ + from define_db.models import Operation + + operation = self._db.query(Operation).filter( + Operation.id == operation_id + ).first() + + if not operation or not operation.log: + return None + + return { + "size": len(operation.log.encode('utf-8')), + "last_modified": operation.finished_at.isoformat() if operation.finished_at else None, + "operation_name": operation.name + } + + @staticmethod + def extract_operation_id(path: str) -> Optional[int]: + """ + パスからオペレーションIDを抽出 + + Args: + path: 仮想パス (例: "operations/172/log.txt") + + Returns: + Operation ID(抽出できなければNone) + """ + match = re.search(r'operations/(\d+)/', path) + if match: + return int(match.group(1)) + return None + + @staticmethod + def is_operation_log_path(path: str) -> bool: + """ + パスがオペレーションログを指しているか判定 + + Args: + path: 仮想パス + + Returns: + オペレーションログの場合True + """ + return "operations/" in path and path.endswith("log.txt") diff --git a/app/services/hal/hybrid_access_layer.py 
b/app/services/hal/hybrid_access_layer.py new file mode 100644 index 0000000..72cf8e5 --- /dev/null +++ b/app/services/hal/hybrid_access_layer.py @@ -0,0 +1,632 @@ +"""Hybrid Access Layer (HAL) + +S3/ローカルファイルとDBデータを統一的に扱うアクセス抽象化レイヤー。 +Run.storage_modeとデータ種別に基づいて適切なデータソースを選択。 + +レジストリパターンを使用して、バックエンドを動的に取得。 +""" + +import os +import logging +from typing import List, Optional, Dict, Any +from datetime import datetime +from sqlalchemy.orm import Session + +from .models import StorageMode, ContentType, DataSource, ContentItem, StorageInfo +from .db_backend import DBDataBackend + +logger = logging.getLogger(__name__) + + +class HybridAccessLayer: + """ + ハイブリッドアクセスレイヤー + + S3/ローカルファイルとDBデータを統一的に扱う。 + Run.storage_modeとデータ種別に基づいて適切なデータソースを選択。 + storage_mode=nullの場合は、データの存在を確認してモードを推論する。 + """ + + def __init__(self, db_session: Session): + self._db = db_session + self._db_backend = DBDataBackend(db_session) + + # ファイルバックエンドは遅延初期化(辞書で動的管理) + self._file_backends: Dict[str, Any] = {} + # 推論結果のキャッシュ(run_id -> StorageMode) + self._inferred_mode_cache: Dict[int, StorageMode] = {} + + def _get_backend(self, mode_str: str): + """ + モード名に対応するバックエンドを取得(遅延初期化) + + レジストリパターンを使用して動的にバックエンドを取得。 + """ + if mode_str not in self._file_backends: + from services.storage_service import BackendRegistry, StorageConfig + config = StorageConfig.from_env() + + if BackendRegistry.is_registered(mode_str): + backend_class = BackendRegistry.get(mode_str) + # モードに応じた設定を取得 + if mode_str == 's3': + self._file_backends[mode_str] = backend_class(config.s3) + elif mode_str == 'local': + self._file_backends[mode_str] = backend_class(config.local) + else: + # 新しいバックエンド用(設定はNoneで初期化) + self._file_backends[mode_str] = backend_class(None) + else: + # 未登録の場合はS3をフォールバック + backend_class = BackendRegistry.get('s3') + self._file_backends[mode_str] = backend_class(config.s3) + + return self._file_backends[mode_str] + + def _get_s3_backend(self): + """S3バックエンドを取得(後方互換性)""" + return self._get_backend('s3') + + def _get_local_backend(self): + """ローカルバックエンドを取得(後方互換性)""" + return self._get_backend('local') + + def _get_run(self, run_id: int): + """Runエンティティを取得""" + from define_db.models import Run + run = self._db.query(Run).filter( + Run.id == run_id, + Run.deleted_at.is_(None) + ).first() + if not run: + raise ValueError(f"Run {run_id} not found") + return run + + def _get_storage_mode(self, run) -> StorageMode: + """Runのストレージモードを取得""" + return StorageMode.from_string(run.storage_mode) + + def _infer_storage_mode(self, run) -> StorageMode: + """ + storage_mode=nullのRunに対してストレージモードを推論 + + 推論アルゴリズム: + 1. storage_modeが明示的に設定されている場合はそのまま返す + 2. storage_mode=nullの場合: + a. DBにオペレーションログ(Operation.log)が存在すれば LOCAL + b. S3にファイルが存在すれば S3 + c. どちらにもなければ UNKNOWN + + Args: + run: Runエンティティ + + Returns: + 推論されたStorageMode + """ + # 明示的にモードが設定されている場合 + if run.storage_mode: + return StorageMode.from_string(run.storage_mode) + + # キャッシュをチェック + if run.id in self._inferred_mode_cache: + return self._inferred_mode_cache[run.id] + + # 推論開始 + inferred = self._do_infer_storage_mode(run) + self._inferred_mode_cache[run.id] = inferred + + if inferred != StorageMode.UNKNOWN: + logger.info(f"Inferred storage_mode for Run {run.id}: {inferred.value}") + + return inferred + + def _do_infer_storage_mode(self, run) -> StorageMode: + """実際の推論処理 + + 推論優先順位(重要): + 1. S3にファイルがあれば S3(レガシーデータはS3保存が主) + 2. S3にファイルがなく、DBにログがあれば LOCAL(ローカルモード専用) + 3. 
どちらにもなければ UNKNOWN + + この優先順位は、レガシーデータ(storage_mode=null)が + S3に保存されている可能性が高いことに基づいています。 + """ + from define_db.models import Operation, Process + + # Step 1: S3にファイルがあるか確認(優先) + try: + s3_backend = self._get_s3_backend() + storage_address = run.storage_address or f"runs/{run.id}/" + result = s3_backend.list_objects_with_dirs(storage_address) + has_s3_files = bool(result.get('contents', [])) + if has_s3_files: + return StorageMode.S3 + except Exception as e: + logger.debug(f"S3 check failed for Run {run.id}: {e}") + + # Step 2: DBにオペレーションログがあるか確認 + has_db_logs = self._db.query(Operation).join(Process).filter( + Process.run_id == run.id, + Operation.log.isnot(None), + Operation.log != '' + ).first() is not None + + if has_db_logs: + return StorageMode.LOCAL + + # Step 3: どちらにもデータがない場合 + return StorageMode.UNKNOWN + + def _get_file_backend(self, mode: StorageMode): + """モードに対応するファイルバックエンドを取得(レジストリパターン)""" + return self._get_backend(mode.value) + + def list_contents(self, run_id: int, prefix: str = "") -> List[ContentItem]: + """ + Run内のコンテンツ一覧を取得 + + ファイルとDBデータを統合して仮想ファイルシステムとして返す。 + + Args: + run_id: Run ID + prefix: フィルタリング用プレフィックス + + Returns: + ContentItemのリスト + """ + run = self._get_run(run_id) + mode = self._get_storage_mode(run) + items = [] + + if mode == StorageMode.S3: + # S3モード: ファイルベースのみ + items.extend(self._list_file_contents(run, prefix)) + + elif mode == StorageMode.LOCAL: + # ローカルモード: DBからデータを仮想ファイルとして構築 + + # 1. 仮想ディレクトリを追加 + items.extend(self._db_backend.list_virtual_directories(run_id, prefix)) + + # 2. オペレーションログを仮想ファイルとして追加 + items.extend(self._db_backend.list_operation_logs(run_id, prefix)) + + # 3. ローカルファイルシステムにもファイルがあれば追加 + items.extend(self._list_file_contents(run, prefix)) + + elif mode == StorageMode.UNKNOWN: + # UNKNOWNモード: 両方のストレージを試行(フォールバック) + logger.info(f"Run {run_id} has unknown storage_mode, trying both S3 and local") + + # S3から試行 + s3_items = self._try_list_from_s3(run, prefix) + items.extend(s3_items) + + # ローカル(DB + ファイル)から試行 + local_items = self._try_list_from_local(run_id, run, prefix) + items.extend(local_items) + + # 重複除去(pathをキーとして) + seen_paths = set() + unique_items = [] + for item in items: + if item.path not in seen_paths: + seen_paths.add(item.path) + unique_items.append(item) + + return unique_items + + def _try_list_from_s3(self, run, prefix: str = "") -> List[ContentItem]: + """S3からコンテンツ一覧取得を試行(エラーは握りつぶす)""" + try: + file_backend = self._get_s3_backend() + full_prefix = f"{run.storage_address}{prefix}" + result = file_backend.list_objects_with_dirs(full_prefix) + + items = [] + # ファイルを変換 + for file_info in result.get('contents', []): + key = file_info.get('Key', '') + relative_path = key[len(run.storage_address):] if key.startswith(run.storage_address) else key + if not relative_path or relative_path.endswith('/'): + continue + name = relative_path.split('/')[-1] + items.append(ContentItem( + name=name, + path=relative_path, + type="file", + size=file_info.get('Size', 0), + last_modified=file_info.get('LastModified').isoformat() if file_info.get('LastModified') else None, + content_type=self._detect_content_type(relative_path), + source=DataSource.FILE, + backend="s3" + )) + + # ディレクトリを変換 + for dir_info in result.get('common_prefixes', []): + dir_path = dir_info.get('Prefix', '') + relative_path = dir_path[len(run.storage_address):] if dir_path.startswith(run.storage_address) else dir_path + if not relative_path: + continue + name = relative_path.rstrip('/').split('/')[-1] + items.append(ContentItem( + name=name, + path=relative_path, + 
type="directory", + size=0, + last_modified=None, + content_type=ContentType.OTHER, + source=DataSource.FILE, + backend="s3" + )) + + return items + except Exception as e: + logger.debug(f"S3 fallback failed for run {run.id}: {e}") + return [] + + def _try_list_from_local(self, run_id: int, run, prefix: str = "") -> List[ContentItem]: + """ローカル(DB + ファイル)からコンテンツ一覧取得を試行(エラーは握りつぶす)""" + items = [] + try: + # DBから仮想ディレクトリとログを取得(backendを設定) + db_dirs = self._db_backend.list_virtual_directories(run_id, prefix) + for item in db_dirs: + item.backend = "local" + items.extend(db_dirs) + + db_logs = self._db_backend.list_operation_logs(run_id, prefix) + for item in db_logs: + item.backend = "local" + items.extend(db_logs) + except Exception as e: + logger.debug(f"DB fallback failed for run {run_id}: {e}") + + try: + # ローカルファイルシステムからも試行 + file_backend = self._get_local_backend() + full_prefix = f"{run.storage_address}{prefix}" + result = file_backend.list_objects_with_dirs(full_prefix) + + for file_info in result.get('contents', []): + key = file_info.get('Key', '') + relative_path = key[len(run.storage_address):] if key.startswith(run.storage_address) else key + if not relative_path or relative_path.endswith('/'): + continue + name = relative_path.split('/')[-1] + items.append(ContentItem( + name=name, + path=relative_path, + type="file", + size=file_info.get('Size', 0), + last_modified=file_info.get('LastModified').isoformat() if file_info.get('LastModified') else None, + content_type=self._detect_content_type(relative_path), + source=DataSource.FILE, + backend="local" + )) + + for dir_info in result.get('common_prefixes', []): + dir_path = dir_info.get('Prefix', '') + relative_path = dir_path[len(run.storage_address):] if dir_path.startswith(run.storage_address) else dir_path + if not relative_path: + continue + name = relative_path.rstrip('/').split('/')[-1] + items.append(ContentItem( + name=name, + path=relative_path, + type="directory", + size=0, + last_modified=None, + content_type=ContentType.OTHER, + source=DataSource.FILE, + backend="local" + )) + except Exception as e: + logger.debug(f"Local file fallback failed for run {run_id}: {e}") + + return items + + def _list_file_contents(self, run, prefix: str = "") -> List[ContentItem]: + """ファイルバックエンドからコンテンツ一覧を取得""" + mode = self._get_storage_mode(run) + file_backend = self._get_file_backend(mode) + backend_name = mode.value # "s3" or "local" + + # 完全プレフィックスを構築 + full_prefix = f"{run.storage_address}{prefix}" + + try: + result = file_backend.list_objects_with_dirs(full_prefix) + except Exception as e: + logger.warning(f"Failed to list files for {full_prefix}: {e}") + return [] + + items = [] + + # ファイルを変換 + for file_info in result.get('contents', []): + key = file_info.get('Key', '') + # storage_addressを除去して相対パスに + relative_path = key[len(run.storage_address):] if key.startswith(run.storage_address) else key + + if not relative_path or relative_path.endswith('/'): + continue + + name = relative_path.split('/')[-1] + items.append(ContentItem( + name=name, + path=relative_path, + type="file", + size=file_info.get('Size', 0), + last_modified=file_info.get('LastModified').isoformat() if file_info.get('LastModified') else None, + content_type=self._detect_content_type(relative_path), + source=DataSource.FILE, + backend=backend_name + )) + + # ディレクトリを変換 + for dir_info in result.get('common_prefixes', []): + dir_path = dir_info.get('Prefix', '') + # storage_addressを除去して相対パスに + relative_path = dir_path[len(run.storage_address):] if 
dir_path.startswith(run.storage_address) else dir_path + + if not relative_path: + continue + + name = relative_path.rstrip('/').split('/')[-1] + items.append(ContentItem( + name=name, + path=relative_path, + type="directory", + size=0, + last_modified=None, + content_type=ContentType.OTHER, + source=DataSource.FILE, + backend=backend_name + )) + + return items + + def load_content(self, run_id: int, path: str) -> Optional[bytes]: + """ + コンテンツを読み込む + + パスからデータソースを判定し、適切な方法で読み込む。 + + Args: + run_id: Run ID + path: 仮想パス + + Returns: + ファイル内容のバイト列(なければNone) + """ + run = self._get_run(run_id) + mode = self._get_storage_mode(run) + + # UNKNOWNモード: 両方のストレージを試行 + if mode == StorageMode.UNKNOWN: + logger.info(f"Run {run_id} has unknown storage_mode, trying fallback for load_content") + + # まずDBからオペレーションログを試行 + if self._db_backend.is_operation_log_path(path): + op_id = self._db_backend.extract_operation_id(path) + if op_id: + content = self._db_backend.load_operation_log(op_id) + if content: + return content + + # S3から試行 + content = self._try_load_from_s3(run, path) + if content: + return content + + # ローカルファイルから試行 + content = self._try_load_from_local(run, path) + if content: + return content + + return None + + # オペレーションログの場合 + if mode == StorageMode.LOCAL and self._db_backend.is_operation_log_path(path): + op_id = self._db_backend.extract_operation_id(path) + if op_id: + content = self._db_backend.load_operation_log(op_id) + if content: + return content + + # ファイルバックエンドから取得 + file_backend = self._get_file_backend(mode) + full_path = f"{run.storage_address}{path}" + + try: + return file_backend.load(full_path) + except Exception as e: + logger.warning(f"Failed to load content from {full_path}: {e}") + return None + + def _try_load_from_s3(self, run, path: str) -> Optional[bytes]: + """S3からコンテンツ読み込みを試行(エラーは握りつぶす)""" + try: + file_backend = self._get_s3_backend() + full_path = f"{run.storage_address}{path}" + return file_backend.load(full_path) + except Exception as e: + logger.debug(f"S3 load fallback failed for run {run.id}, path {path}: {e}") + return None + + def _try_load_from_local(self, run, path: str) -> Optional[bytes]: + """ローカルファイルからコンテンツ読み込みを試行(エラーは握りつぶす)""" + try: + file_backend = self._get_local_backend() + full_path = f"{run.storage_address}{path}" + return file_backend.load(full_path) + except Exception as e: + logger.debug(f"Local load fallback failed for run {run.id}, path {path}: {e}") + return None + + def get_download_url(self, run_id: int, path: str) -> str: + """ + ダウンロードURLを取得 + + S3: presigned URL + ローカル(ファイル): /api/storage/download-direct + ローカル(DB): /api/v2/storage/db-content + + Args: + run_id: Run ID + path: 仮想パス + + Returns: + ダウンロードURL + """ + run = self._get_run(run_id) + mode = self._get_storage_mode(run) + + # UNKNOWNモード: オペレーションログならDB経由、それ以外は直接ダウンロード + if mode == StorageMode.UNKNOWN: + if self._db_backend.is_operation_log_path(path): + op_id = self._db_backend.extract_operation_id(path) + return f"/api/v2/storage/db-content/{run_id}?path={path}&op_id={op_id}" + # S3のpresigned URLを試行 + try: + s3_backend = self._get_s3_backend() + full_path = f"{run.storage_address}{path}" + url = s3_backend.generate_presigned_url(full_path) + if url: + return url + except Exception as e: + logger.debug(f"S3 presigned URL fallback failed for run {run_id}: {e}") + # フォールバック: 直接ダウンロードAPI + return f"/api/storage/download-direct?path={run.storage_address}{path}" + + # ローカルモード + オペレーションログ → DB経由 + if mode == StorageMode.LOCAL and self._db_backend.is_operation_log_path(path): + op_id = 
self._db_backend.extract_operation_id(path) + return f"/api/v2/storage/db-content/{run_id}?path={path}&op_id={op_id}" + + # S3モード → presigned URL + if mode == StorageMode.S3: + file_backend = self._get_file_backend(mode) + full_path = f"{run.storage_address}{path}" + try: + url = file_backend.generate_presigned_url(full_path) + if url: + return url + except Exception as e: + logger.warning(f"Failed to generate presigned URL: {e}") + + # フォールバック: 直接ダウンロードAPI + return f"/api/storage/download-direct?path={run.storage_address}{path}" + + def get_storage_info(self, run_id: int) -> StorageInfo: + """ + Run固有のストレージ情報を取得 + + storage_mode=nullの場合は推論を行い、適切なモードを返す。 + ハイブリッドモード(S3+DB両方にデータあり)も検出する。 + + Args: + run_id: Run ID + + Returns: + StorageInfo + """ + run = self._get_run(run_id) + raw_mode = self._get_storage_mode(run) + + # storage_mode=nullの場合は推論を実行 + if raw_mode == StorageMode.UNKNOWN: + inferred_mode = self._infer_storage_mode(run) + mode = inferred_mode + is_inferred = True + else: + mode = raw_mode + is_inferred = False + + # ハイブリッドモードの検出 + has_s3_data = False + has_local_data = False + s3_path = None + local_path = None + + # S3にデータがあるか確認 + try: + s3_backend = self._get_s3_backend() + bucket_name = getattr(s3_backend, 'bucket_name', 'labcode-dev-artifacts') + storage_address = run.storage_address or f"runs/{run_id}/" + result = s3_backend.list_objects_with_dirs(storage_address) + has_s3_data = bool(result.get('contents', [])) + if has_s3_data: + s3_path = f"s3://{bucket_name}/{storage_address}" + except Exception as e: + logger.debug(f"S3 check for hybrid failed for Run {run_id}: {e}") + + # ローカル(DB)にデータがあるか確認 + from define_db.models import Operation, Process + has_local_data = self._db.query(Operation).join(Process).filter( + Process.run_id == run_id, + Operation.log.isnot(None), + Operation.log != '' + ).first() is not None + if has_local_data: + local_path = f"db://sqlite/runs/{run_id}/" + + is_hybrid = has_s3_data and has_local_data + + if mode == StorageMode.UNKNOWN: + # 推論してもUNKNOWNの場合: 警告付きで返却 + return StorageInfo( + mode=mode, + storage_address=run.storage_address or f"runs/{run_id}/", + full_path="unknown://", + data_sources={ + "logs": "unknown", + "yaml": "unknown", + "data": "unknown" + }, + warning="Storage mode is not set and could not be inferred. 
Data may not be displayed correctly.", + is_hybrid=is_hybrid, + s3_path=s3_path, + local_path=local_path + ) + elif mode == StorageMode.S3: + full_path = s3_path or f"s3://labcode-dev-artifacts/{run.storage_address}" + data_sources = { + "logs": "s3" if not has_local_data else "hybrid", + "yaml": "s3", + "data": "s3" + } + else: + full_path = local_path or f"db://sqlite/runs/{run_id}/" + data_sources = { + "logs": "database" if not has_s3_data else "hybrid", + "yaml": "database_or_none", + "data": "database_or_none" + } + + return StorageInfo( + mode=mode, + storage_address=run.storage_address or f"runs/{run_id}/", + full_path=full_path, + data_sources=data_sources, + inferred=is_inferred, + is_hybrid=is_hybrid, + s3_path=s3_path, + local_path=local_path + ) + + def _detect_content_type(self, path: str) -> ContentType: + """パスからコンテンツ種別を判定""" + if self._db_backend.is_operation_log_path(path): + return ContentType.OPERATION_LOG + elif path.endswith("protocol.yaml") or path.endswith("protocol.yml"): + return ContentType.PROTOCOL_YAML + elif path.endswith("manipulate.yaml") or path.endswith("manipulate.yml"): + return ContentType.MANIPULATE_YAML + elif path.endswith(".yaml") or path.endswith(".yml"): + return ContentType.OTHER + elif "processes/" in path: + return ContentType.PROCESS_DATA + else: + return ContentType.OTHER diff --git a/app/services/hal/models.py b/app/services/hal/models.py new file mode 100644 index 0000000..715daee --- /dev/null +++ b/app/services/hal/models.py @@ -0,0 +1,105 @@ +"""HALデータモデル定義 + +Hybrid Access Layerで使用するEnum、データクラスを定義。 +""" + +from enum import Enum +from dataclasses import dataclass, field +from typing import Optional, Dict, Any +from datetime import datetime + + +class StorageMode(Enum): + """ストレージモード""" + S3 = "s3" + LOCAL = "local" + UNKNOWN = "unknown" # storage_mode=nullの場合 + + @classmethod + def from_string(cls, value: Optional[str]) -> 'StorageMode': + """文字列からStorageModeを取得(nullはUNKNOWN)""" + if value is None: + return cls.UNKNOWN + try: + return cls(value.lower()) + except ValueError: + return cls.UNKNOWN + + +class ContentType(Enum): + """コンテンツ種別""" + OPERATION_LOG = "operation_log" # オペレーションログ + PROTOCOL_YAML = "protocol_yaml" # プロトコルYAML + MANIPULATE_YAML = "manipulate_yaml" # 操作定義YAML + PROCESS_DATA = "process_data" # プロセスデータ + MEASUREMENT = "measurement" # 測定結果 + OTHER = "other" # その他 + + +class DataSource(Enum): + """データソース種別""" + FILE = "file" # ファイルシステム (S3 or LocalFS) + DATABASE = "db" # データベース (SQLite) + VIRTUAL = "virtual" # 仮想ディレクトリ + + +@dataclass +class ContentItem: + """仮想ファイルシステムアイテム""" + name: str # ファイル/ディレクトリ名 + path: str # 仮想パス + type: str # "file" or "directory" + size: int # バイトサイズ + last_modified: Optional[str] # 最終更新日時 (ISO 8601形式) + content_type: ContentType # コンテンツ種別 + source: DataSource # データソース + backend: Optional[str] = None # バックエンド種別 ("s3", "local", None) + + def to_dict(self) -> Dict[str, Any]: + """辞書形式に変換(APIレスポンス用)""" + result = { + "name": self.name, + "path": self.path, + "type": self.type, + "size": self.size, + "lastModified": self.last_modified, + "contentType": self.content_type.value, + "source": self.source.value + } + if self.backend: + result["backend"] = self.backend + return result + + +@dataclass +class StorageInfo: + """ストレージ情報""" + mode: StorageMode # ストレージモード + storage_address: str # 相対パス (runs/XX/) + full_path: str # フルパス (s3://... or db://...) 
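+    # Construction sketch (illustrative values): StorageInfo(mode=StorageMode.S3,
+    #     storage_address="runs/1/", full_path="s3://labcode-dev-artifacts/runs/1/")
+    # is valid as-is, since every remaining field below has a safe default.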
+ data_sources: Dict[str, str] = field(default_factory=dict) # 各データ種別のソース + warning: Optional[str] = None # 警告メッセージ(UNKNOWNモード時など) + inferred: bool = False # モードが推論されたかどうか + is_hybrid: bool = False # ハイブリッドモードかどうか(S3+DB両方にデータあり) + s3_path: Optional[str] = None # S3パス(ハイブリッド時) + local_path: Optional[str] = None # ローカルパス(ハイブリッド時) + + def to_dict(self) -> Dict[str, Any]: + """辞書形式に変換(APIレスポンス用)""" + result = { + "mode": self.mode.value, + "storage_address": self.storage_address, + "full_path": self.full_path, + "data_sources": self.data_sources + } + if self.warning: + result["warning"] = self.warning + if self.inferred: + result["inferred"] = True + if self.is_hybrid: + result["isHybrid"] = True + if self.s3_path: + result["s3Path"] = self.s3_path + if self.local_path: + result["localPath"] = self.local_path + return result diff --git a/app/services/s3_service.py b/app/services/s3_service.py new file mode 100644 index 0000000..d8fae54 --- /dev/null +++ b/app/services/s3_service.py @@ -0,0 +1,229 @@ +"""S3操作サービスクラス + +S3バケットへのアクセスを抽象化し、以下の機能を提供する: +- オブジェクト一覧取得 +- オブジェクト内容取得 +- 事前署名URL生成 +- 再帰的オブジェクト一覧取得(バッチダウンロード用) +- バッチオブジェクト取得(バッチダウンロード用) + +注意: このクラスはStorageServiceのラッパーとして動作し、 + STORAGE_MODE環境変数に応じてS3またはローカルFSを使用する。 +""" + +import os +from typing import Optional, List, Generator, Tuple +from datetime import datetime, timedelta +from botocore.exceptions import ClientError +import logging + +from services.storage_service import get_storage, StorageService + +logger = logging.getLogger(__name__) + + +class S3Service: + """ + S3操作を行うサービスクラス + + StorageServiceをバックエンドとして使用し、 + 環境変数STORAGE_MODEに応じてS3またはローカルFSを透過的に切り替える。 + """ + + def __init__(self): + """StorageServiceを初期化""" + self._storage = get_storage() + self.bucket_name = os.getenv('S3_BUCKET_NAME', 'labcode-dev-artifacts') + logger.info(f"S3Service initialized: mode={self._storage.mode}") + + def list_objects( + self, + prefix: str, + delimiter: str = '/' + ) -> dict: + """ + オブジェクト一覧を取得する + + Args: + prefix: S3プレフィックス + delimiter: 階層区切り文字(デフォルト: '/') + + Returns: + dict: {'contents': [...], 'common_prefixes': [...]} + + Raises: + ClientError: S3アクセスエラー(S3モードのみ) + """ + return self._storage.list_objects_with_dirs(prefix, delimiter) + + def get_object(self, key: str) -> dict: + """ + オブジェクトを取得する + + Args: + key: S3キー + + Returns: + dict: {'body': bytes, 'content_length': int, 'last_modified': datetime} + + Raises: + ClientError: S3アクセスエラー(NoSuchKey含む) + """ + content = self._storage.load(key) + if content is None: + # ClientErrorを模倣してNoSuchKeyエラーを発生 + from botocore.exceptions import ClientError + raise ClientError( + {'Error': {'Code': 'NoSuchKey', 'Message': 'Not Found'}}, + 'GetObject' + ) + + metadata = self._storage.get_metadata(key) + return { + 'body': content, + 'content_length': metadata['content_length'] if metadata else len(content), + 'last_modified': metadata['last_modified'] if metadata else datetime.now() + } + + def head_object(self, key: str) -> dict: + """ + オブジェクトのメタデータを取得する(存在確認用) + + Args: + key: S3キー + + Returns: + dict: {'content_length': int, 'last_modified': datetime} + + Raises: + ClientError: S3アクセスエラー(NoSuchKey含む) + """ + metadata = self._storage.get_metadata(key) + if metadata is None: + from botocore.exceptions import ClientError + raise ClientError( + {'Error': {'Code': 'NoSuchKey', 'Message': 'Not Found'}}, + 'HeadObject' + ) + + return { + 'content_length': metadata['content_length'], + 'last_modified': metadata['last_modified'] + } + + def generate_presigned_url( + self, + key: str, + expires_in: int = 
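+        # The default below is one hour (3600 seconds), as the docstring states.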
3600 + ) -> str: + """ + 事前署名URLを生成する + + Args: + key: S3キー + expires_in: 有効期限(秒)、デフォルト3600秒(1時間) + + Returns: + str: 事前署名URL(ローカルモードではNone) + """ + url = self._storage.generate_presigned_url(key, expires_in) + if url is None and self._storage.mode == 'local': + # ローカルモードでは直接ダウンロードAPIを使用する必要がある + logger.warning(f"Presigned URL not available in local mode for: {key}") + # APIエンドポイントを返す(フロントエンドで対応が必要) + return f"/api/storage/download-direct?file_path={key}" + return url + + def list_objects_recursive(self, prefix: str) -> List[dict]: + """ + 指定プレフィックス配下の全オブジェクトを再帰的に取得する + + Args: + prefix: S3プレフィックス(例: runs/1/) + + Returns: + List[dict]: オブジェクト情報リスト + 各要素: {'Key': str, 'Size': int, 'LastModified': datetime} + + Raises: + ClientError: S3アクセスエラー + """ + return self._storage.list_objects(prefix) + + def get_object_stream(self, key: str) -> Generator[bytes, None, None]: + """ + オブジェクトをストリーミングで取得する + + Args: + key: S3キー + + Yields: + bytes: ファイルチャンク(64KB単位) + + Raises: + ClientError: S3アクセスエラー + """ + return self._storage.load_stream(key, chunk_size=64 * 1024) + + def get_objects_batch( + self, + keys: List[str] + ) -> Generator[Tuple[str, bytes], None, None]: + """ + 複数オブジェクトをバッチ取得する(ジェネレータ) + + Args: + keys: S3キーリスト + + Yields: + Tuple[str, bytes]: (キー, コンテンツ) + + Note: + エラーが発生したキーはスキップし、ログに記録する + """ + for key in keys: + try: + content = self._storage.load(key) + if content is not None: + yield (key, content) + else: + logger.warning(f"Failed to get object {key}: Not found") + except Exception as e: + logger.warning(f"Failed to get object {key}: {e}") + continue + + def calculate_total_size(self, prefix: str) -> int: + """ + 指定プレフィックス配下の全オブジェクトの合計サイズを計算する + + Args: + prefix: S3プレフィックス + + Returns: + int: 合計サイズ(バイト) + """ + return self._storage.calculate_total_size(prefix) + + +def get_content_type(extension: str) -> str: + """ + 拡張子からコンテンツタイプを判定する + + Args: + extension: ファイル拡張子(小文字) + + Returns: + str: 'text', 'json', 'yaml', または 'binary' + """ + text_types = {'txt', 'log', 'md', 'rst', 'csv'} + json_types = {'json'} + yaml_types = {'yaml', 'yml'} + + if extension in text_types: + return 'text' + elif extension in json_types: + return 'json' + elif extension in yaml_types: + return 'yaml' + else: + return 'binary' diff --git a/app/services/storage/__init__.py b/app/services/storage/__init__.py new file mode 100644 index 0000000..3fc1f87 --- /dev/null +++ b/app/services/storage/__init__.py @@ -0,0 +1,22 @@ +"""Storage Module - 統合ストレージサービス + +S3とローカルファイルシステムを統一的に扱うためのストレージ抽象化レイヤー。 +責任分離型設計: Read + 管理機能を提供(Write操作はlabcode-simが担当) +""" + +from .config import StorageConfig, S3Config, LocalConfig +from .registry import BackendRegistry +from .service import StorageService, get_storage +from .backends.base import StorageBackend + +__all__ = [ + 'StorageConfig', + 'S3Config', + 'LocalConfig', + 'BackendRegistry', + 'StorageService', + 'get_storage', + 'StorageBackend' +] + +__version__ = '1.0.0' diff --git a/app/services/storage/backends/__init__.py b/app/services/storage/backends/__init__.py new file mode 100644 index 0000000..77a23a2 --- /dev/null +++ b/app/services/storage/backends/__init__.py @@ -0,0 +1,14 @@ +"""Storage Backends + +各種ストレージバックエンドの実装。 +""" + +from .base import StorageBackend +from .s3 import S3StorageBackend +from .local import LocalStorageBackend + +__all__ = [ + 'StorageBackend', + 'S3StorageBackend', + 'LocalStorageBackend' +] diff --git a/app/services/storage/backends/base.py b/app/services/storage/backends/base.py new file mode 100644 index 0000000..87534e7 --- /dev/null +++ 
b/app/services/storage/backends/base.py @@ -0,0 +1,138 @@ +"""ストレージバックエンド抽象基底クラス + +すべてのストレージバックエンドが実装すべきインターフェースを定義。 +責任分離型設計: Read操作を中心とし、Write操作はオプショナル。 +""" + +from abc import ABC, abstractmethod +from typing import List, Optional, Generator, Dict, Any + + +class StorageBackend(ABC): + """ストレージバックエンドの抽象基底クラス(Read専用重視)""" + + # --- 読み取り系メソッド(Read Operations) --- + + @abstractmethod + def load(self, path: str) -> Optional[bytes]: + """ + ファイルを読み込む + + Args: + path: ファイルパス(相対パス形式) + + Returns: + Optional[bytes]: ファイル内容、存在しない場合はNone + """ + pass + + @abstractmethod + def load_stream(self, path: str, chunk_size: int = 65536) -> Generator[bytes, None, None]: + """ + ファイルをストリーミング読み込みする + + Args: + path: ファイルパス + chunk_size: チャンクサイズ(デフォルト64KB) + + Yields: + bytes: ファイルチャンク + """ + pass + + @abstractmethod + def list_objects(self, prefix: str) -> List[Dict[str, Any]]: + """ + 指定プレフィックス配下のオブジェクト一覧を取得する + + Args: + prefix: プレフィックス + + Returns: + List[Dict]: オブジェクト情報リスト + 各要素: {'Key': str, 'Size': int, 'LastModified': datetime} + """ + pass + + @abstractmethod + def list_objects_with_dirs(self, prefix: str, delimiter: str = '/') -> Dict[str, Any]: + """ + ファイルとディレクトリの一覧を取得する + + Args: + prefix: プレフィックス + delimiter: パス区切り文字 + + Returns: + Dict: {'contents': [...], 'common_prefixes': [...]} + """ + pass + + @abstractmethod + def exists(self, path: str) -> bool: + """ + ファイルが存在するか確認する + + Args: + path: ファイルパス + + Returns: + bool: 存在する場合True + """ + pass + + @abstractmethod + def get_metadata(self, path: str) -> Optional[Dict[str, Any]]: + """ + ファイルのメタデータを取得する + + Args: + path: ファイルパス + + Returns: + Optional[Dict]: メタデータ(content_length, last_modified等) + """ + pass + + # --- 書き込み系メソッド(Optional - log-serverでは基本使用しない) --- + + def save(self, path: str, content: bytes, content_type: str = 'application/octet-stream') -> bool: + """ + ファイルを保存する(オプショナル) + + Args: + path: 保存先パス(相対パス形式) + content: ファイル内容(バイト列) + content_type: コンテンツタイプ + + Returns: + bool: 成功時True + """ + raise NotImplementedError("Write operations are optional for log-server backends") + + def delete(self, path: str) -> bool: + """ + ファイルを削除する(オプショナル) + + Args: + path: ファイルパス + + Returns: + bool: 成功時True + """ + raise NotImplementedError("Write operations are optional for log-server backends") + + # --- オプショナルメソッド(Optional Operations) --- + + def generate_presigned_url(self, path: str, expires_in: int = 3600) -> Optional[str]: + """ + 事前署名URLを生成する(S3のみ実装) + + Args: + path: ファイルパス + expires_in: 有効期限(秒) + + Returns: + Optional[str]: 事前署名URL、未対応の場合None + """ + return None diff --git a/app/services/storage/backends/local.py b/app/services/storage/backends/local.py new file mode 100644 index 0000000..7c8fcc1 --- /dev/null +++ b/app/services/storage/backends/local.py @@ -0,0 +1,157 @@ +"""ローカルファイルシステムストレージバックエンド + +ローカルファイルシステムを使用したストレージ。 +""" + +import logging +from pathlib import Path +from typing import List, Optional, Generator, Dict, Any +from datetime import datetime + +from ..registry import BackendRegistry +from ..config import LocalConfig +from .base import StorageBackend + +logger = logging.getLogger(__name__) + + +@BackendRegistry.register("local") +class LocalStorageBackend(StorageBackend): + """ローカルファイルシステムストレージバックエンド""" + + def __init__(self, config: LocalConfig = None): + """ + ローカルバックエンドを初期化 + + Args: + config: ローカル設定。Noneの場合は環境変数から読み込み + """ + if config is None: + config = LocalConfig.from_env() + + self.base_path = Path(config.base_path) + self.base_path.mkdir(parents=True, exist_ok=True) + logger.info(f"LocalStorageBackend 
initialized: path={self.base_path}") + + def _get_full_path(self, path: str) -> Path: + """相対パスをフルパスに変換""" + return self.base_path / path + + def load(self, path: str) -> Optional[bytes]: + try: + full_path = self._get_full_path(path) + with open(full_path, 'rb') as f: + return f.read() + except FileNotFoundError: + logger.debug(f"Local file not found: {path}") + return None + except Exception as e: + logger.error(f"Local load failed: {path} - {e}") + return None + + def load_stream(self, path: str, chunk_size: int = 65536) -> Generator[bytes, None, None]: + try: + full_path = self._get_full_path(path) + with open(full_path, 'rb') as f: + while True: + chunk = f.read(chunk_size) + if not chunk: + break + yield chunk + except Exception as e: + logger.error(f"Local stream load failed: {path} - {e}") + return + + def list_objects(self, prefix: str) -> List[Dict[str, Any]]: + all_objects = [] + base_dir = self._get_full_path(prefix) + + if not base_dir.exists(): + return [] + + try: + for file_path in base_dir.rglob('*'): + if file_path.is_file(): + relative_path = str(file_path.relative_to(self.base_path)) + stat = file_path.stat() + all_objects.append({ + 'Key': relative_path, + 'Size': stat.st_size, + 'LastModified': datetime.fromtimestamp(stat.st_mtime) + }) + except Exception as e: + logger.error(f"Local list_objects failed: {prefix} - {e}") + + return all_objects + + def list_objects_with_dirs(self, prefix: str, delimiter: str = '/') -> Dict[str, Any]: + base_dir = self._get_full_path(prefix) + contents = [] + common_prefixes = [] + + if not base_dir.exists(): + return {'contents': contents, 'common_prefixes': common_prefixes} + + try: + for item in base_dir.iterdir(): + relative_path = str(item.relative_to(self.base_path)) + if item.is_file(): + stat = item.stat() + contents.append({ + 'Key': relative_path, + 'Size': stat.st_size, + 'LastModified': datetime.fromtimestamp(stat.st_mtime) + }) + elif item.is_dir(): + common_prefixes.append({ + 'Prefix': relative_path + '/' + }) + except Exception as e: + logger.error(f"Local list_objects_with_dirs failed: {prefix} - {e}") + + return {'contents': contents, 'common_prefixes': common_prefixes} + + def exists(self, path: str) -> bool: + full_path = self._get_full_path(path) + return full_path.exists() and full_path.is_file() + + def get_metadata(self, path: str) -> Optional[Dict[str, Any]]: + try: + full_path = self._get_full_path(path) + if not full_path.exists(): + return None + stat = full_path.stat() + return { + 'content_length': stat.st_size, + 'last_modified': datetime.fromtimestamp(stat.st_mtime), + 'content_type': 'application/octet-stream' + } + except Exception: + return None + + def save(self, path: str, content: bytes, content_type: str = 'application/octet-stream') -> bool: + try: + full_path = self._get_full_path(path) + full_path.parent.mkdir(parents=True, exist_ok=True) + with open(full_path, 'wb') as f: + f.write(content) + logger.debug(f"Local save success: {path}") + return True + except Exception as e: + logger.error(f"Local save failed: {path} - {e}") + return False + + def delete(self, path: str) -> bool: + try: + full_path = self._get_full_path(path) + if full_path.exists(): + full_path.unlink() + return True + except Exception as e: + logger.error(f"Local delete failed: {path} - {e}") + return False + + def generate_presigned_url(self, path: str, expires_in: int = 3600) -> Optional[str]: + # ローカルモードでは事前署名URLをサポートしない + logger.warning(f"Presigned URL not supported in local mode: {path}") + return None diff --git 
a/app/services/storage/backends/s3.py b/app/services/storage/backends/s3.py new file mode 100644 index 0000000..04cccfa --- /dev/null +++ b/app/services/storage/backends/s3.py @@ -0,0 +1,173 @@ +"""S3ストレージバックエンド + +AWS S3およびS3互換ストレージ(MinIO等)に対応。 +""" + +import logging +from typing import List, Optional, Generator, Dict, Any + +import boto3 +from botocore.exceptions import ClientError + +from ..registry import BackendRegistry +from ..config import S3Config +from .base import StorageBackend + +logger = logging.getLogger(__name__) + + +@BackendRegistry.register("s3") +class S3StorageBackend(StorageBackend): + """S3ストレージバックエンド""" + + def __init__(self, config: S3Config = None): + """ + S3バックエンドを初期化 + + Args: + config: S3設定。Noneの場合は環境変数から読み込み + """ + if config is None: + config = S3Config.from_env() + + client_kwargs = { + 'aws_access_key_id': config.access_key_id, + 'aws_secret_access_key': config.secret_access_key, + 'region_name': config.region + } + + if config.endpoint_url: + client_kwargs['endpoint_url'] = config.endpoint_url + + self.client = boto3.client('s3', **client_kwargs) + self.bucket_name = config.bucket_name + logger.info(f"S3StorageBackend initialized: bucket={self.bucket_name}") + + def load(self, path: str) -> Optional[bytes]: + try: + response = self.client.get_object(Bucket=self.bucket_name, Key=path) + return response['Body'].read() + except ClientError as e: + error_code = e.response.get('Error', {}).get('Code', '') + if error_code == 'NoSuchKey': + logger.debug(f"S3 object not found: {path}") + else: + logger.error(f"S3 load failed: {path} - {e}") + return None + + def load_stream(self, path: str, chunk_size: int = 65536) -> Generator[bytes, None, None]: + try: + response = self.client.get_object(Bucket=self.bucket_name, Key=path) + body = response['Body'] + while True: + chunk = body.read(chunk_size) + if not chunk: + break + yield chunk + except ClientError as e: + logger.error(f"S3 stream load failed: {path} - {e}") + return + + def list_objects(self, prefix: str) -> List[Dict[str, Any]]: + all_objects = [] + continuation_token = None + + while True: + params = { + 'Bucket': self.bucket_name, + 'Prefix': prefix, + } + if continuation_token: + params['ContinuationToken'] = continuation_token + + try: + response = self.client.list_objects_v2(**params) + except ClientError as e: + logger.error(f"S3 list_objects failed: {prefix} - {e}") + return [] + + contents = response.get('Contents', []) + for obj in contents: + if obj['Key'] != prefix and not obj['Key'].endswith('/'): + all_objects.append({ + 'Key': obj['Key'], + 'Size': obj['Size'], + 'LastModified': obj['LastModified'] + }) + + if response.get('IsTruncated'): + continuation_token = response.get('NextContinuationToken') + else: + break + + return all_objects + + def list_objects_with_dirs(self, prefix: str, delimiter: str = '/') -> Dict[str, Any]: + try: + response = self.client.list_objects_v2( + Bucket=self.bucket_name, + Prefix=prefix, + Delimiter=delimiter + ) + return { + 'contents': response.get('Contents', []), + 'common_prefixes': response.get('CommonPrefixes', []) + } + except ClientError as e: + logger.error(f"S3 list_objects_with_dirs failed: {prefix} - {e}") + return {'contents': [], 'common_prefixes': []} + + def exists(self, path: str) -> bool: + try: + self.client.head_object(Bucket=self.bucket_name, Key=path) + return True + except ClientError: + return False + + def get_metadata(self, path: str) -> Optional[Dict[str, Any]]: + try: + response = self.client.head_object(Bucket=self.bucket_name, 
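+            # HTTP HEAD request: only metadata headers are returned, no object body.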
Key=path) + return { + 'content_length': response['ContentLength'], + 'last_modified': response['LastModified'], + 'content_type': response.get('ContentType', 'application/octet-stream') + } + except ClientError: + return None + + def save(self, path: str, content: bytes, content_type: str = 'application/octet-stream') -> bool: + try: + self.client.put_object( + Bucket=self.bucket_name, + Key=path, + Body=content, + ContentType=content_type + ) + logger.debug(f"S3 upload success: {path}") + return True + except ClientError as e: + logger.error(f"S3 upload failed: {path} - {e}") + return False + + def delete(self, path: str) -> bool: + try: + self.client.delete_object(Bucket=self.bucket_name, Key=path) + return True + except ClientError as e: + logger.error(f"S3 delete failed: {path} - {e}") + return False + + def generate_presigned_url(self, path: str, expires_in: int = 3600) -> Optional[str]: + try: + url = self.client.generate_presigned_url( + 'get_object', + Params={ + 'Bucket': self.bucket_name, + 'Key': path + }, + ExpiresIn=expires_in + ) + return url + except ClientError as e: + logger.error(f"Failed to generate presigned URL: {path} - {e}") + return None diff --git a/app/services/storage/config.py b/app/services/storage/config.py new file mode 100644 index 0000000..4b5bdd0 --- /dev/null +++ b/app/services/storage/config.py @@ -0,0 +1,67 @@ +"""ストレージ設定クラス + +環境変数からの設定読み込みを一元管理。 +""" + +from dataclasses import dataclass, field +from typing import Optional +import os + + +@dataclass +class S3Config: + """S3固有設定""" + bucket_name: str = "labcode-dev-artifacts" + endpoint_url: Optional[str] = None + region: str = "ap-northeast-1" + access_key_id: Optional[str] = None + secret_access_key: Optional[str] = None + + @classmethod + def from_env(cls) -> 'S3Config': + """環境変数から設定を読み込み""" + return cls( + bucket_name=os.getenv('S3_BUCKET_NAME', 'labcode-dev-artifacts'), + endpoint_url=os.getenv('S3_ENDPOINT_URL'), + region=os.getenv('AWS_DEFAULT_REGION', 'ap-northeast-1'), + access_key_id=os.getenv('AWS_ACCESS_KEY_ID'), + secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY') + ) + + +@dataclass +class LocalConfig: + """ローカルストレージ固有設定""" + base_path: str = "/data/storage" + + @classmethod + def from_env(cls) -> 'LocalConfig': + """環境変数から設定を読み込み""" + return cls( + base_path=os.getenv('LOCAL_STORAGE_PATH', '/data/storage') + ) + + +@dataclass +class StorageConfig: + """統合ストレージ設定""" + mode: str = "s3" + s3: S3Config = field(default_factory=S3Config) + local: LocalConfig = field(default_factory=LocalConfig) + + @classmethod + def from_env(cls) -> 'StorageConfig': + """環境変数から設定を読み込み""" + return cls( + mode=os.getenv('STORAGE_MODE', 's3').lower(), + s3=S3Config.from_env(), + local=LocalConfig.from_env() + ) + + def get_backend_config(self): + """現在のモードに対応するバックエンド設定を取得""" + if self.mode == 's3': + return self.s3 + elif self.mode == 'local': + return self.local + return None diff --git a/app/services/storage/exceptions.py b/app/services/storage/exceptions.py new file mode 100644 index 0000000..a149f92 --- /dev/null +++ b/app/services/storage/exceptions.py @@ -0,0 +1,29 @@ +"""カスタム例外 + +ストレージ関連のエラーを表す例外クラス。 +""" + + +class StorageError(Exception): + """ストレージ操作の基底例外""" + pass + + +class StorageNotFoundError(StorageError): + """ファイルが見つからない""" + pass + + +class StorageAccessError(StorageError): + """ストレージアクセスエラー""" + pass + + +class StorageConfigError(StorageError): + """設定エラー""" + pass + + +class BackendNotRegisteredError(StorageError): + """バックエンドが未登録""" + pass diff --git a/app/services/storage/registry.py 
b/app/services/storage/registry.py new file mode 100644 index 0000000..be358f6 --- /dev/null +++ b/app/services/storage/registry.py @@ -0,0 +1,65 @@ +"""バックエンドレジストリ + +ストレージバックエンドの動的登録・取得を管理。 +""" + +from typing import Dict, Type, TYPE_CHECKING + +if TYPE_CHECKING: + from .backends.base import StorageBackend + + +class BackendRegistry: + """ストレージバックエンドのレジストリ""" + + _backends: Dict[str, Type['StorageBackend']] = {} + + @classmethod + def register(cls, mode: str): + """ + バックエンドクラスを登録するデコレータ + + 使用例: + @BackendRegistry.register("s3") + class S3StorageBackend(StorageBackend): + ... + """ + def decorator(backend_class: Type['StorageBackend']): + cls._backends[mode.lower()] = backend_class + return backend_class + return decorator + + @classmethod + def get(cls, mode: str) -> Type['StorageBackend']: + """ + モード名からバックエンドクラスを取得 + + Args: + mode: ストレージモード名('s3', 'local'等) + + Returns: + バックエンドクラス + + Raises: + ValueError: 未登録のモードが指定された場合 + """ + mode_lower = mode.lower() + if mode_lower not in cls._backends: + available = ", ".join(cls._backends.keys()) + raise ValueError(f"Unknown storage mode: {mode}. Available: {available}") + return cls._backends[mode_lower] + + @classmethod + def list_modes(cls) -> list: + """登録済みモード一覧を取得""" + return list(cls._backends.keys()) + + @classmethod + def is_registered(cls, mode: str) -> bool: + """モードが登録済みか確認""" + return mode.lower() in cls._backends + + @classmethod + def clear(cls): + """テスト用: レジストリをクリア""" + cls._backends.clear() diff --git a/app/services/storage/service.py b/app/services/storage/service.py new file mode 100644 index 0000000..13432d5 --- /dev/null +++ b/app/services/storage/service.py @@ -0,0 +1,148 @@ +"""統合ストレージサービス + +ストレージバックエンドを抽象化し、統一的なAPIを提供。 +責任分離型設計: Read + 管理機能を提供(Write操作はオプショナル) +""" + +import logging +from typing import Optional, List, Dict, Any, Generator + +from .config import StorageConfig +from .registry import BackendRegistry +from .backends.base import StorageBackend + +logger = logging.getLogger(__name__) + + +class StorageService: + """ + 統合ストレージサービス(シングルトン) + + 環境変数STORAGE_MODEでバックエンドを切り替え: + - 's3': S3ストレージ(デフォルト) + - 'local': ローカルファイルシステム + """ + + _instance: Optional['StorageService'] = None + _config: Optional[StorageConfig] = None + + def __new__(cls, config: Optional[StorageConfig] = None): + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance._initialize(config) + return cls._instance + + def _initialize(self, config: Optional[StorageConfig] = None): + """バックエンドを初期化""" + self._config = config or StorageConfig.from_env() + + # レジストリからバックエンドクラスを取得 + backend_class = BackendRegistry.get(self._config.mode) + + # バックエンド固有設定を取得 + backend_config = self._config.get_backend_config() + + # バックエンドをインスタンス化 + self._backend = backend_class(backend_config) + + self.mode = self._config.mode + logger.info(f"StorageService initialized: mode={self.mode}") + + @property + def backend(self) -> StorageBackend: + """バックエンドインスタンスを取得""" + return self._backend + + @property + def config(self) -> StorageConfig: + """設定を取得""" + return self._config + + # --- 読み取り系メソッド --- + + def load(self, path: str) -> Optional[bytes]: + """ファイルを読み込み""" + return self._backend.load(path) + + def load_text(self, path: str, encoding: str = 'utf-8') -> Optional[str]: + """テキストファイルを読み込み""" + content = self.load(path) + if content is None: + return None + return content.decode(encoding) + + def load_json(self, path: str) -> Optional[dict]: + """JSONファイルを読み込み""" + import json + text = self.load_text(path) + if text is None: + return None + 
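+        # json.loads raises json.JSONDecodeError (a ValueError subclass) on
+        # malformed content, so callers reading possibly corrupt files should
+        # catch it. Usage sketch (hypothetical path):
+        #     get_storage().load_json("runs/1/manifest.json")
+        # returns a dict, or None when the object does not exist.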
return json.loads(text) + + def load_stream(self, path: str, chunk_size: int = 65536) -> Generator[bytes, None, None]: + """ファイルをストリーミング読み込み""" + return self._backend.load_stream(path, chunk_size) + + def list_objects(self, prefix: str) -> List[Dict[str, Any]]: + """オブジェクト一覧を取得""" + return self._backend.list_objects(prefix) + + def list_objects_with_dirs(self, prefix: str, delimiter: str = '/') -> Dict[str, Any]: + """ファイルとディレクトリの一覧を取得""" + return self._backend.list_objects_with_dirs(prefix, delimiter) + + def exists(self, path: str) -> bool: + """ファイル存在確認""" + return self._backend.exists(path) + + def get_metadata(self, path: str) -> Optional[Dict[str, Any]]: + """ファイルメタデータ取得""" + return self._backend.get_metadata(path) + + def calculate_total_size(self, prefix: str) -> int: + """指定プレフィックス配下の合計サイズを計算""" + objects = self.list_objects(prefix) + return sum(obj['Size'] for obj in objects) + + # --- 書き込み系メソッド(オプショナル) --- + + def save(self, path: str, content: bytes, content_type: str = 'application/octet-stream') -> bool: + """ファイルを保存(オプショナル: log-serverでは基本使用しない)""" + return self._backend.save(path, content, content_type) + + def save_text(self, path: str, content: str, encoding: str = 'utf-8') -> bool: + """テキストファイルを保存(オプショナル)""" + return self.save(path, content.encode(encoding), content_type='text/plain') + + def save_json(self, path: str, data: dict) -> bool: + """JSONファイルを保存(オプショナル)""" + import json + content = json.dumps(data, ensure_ascii=False, indent=2) + return self.save(path, content.encode('utf-8'), content_type='application/json') + + def delete(self, path: str) -> bool: + """ファイル削除(オプショナル)""" + return self._backend.delete(path) + + # --- S3固有メソッド --- + + def generate_presigned_url(self, path: str, expires_in: int = 3600) -> Optional[str]: + """事前署名URLを生成(S3のみ)""" + return self._backend.generate_presigned_url(path, expires_in) + + # --- ユーティリティ --- + + @classmethod + def reset_instance(cls): + """ + シングルトンインスタンスをリセット(テスト用) + + 注意: 本番環境では使用しないこと + """ + cls._instance = None + cls._config = None + + +def get_storage(config: Optional[StorageConfig] = None) -> StorageService: + """StorageServiceのシングルトンインスタンスを取得""" + return StorageService(config) diff --git a/app/services/storage_service.py b/app/services/storage_service.py new file mode 100644 index 0000000..eaaabf3 --- /dev/null +++ b/app/services/storage_service.py @@ -0,0 +1,41 @@ +"""ストレージサービス抽象化レイヤー(labcode-log-server用) + +services/storageモジュールを使用した統一ストレージアクセス。 +責任分離型設計: Read + 管理機能を提供。 + +使用例: + from services.storage_service import StorageService, get_storage + + storage = get_storage() + content = storage.load("runs/1/log.txt") + objects = storage.list_objects("runs/1/") +""" + +# 内部storageモジュールから再エクスポート +from .storage import ( + StorageService, + StorageBackend, + StorageConfig, + S3Config, + LocalConfig, + BackendRegistry, + get_storage +) + +# 後方互換性のため、バックエンドクラスも直接エクスポート +from .storage.backends import ( + S3StorageBackend, + LocalStorageBackend +) + +__all__ = [ + 'StorageService', + 'StorageBackend', + 'StorageConfig', + 'S3Config', + 'LocalConfig', + 'BackendRegistry', + 'get_storage', + 'S3StorageBackend', + 'LocalStorageBackend' +] diff --git a/app/services/zip_service.py b/app/services/zip_service.py new file mode 100644 index 0000000..a45ba73 --- /dev/null +++ b/app/services/zip_service.py @@ -0,0 +1,237 @@ +"""ZIPストリーミング生成サービス + +複数ランのファイルをZIP形式でストリーミング生成する。 +メモリ効率を考慮し、ファイルを逐次追加しながらZIPストリームを生成する。 +""" + +import json +import zipstream +from datetime import datetime +from typing import List, Generator, Dict, Any, 
Optional +import logging + +from services.s3_service import S3Service + +logger = logging.getLogger(__name__) + +# サイズ制限(バイト) +MAX_ZIP_SIZE = 500 * 1024 * 1024 # 500MB +MAX_SINGLE_FILE_SIZE = 100 * 1024 * 1024 # 100MB +MAX_RUN_COUNT = 100 + + +class ZipServiceError(Exception): + """ZIPサービスエラーの基底クラス""" + pass + + +class SizeLimitExceededError(ZipServiceError): + """サイズ制限超過エラー""" + pass + + +class RunNotFoundError(ZipServiceError): + """ラン未検出エラー""" + pass + + +class ZipStreamService: + """ + ストリーミングZIP生成サービス + + メモリ効率を考慮し、ファイルを逐次追加しながら + ZIPストリームを生成する。 + """ + + def __init__(self, s3_service: Optional[S3Service] = None): + """ + 初期化 + + Args: + s3_service: S3サービスインスタンス(テスト用にDI可能) + """ + self.s3_service = s3_service or S3Service() + + def create_zip_stream( + self, + runs: List[Dict[str, Any]], + include_manifest: bool = True + ) -> Generator[bytes, None, None]: + """ + ZIPストリームを生成する + + Args: + runs: ランオブジェクトリスト + 各要素: {'id': int, 'storage_address': str, 'file_name': str, 'status': str} + include_manifest: manifestファイルを含めるか + + Yields: + bytes: ZIPストリームチャンク + + Raises: + SizeLimitExceededError: サイズ制限超過時 + RunNotFoundError: ランが見つからない時 + """ + if len(runs) > MAX_RUN_COUNT: + raise SizeLimitExceededError( + f"ラン数が上限({MAX_RUN_COUNT}件)を超えています" + ) + + # ZIPストリームを作成 + z = zipstream.ZipFile(mode='w', compression=zipstream.ZIP_DEFLATED) + + # manifest用のデータ収集 + manifest_data = { + 'generated_at': datetime.utcnow().isoformat() + 'Z', + 'runs': [], + 'errors': [], + 'total_files': 0, + 'total_size': 0 + } + + # 各ランを処理 + for run in runs: + run_id = run.get('id') + storage_address = run.get('storage_address', '') + + if not storage_address: + logger.warning(f"Run {run_id}: storage_address is empty, skipping") + manifest_data['errors'].append({ + 'run_id': run_id, + 'error': 'storage_address is empty', + 'skipped': True + }) + continue + + try: + # S3からファイル一覧を取得 + prefix = storage_address.rstrip('/') + '/' + objects = self.s3_service.list_objects_recursive(prefix) + + if not objects: + logger.warning(f"Run {run_id}: No files found at {prefix}") + manifest_data['errors'].append({ + 'run_id': run_id, + 'error': 'No files found', + 'skipped': True + }) + continue + + # サイズチェック + total_run_size = sum(obj['Size'] for obj in objects) + if manifest_data['total_size'] + total_run_size > MAX_ZIP_SIZE: + raise SizeLimitExceededError( + f"合計サイズが上限({MAX_ZIP_SIZE // (1024*1024)}MB)を超えます" + ) + + run_file_count = 0 + + # 各ファイルをZIPに追加 + for obj in objects: + key = obj['Key'] + size = obj['Size'] + + # 大きすぎるファイルはスキップ + if size > MAX_SINGLE_FILE_SIZE: + logger.warning( + f"File {key} exceeds size limit ({size} bytes), skipping" + ) + continue + + # ZIP内のパスを決定 + # storage_address以降の相対パスを使用 + relative_path = key[len(prefix):] + zip_path = f"run_{run_id}/{relative_path}" + + # ファイルコンテンツのジェネレータを作成 + z.write_iter( + zip_path, + self._file_content_generator(key) + ) + + run_file_count += 1 + manifest_data['total_size'] += size + + # manifest用のラン情報を記録 + manifest_data['runs'].append({ + 'run_id': run_id, + 'file_name': run.get('file_name', ''), + 'status': run.get('status', ''), + 'file_count': run_file_count, + 'total_size': total_run_size + }) + manifest_data['total_files'] += run_file_count + + except SizeLimitExceededError: + raise + except Exception as e: + logger.error(f"Run {run_id}: Error processing - {e}") + manifest_data['errors'].append({ + 'run_id': run_id, + 'error': str(e), + 'skipped': True + }) + + # manifestファイルを追加 + if include_manifest: + manifest_json = json.dumps(manifest_data, indent=2, ensure_ascii=False) + 
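+            # writestr queues the manifest bytes in memory up front; entries added
+            # via write_iter above are still read lazily while the ZIP is iterated.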
z.writestr('manifest.json', manifest_json.encode('utf-8')) + + # ZIPストリームを出力 + for chunk in z: + yield chunk + + def _file_content_generator(self, key: str) -> Generator[bytes, None, None]: + """ + S3からファイルコンテンツを取得するジェネレータ + + Args: + key: S3キー + + Yields: + bytes: ファイルチャンク + """ + try: + for chunk in self.s3_service.get_object_stream(key): + yield chunk + except Exception as e: + logger.error(f"Error reading file {key}: {e}") + # 空のファイルとして処理 + yield b'' + + def estimate_zip_size(self, run_ids: List[int], runs_data: List[Dict[str, Any]]) -> int: + """ + ZIPファイルの推定サイズを計算する + + Args: + run_ids: ランIDリスト + runs_data: ランデータリスト + + Returns: + int: 推定サイズ(バイト) + """ + total_size = 0 + run_map = {run['id']: run for run in runs_data} + + for run_id in run_ids: + run = run_map.get(run_id) + if not run or not run.get('storage_address'): + continue + + prefix = run['storage_address'].rstrip('/') + '/' + try: + total_size += self.s3_service.calculate_total_size(prefix) + except Exception as e: + logger.warning(f"Could not calculate size for run {run_id}: {e}") + + return total_size + + def generate_filename(self) -> str: + """ + ZIPファイル名を生成する + + Returns: + str: ファイル名(例: labcode_runs_20251221_120000.zip) + """ + timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S') + return f"labcode_runs_{timestamp}.zip" diff --git a/app/test_s3_connection.py b/app/test_s3_connection.py new file mode 100644 index 0000000..730511c --- /dev/null +++ b/app/test_s3_connection.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +"""S3接続テストスクリプト""" + +import os +import boto3 +from botocore.exceptions import ClientError, NoCredentialsError + +def test_s3_connection(): + """S3への接続をテストする""" + print("=" * 60) + print("S3接続テスト") + print("=" * 60) + + # 環境変数確認 + print("\n📋 環境変数確認:") + access_key = os.getenv('AWS_ACCESS_KEY_ID') + secret_key = os.getenv('AWS_SECRET_ACCESS_KEY') + region = os.getenv('AWS_DEFAULT_REGION', 'ap-northeast-1') + bucket_name = os.getenv('S3_BUCKET_NAME', 'labcode-dev-artifacts') + + print(f" AWS_ACCESS_KEY_ID: {'設定済み (' + access_key[:8] + '...)' if access_key else '❌ 未設定'}") + print(f" AWS_SECRET_ACCESS_KEY: {'設定済み' if secret_key else '❌ 未設定'}") + print(f" AWS_DEFAULT_REGION: {region}") + print(f" S3_BUCKET_NAME: {bucket_name}") + + if not access_key or not secret_key: + print("\n❌ AWS認証情報が設定されていません") + print(" → .envファイルを確認してください") + return False + + try: + # S3クライアント作成 + print("\n🔍 S3クライアントを作成中...") + s3_client = boto3.client( + 's3', + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + region_name=region + ) + print("✅ S3クライアント作成成功") + + # バケット存在確認 + print(f"\n🔍 バケット '{bucket_name}' への接続を確認中...") + try: + s3_client.head_bucket(Bucket=bucket_name) + print(f"✅ バケット '{bucket_name}' への接続成功") + except ClientError as e: + error_code = e.response['Error']['Code'] + if error_code == '404': + print(f"❌ バケットが存在しません: {bucket_name}") + # バケット一覧を確認 + print("\n🔍 アクセス可能なバケット一覧を確認中...") + try: + response = s3_client.list_buckets() + buckets = response.get('Buckets', []) + if buckets: + print(" 利用可能なバケット:") + for b in buckets: + print(f" - {b['Name']}") + else: + print(" 利用可能なバケットがありません") + except Exception as e2: + print(f" バケット一覧取得失敗: {e2}") + return False + elif error_code == '403': + print(f"❌ バケットへのアクセスが拒否されました") + return False + else: + raise + + # オブジェクト一覧取得テスト + print("\n🔍 オブジェクト一覧を取得中...") + response = s3_client.list_objects_v2( + Bucket=bucket_name, + MaxKeys=10 + ) + key_count = response.get('KeyCount', 0) + print(f"✅ オブジェクト一覧取得成功({key_count}件)") + + if key_count > 0: + print("\n📁 
既存オブジェクト一覧(最大10件):") + for obj in response.get('Contents', []): + size_kb = obj['Size'] / 1024 + print(f" - {obj['Key']} ({size_kb:.1f} KB)") + + # テストファイルアップロード + print("\n🔍 テストファイルのアップロードを試行中...") + test_key = "test/connection_test.txt" + test_content = f"S3接続テスト成功 - LabCode\nタイムスタンプ: {os.popen('date').read().strip()}" + s3_client.put_object( + Bucket=bucket_name, + Key=test_key, + Body=test_content.encode('utf-8'), + ContentType='text/plain; charset=utf-8' + ) + print(f"✅ テストファイルアップロード成功: {test_key}") + + # テストファイル読み取り + print("\n🔍 テストファイルの読み取りを試行中...") + response = s3_client.get_object(Bucket=bucket_name, Key=test_key) + content = response['Body'].read().decode('utf-8') + print(f"✅ テストファイル読み取り成功:") + print(f" 内容: '{content}'") + + # テストファイル削除 + print("\n🔍 テストファイルの削除を試行中...") + s3_client.delete_object(Bucket=bucket_name, Key=test_key) + print(f"✅ テストファイル削除成功") + + # 署名付きURL生成テスト + print("\n🔍 署名付きURL生成テスト...") + # ダミーファイルをアップロード + s3_client.put_object(Bucket=bucket_name, Key="test/presigned_test.txt", Body=b"test") + presigned_url = s3_client.generate_presigned_url( + 'get_object', + Params={'Bucket': bucket_name, 'Key': 'test/presigned_test.txt'}, + ExpiresIn=60 + ) + print(f"✅ 署名付きURL生成成功") + print(f" URL: {presigned_url[:80]}...") + # クリーンアップ + s3_client.delete_object(Bucket=bucket_name, Key="test/presigned_test.txt") + + print("\n" + "=" * 60) + print("🎉 すべてのS3テストが成功しました!") + print("=" * 60) + + print("\n📊 テスト結果サマリー:") + print(" ✅ S3クライアント作成: 成功") + print(" ✅ バケット接続: 成功") + print(" ✅ オブジェクト一覧取得: 成功") + print(" ✅ ファイルアップロード: 成功") + print(" ✅ ファイル読み取り: 成功") + print(" ✅ ファイル削除: 成功") + print(" ✅ 署名付きURL生成: 成功") + + return True + + except NoCredentialsError: + print("\n❌ AWS認証情報が見つかりません") + print(" → .envファイルを確認してください") + print(" → AWS_ACCESS_KEY_ID と AWS_SECRET_ACCESS_KEY が設定されているか確認") + return False + except ClientError as e: + error_code = e.response['Error']['Code'] + error_message = e.response['Error']['Message'] + print(f"\n❌ AWSエラー ({error_code}): {error_message}") + + if error_code == 'InvalidAccessKeyId': + print(" → アクセスキーIDが無効です") + print(" → AWS_ACCESS_KEY_ID を確認してください") + elif error_code == 'SignatureDoesNotMatch': + print(" → シークレットアクセスキーが無効です") + print(" → AWS_SECRET_ACCESS_KEY を確認してください") + elif error_code == 'AccessDenied': + print(" → アクセスが拒否されました") + print(" → IAM権限を確認してください") + + return False + except Exception as e: + print(f"\n❌ 予期せぬエラー: {type(e).__name__}: {e}") + import traceback + traceback.print_exc() + return False + +if __name__ == '__main__': + # .envファイルを読み込む + try: + from dotenv import load_dotenv + # コンテナ内のパスで.envを探す + env_paths = [ + '/app/.env', + '.env', + '../.env', + '/home/ayumu/Documents/Science-Aid/SciAid-LabCode/labcode-test-environment/labcode-log-server/.env' + ] + for env_path in env_paths: + if os.path.exists(env_path): + print(f"📂 .envファイルを読み込み: {env_path}") + load_dotenv(env_path) + break + else: + print("⚠️ .envファイルが見つかりませんでした(環境変数から読み込み)") + except ImportError: + print("⚠️ python-dotenvがインストールされていません(環境変数から読み込み)") + + success = test_s3_connection() + exit(0 if success else 1) diff --git a/playground_merlin/test_storage.py b/playground_merlin/test_storage.py new file mode 100644 index 0000000..6b35060 --- /dev/null +++ b/playground_merlin/test_storage.py @@ -0,0 +1,304 @@ +"""ストレージAPIのユニットテスト + +テスト対象: +- GET /api/storage/list +- GET /api/storage/preview +- GET /api/storage/download +""" + +import pytest +from unittest.mock import patch, MagicMock +from datetime import datetime +from fastapi.testclient import TestClient +from 
botocore.exceptions import ClientError + +# テスト用のmainをインポート +import sys +import os +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'app')) + +from main import app + +client = TestClient(app) + + +# ==================== Mock Data ==================== + +def create_mock_s3_list_response(): + """list_objects_v2のモックレスポンス""" + return { + 'contents': [ + { + 'Key': 'runs/1/output.json', + 'Size': 1024, + 'LastModified': datetime(2025, 12, 15, 10, 0, 0) + }, + { + 'Key': 'runs/1/protocol.yaml', + 'Size': 512, + 'LastModified': datetime(2025, 12, 15, 9, 0, 0) + } + ], + 'common_prefixes': [ + {'Prefix': 'runs/1/artifacts/'} + ] + } + + +def create_mock_s3_get_response(): + """get_objectのモックレスポンス""" + return { + 'body': b'{"result": "success", "data": [1, 2, 3]}', + 'content_length': 42, + 'last_modified': datetime(2025, 12, 15, 10, 0, 0) + } + + +def create_mock_s3_head_response(): + """head_objectのモックレスポンス""" + return { + 'content_length': 1024, + 'last_modified': datetime(2025, 12, 15, 10, 0, 0) + } + + +# ==================== GET /api/storage/list Tests ==================== + +class TestStorageList: + """GET /api/storage/list のテスト""" + + @patch('api.route.storage.S3Service') + def test_list_files_success(self, mock_s3_class): + """正常系: ファイル一覧取得成功""" + mock_s3 = MagicMock() + mock_s3.list_objects.return_value = create_mock_s3_list_response() + mock_s3_class.return_value = mock_s3 + + response = client.get("/api/storage/list?prefix=runs/1/") + + assert response.status_code == 200 + data = response.json() + assert 'files' in data + assert 'directories' in data + assert 'pagination' in data + assert len(data['files']) == 2 + assert len(data['directories']) == 1 + assert data['files'][0]['name'] == 'output.json' + assert data['directories'][0]['name'] == 'artifacts' + + @patch('api.route.storage.S3Service') + def test_list_files_empty(self, mock_s3_class): + """正常系: 空のディレクトリ""" + mock_s3 = MagicMock() + mock_s3.list_objects.return_value = {'contents': [], 'common_prefixes': []} + mock_s3_class.return_value = mock_s3 + + response = client.get("/api/storage/list?prefix=runs/empty/") + + assert response.status_code == 200 + data = response.json() + assert len(data['files']) == 0 + assert len(data['directories']) == 0 + + @patch('api.route.storage.S3Service') + def test_list_files_sort_by_size(self, mock_s3_class): + """正常系: サイズでソート""" + mock_s3 = MagicMock() + mock_s3.list_objects.return_value = create_mock_s3_list_response() + mock_s3_class.return_value = mock_s3 + + response = client.get("/api/storage/list?prefix=runs/1/&sort_by=size&order=desc") + + assert response.status_code == 200 + data = response.json() + # サイズ降順: output.json (1024) > protocol.yaml (512) + assert data['files'][0]['name'] == 'output.json' + assert data['files'][1]['name'] == 'protocol.yaml' + + def test_list_files_missing_prefix(self): + """異常系: prefix未指定""" + response = client.get("/api/storage/list") + + assert response.status_code == 422 # Validation error + + @patch('api.route.storage.S3Service') + def test_list_files_invalid_sort_by(self, mock_s3_class): + """異常系: 無効なsort_by""" + response = client.get("/api/storage/list?prefix=runs/1/&sort_by=invalid") + + assert response.status_code == 400 + assert "sort_by" in response.json()['detail'] + + @patch('api.route.storage.S3Service') + def test_list_files_s3_error(self, mock_s3_class): + """異常系: S3エラー""" + mock_s3 = MagicMock() + 
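+        # Simulate boto3 raising AccessDenied; the route is expected to map this
+        # to an HTTP 403 response, asserted below.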
mock_s3.list_objects.side_effect = ClientError( + {'Error': {'Code': 'AccessDenied', 'Message': 'Access Denied'}}, + 'ListObjectsV2' + ) + mock_s3_class.return_value = mock_s3 + + response = client.get("/api/storage/list?prefix=runs/1/") + + assert response.status_code == 403 + + +# ==================== GET /api/storage/preview Tests ==================== + +class TestStoragePreview: + """GET /api/storage/preview のテスト""" + + @patch('api.route.storage.S3Service') + def test_preview_json_success(self, mock_s3_class): + """正常系: JSONファイルプレビュー""" + mock_s3 = MagicMock() + mock_s3.get_object.return_value = create_mock_s3_get_response() + mock_s3_class.return_value = mock_s3 + + response = client.get("/api/storage/preview?file_path=runs/1/output.json") + + assert response.status_code == 200 + data = response.json() + assert data['content_type'] == 'json' + assert 'result' in data['content'] + assert data['truncated'] is False + + @patch('api.route.storage.S3Service') + def test_preview_yaml_success(self, mock_s3_class): + """正常系: YAMLファイルプレビュー""" + mock_s3 = MagicMock() + mock_s3.get_object.return_value = { + 'body': b'key: value\nlist:\n - item1\n - item2', + 'content_length': 35, + 'last_modified': datetime(2025, 12, 15, 10, 0, 0) + } + mock_s3_class.return_value = mock_s3 + + response = client.get("/api/storage/preview?file_path=runs/1/config.yaml") + + assert response.status_code == 200 + data = response.json() + assert data['content_type'] == 'yaml' + + @patch('api.route.storage.S3Service') + def test_preview_truncated(self, mock_s3_class): + """正常系: 行数制限による切り詰め""" + mock_s3 = MagicMock() + # 100行のテストデータ + content = '\n'.join([f'line {i}' for i in range(100)]) + mock_s3.get_object.return_value = { + 'body': content.encode('utf-8'), + 'content_length': len(content), + 'last_modified': datetime(2025, 12, 15, 10, 0, 0) + } + mock_s3_class.return_value = mock_s3 + + response = client.get("/api/storage/preview?file_path=runs/1/log.txt&max_lines=50") + + assert response.status_code == 200 + data = response.json() + assert data['truncated'] is True + assert len(data['content'].split('\n')) == 50 + + def test_preview_binary_file(self): + """異常系: バイナリファイル""" + response = client.get("/api/storage/preview?file_path=runs/1/data.bin") + + assert response.status_code == 415 + assert "Binary" in response.json()['detail'] + + @patch('api.route.storage.S3Service') + def test_preview_file_not_found(self, mock_s3_class): + """異常系: ファイルが存在しない""" + mock_s3 = MagicMock() + mock_s3.get_object.side_effect = ClientError( + {'Error': {'Code': 'NoSuchKey', 'Message': 'Not Found'}}, + 'GetObject' + ) + mock_s3_class.return_value = mock_s3 + + response = client.get("/api/storage/preview?file_path=runs/1/nonexistent.json") + + assert response.status_code == 404 + + +# ==================== GET /api/storage/download Tests ==================== + +class TestStorageDownload: + """GET /api/storage/download のテスト""" + + @patch('api.route.storage.S3Service') + def test_download_success(self, mock_s3_class): + """正常系: ダウンロードURL生成""" + mock_s3 = MagicMock() + mock_s3.head_object.return_value = create_mock_s3_head_response() + mock_s3.generate_presigned_url.return_value = 'https://example.s3.amazonaws.com/runs/1/output.json?signature=xxx' + mock_s3_class.return_value = mock_s3 + + response = client.get("/api/storage/download?file_path=runs/1/output.json") + + assert response.status_code == 200 + data = response.json() + assert 'download_url' in data + assert 'expires_at' in data + assert 's3.amazonaws.com' in data['download_url'] + + 
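+    # The next test verifies that a caller-supplied expires_in is forwarded
+    # unchanged to S3Service.generate_presigned_url.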
@patch('api.route.storage.S3Service') + def test_download_custom_expiry(self, mock_s3_class): + """正常系: カスタム有効期限""" + mock_s3 = MagicMock() + mock_s3.head_object.return_value = create_mock_s3_head_response() + mock_s3.generate_presigned_url.return_value = 'https://example.s3.amazonaws.com/test' + mock_s3_class.return_value = mock_s3 + + response = client.get("/api/storage/download?file_path=runs/1/output.json&expires_in=7200") + + assert response.status_code == 200 + mock_s3.generate_presigned_url.assert_called_once() + call_args = mock_s3.generate_presigned_url.call_args + assert call_args[1]['expires_in'] == 7200 + + @patch('api.route.storage.S3Service') + def test_download_file_not_found(self, mock_s3_class): + """異常系: ファイルが存在しない""" + mock_s3 = MagicMock() + mock_s3.head_object.side_effect = ClientError( + {'Error': {'Code': 'NoSuchKey', 'Message': 'Not Found'}}, + 'HeadObject' + ) + mock_s3_class.return_value = mock_s3 + + response = client.get("/api/storage/download?file_path=runs/1/nonexistent.json") + + assert response.status_code == 404 + + def test_download_expires_in_too_short(self): + """異常系: 有効期限が短すぎる""" + response = client.get("/api/storage/download?file_path=runs/1/output.json&expires_in=30") + + assert response.status_code == 422 # Validation error + + +# ==================== Integration Tests (Optional) ==================== + +class TestStorageIntegration: + """統合テスト(実際のS3接続が必要) + + Note: これらのテストは環境変数が設定されている場合のみ実行される + """ + + @pytest.mark.skipif( + not os.getenv('AWS_ACCESS_KEY_ID'), + reason="AWS credentials not configured" + ) + def test_real_s3_list(self): + """実際のS3へのリスト操作""" + response = client.get("/api/storage/list?prefix=") + + assert response.status_code == 200 + + +if __name__ == '__main__': + pytest.main([__file__, '-v']) diff --git a/requirements.txt b/requirements.txt index 3254f82..2aeb61f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,5 @@ starlette==0.41.3 typing_extensions==4.12.2 uvicorn==0.34.0 PyYAML==6.0.2 +boto3>=1.35.0 +zipstream-new>=1.1.8 diff --git a/scripts/migrate_storage_address.py b/scripts/migrate_storage_address.py new file mode 100644 index 0000000..f81ad18 --- /dev/null +++ b/scripts/migrate_storage_address.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +""" +マイグレーションスクリプト: Google Drive URL → S3パス + +既存RunデータのGoogle Drive URLをS3パス形式に変換する。 + +使用方法: + Docker内で実行: + docker exec labcode_log_server python /app/scripts/migrate_storage_address.py [--dry-run] + + 直接実行(プロジェクトルートから): + cd labcode-log-server/scripts + python migrate_storage_address.py [--dry-run] + +オプション: + --dry-run 実際に更新せず、対象レコードを表示するだけ + +作成日: 2025-12-21 +作成者: Astra エージェント +""" + +import sys +import argparse +from pathlib import Path + +# プロジェクトルートをパスに追加(直接実行時用) +app_dir = Path(__file__).parent.parent +if str(app_dir) not in sys.path: + sys.path.insert(0, str(app_dir)) + +try: + from define_db.database import SessionLocal + from define_db.models import Run +except ImportError: + # Docker内で実行する場合 + sys.path.insert(0, '/app') + from define_db.database import SessionLocal + from define_db.models import Run + + +def migrate_storage_address(dry_run: bool = False): + """Google Drive URLをS3パスに移行""" + + print("=" * 60) + print("Storage Address Migration: Google Drive URL → S3 Path") + print("=" * 60) + + with SessionLocal() as session: + # Google Drive URLを持つRunを検索 + runs_with_url = session.query(Run).filter( + Run.storage_address.like('https://drive.google.com%') + ).all() + + print(f"\n対象レコード数: {len(runs_with_url)}") + + if not runs_with_url: + print("✅ 
移行対象のレコードはありません。") + return + + print("\n移行対象:") + print("-" * 60) + + for run in runs_with_url: + old_value = run.storage_address + new_value = f"runs/{run.id}/" + + print(f" Run ID: {run.id}") + print(f" 旧: {old_value[:50]}...") + print(f" 新: {new_value}") + print() + + if not dry_run: + run.storage_address = new_value + + if dry_run: + print("-" * 60) + print("🔍 [DRY RUN] 実際の更新は行われませんでした。") + print(" 実行するには --dry-run オプションを外してください。") + else: + session.commit() + print("-" * 60) + print(f"✅ {len(runs_with_url)} 件のレコードを更新しました。") + + print("=" * 60) + + +def main(): + parser = argparse.ArgumentParser( + description="Google Drive URLをS3パスに移行" + ) + parser.add_argument( + '--dry-run', + action='store_true', + help='実際に更新せず、対象レコードを表示するだけ' + ) + + args = parser.parse_args() + migrate_storage_address(dry_run=args.dry_run) + + +if __name__ == "__main__": + main() From 380b1cc62c71ead2136be35fe1659884f603fe27 Mon Sep 17 00:00:00 2001 From: Ayumu-Nono Date: Tue, 23 Dec 2025 22:00:05 +0900 Subject: [PATCH 2/6] =?UTF-8?q?storage=E5=AE=8C=E4=BA=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .env.local | 2 +- .gitignore | 7 +- app/api/route/process_operations.py | 163 ++++++++ app/api/route/storage_v2.py | 516 +++++++++++++++++++++++- app/main.py | 3 +- app/services/hal/hybrid_access_layer.py | 123 +++++- app/services/hal/models.py | 5 +- 7 files changed, 797 insertions(+), 22 deletions(-) create mode 100644 app/api/route/process_operations.py diff --git a/.env.local b/.env.local index 9253e70..578ced1 100644 --- a/.env.local +++ b/.env.local @@ -5,5 +5,5 @@ LOCAL_STORAGE_PATH=/data/storage # ダミーS3設定(ローカルモードでは使用されない) AWS_ACCESS_KEY_ID=dummy AWS_SECRET_ACCESS_KEY=dummy -AWS_DEFAULT_REGION=ap-northeast-1 +AWS_DEFAULT_REGION=ap-northeast-2 S3_BUCKET_NAME=labcode-dev-artifacts diff --git a/.gitignore b/.gitignore index 5596f89..bbe810e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,9 @@ .venv __pycache__ *.db -.env \ No newline at end of file +.env.env.s3.backup + +# Backup files (not to be tracked) +app/scripts_backup_local/ +app/tests_backup_local/ +data_backup_local/ diff --git a/app/api/route/process_operations.py b/app/api/route/process_operations.py new file mode 100644 index 0000000..8c3ce55 --- /dev/null +++ b/app/api/route/process_operations.py @@ -0,0 +1,163 @@ +"""ProcessOperation中間テーブルのAPI + +ProcessとOperationのN:M関係を管理するAPIエンドポイント。 +""" + +from fastapi import APIRouter, HTTPException, Query, Body +from typing import Optional, List +from pydantic import BaseModel +from define_db.models import ProcessOperation, Process, Operation +from define_db.database import SessionLocal + +router = APIRouter() + + +class ProcessOperationCreate(BaseModel): + """ProcessOperation作成リクエスト""" + process_id: int + operation_id: int + + +class ProcessOperationResponse(BaseModel): + """ProcessOperationレスポンス""" + id: int + process_id: int + operation_id: int + created_at: Optional[str] = None + + class Config: + from_attributes = True + + +@router.get("/process-operations", tags=["process-operations"]) +def get_process_operations( + process_id: Optional[int] = Query(None, description="Filter by process_id"), + operation_id: Optional[int] = Query(None, description="Filter by operation_id"), + limit: int = Query(1000, description="Limit number of results", ge=1, le=10000), + offset: int = Query(0, description="Offset for pagination", ge=0) +) -> 
List[ProcessOperationResponse]: + """ + ProcessOperation一覧を取得する。 + + Parameters: + - process_id: プロセスIDでフィルタリング(オプション) + - operation_id: オペレーションIDでフィルタリング(オプション) + - limit: 取得件数制限(デフォルト: 1000) + - offset: オフセット(デフォルト: 0) + """ + with SessionLocal() as session: + query = session.query(ProcessOperation) + + if process_id is not None: + query = query.filter(ProcessOperation.process_id == process_id) + if operation_id is not None: + query = query.filter(ProcessOperation.operation_id == operation_id) + + query = query.limit(limit).offset(offset) + results = query.all() + + return [ + ProcessOperationResponse( + id=po.id, + process_id=po.process_id, + operation_id=po.operation_id, + created_at=po.created_at.isoformat() if po.created_at else None + ) + for po in results + ] + + +@router.post("/process-operations", tags=["process-operations"]) +def create_process_operation( + data: ProcessOperationCreate = Body(...) +) -> ProcessOperationResponse: + """ + ProcessOperationを作成する。 + + Parameters: + - process_id: プロセスID + - operation_id: オペレーションID + """ + with SessionLocal() as session: + # プロセス存在確認 + process = session.query(Process).filter(Process.id == data.process_id).first() + if not process: + raise HTTPException( + status_code=404, + detail=f"Process with id {data.process_id} not found" + ) + + # オペレーション存在確認 + operation = session.query(Operation).filter(Operation.id == data.operation_id).first() + if not operation: + raise HTTPException( + status_code=404, + detail=f"Operation with id {data.operation_id} not found" + ) + + # 重複チェック + existing = session.query(ProcessOperation).filter( + ProcessOperation.process_id == data.process_id, + ProcessOperation.operation_id == data.operation_id + ).first() + if existing: + raise HTTPException( + status_code=409, + detail="ProcessOperation already exists" + ) + + # 作成 + po = ProcessOperation( + process_id=data.process_id, + operation_id=data.operation_id + ) + session.add(po) + session.commit() + session.refresh(po) + + return ProcessOperationResponse( + id=po.id, + process_id=po.process_id, + operation_id=po.operation_id, + created_at=po.created_at.isoformat() if po.created_at else None + ) + + +@router.get("/process-operations/{id}", tags=["process-operations"]) +def get_process_operation(id: int) -> ProcessOperationResponse: + """ + ProcessOperationを取得する。 + + Parameters: + - id: ProcessOperation ID + """ + with SessionLocal() as session: + po = session.query(ProcessOperation).filter(ProcessOperation.id == id).first() + if not po: + raise HTTPException(status_code=404, detail="ProcessOperation not found") + + return ProcessOperationResponse( + id=po.id, + process_id=po.process_id, + operation_id=po.operation_id, + created_at=po.created_at.isoformat() if po.created_at else None + ) + + +@router.delete("/process-operations/{id}", tags=["process-operations"]) +def delete_process_operation(id: int): + """ + ProcessOperationを削除する。 + + Parameters: + - id: ProcessOperation ID + """ + with SessionLocal() as session: + po = session.query(ProcessOperation).filter(ProcessOperation.id == id).first() + if not po: + raise HTTPException(status_code=404, detail="ProcessOperation not found") + + session.delete(po) + session.commit() + + return {"message": f"ProcessOperation {id} deleted successfully"} diff --git a/app/api/route/storage_v2.py b/app/api/route/storage_v2.py index 10f1db7..2506de7 100644 --- a/app/api/route/storage_v2.py +++ b/app/api/route/storage_v2.py @@ -6,11 +6,16 @@ import logging import os +import io +import json import tempfile import sqlite3 -from 
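A quick usage sketch for the ProcessOperation endpoints above, assuming the server is reachable at localhost:8000 (host, port, and the IDs are illustrative):

```python
import httpx

BASE = "http://localhost:8000/api"  # assumed host/port

# Link process 1 to operation 2 (404 if either id is unknown, 409 on duplicates)
po = httpx.post(
    f"{BASE}/process-operations",
    json={"process_id": 1, "operation_id": 2},
).json()

# List all links for that process, then remove the one just created
links = httpx.get(f"{BASE}/process-operations", params={"process_id": 1}).json()
httpx.delete(f"{BASE}/process-operations/{po['id']}")
```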
typing import Optional -from fastapi import APIRouter, Depends, Query, HTTPException -from fastapi.responses import PlainTextResponse, FileResponse +import zipfile +from datetime import datetime +from typing import Optional, List +from fastapi import APIRouter, Depends, Query, HTTPException, Body +from fastapi.responses import PlainTextResponse, FileResponse, StreamingResponse +from pydantic import BaseModel, Field from sqlalchemy.orm import Session from define_db.database import get_db @@ -22,6 +27,29 @@ router = APIRouter(prefix="/api/v2/storage", tags=["storage-v2"]) +# ==================== Request/Response Models ==================== + +class BatchDownloadV2Request(BaseModel): + """HAL対応バッチダウンロードリクエスト""" + run_ids: List[int] = Field( + ..., + min_length=1, + max_length=100, + description="ダウンロード対象のランIDリスト" + ) + + +class BatchDownloadV2Estimate(BaseModel): + """HAL対応バッチダウンロード推定サイズレスポンス""" + run_count: int + total_files: int + estimated_size_bytes: int + estimated_size_mb: float + can_download: bool + message: Optional[str] = None + runs_detail: List[dict] = [] + + @router.get("/list/{run_id}") def list_run_contents( run_id: int, @@ -152,27 +180,21 @@ def download_sql_dump( """ Run関連データのSQLiteダンプをダウンロード - ローカルモードのRunに対して、関連する全データを - 独立したSQLiteファイルとしてエクスポートする。 + 全ストレージモード(S3/local/hybrid)に対応。 + Run関連の全メタデータを独立したSQLiteファイルとしてエクスポートする。 含まれるデータ: - runs: 該当Run - processes: Run内のProcess - - operations: Process内のOperation + - operations: Process内のOperation(S3モードではlogフィールドは空の場合あり) - edges: Run内のEdge - ports: Process内のPort """ - # Runの存在確認とモードチェック + # Runの存在確認 run = db.query(Run).filter(Run.id == run_id).first() if not run: raise HTTPException(status_code=404, detail=f"Run {run_id} not found") - if run.storage_mode != 'local': - raise HTTPException( - status_code=400, - detail=f"SQL dump is only available for local mode runs. This run uses '{run.storage_mode}' mode." 
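The `Field` constraints on `run_ids` mean FastAPI rejects an empty or oversized list with a 422 before the handler body runs, so the explicit `if not request.run_ids` guards further down are defensive only. The same behaviour in isolation (Pydantic v2 semantics):

```python
from typing import List
from pydantic import BaseModel, Field, ValidationError

class Req(BaseModel):  # same shape as BatchDownloadV2Request
    run_ids: List[int] = Field(..., min_length=1, max_length=100)

try:
    Req(run_ids=[])
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # 'too_short' -> FastAPI surfaces it as a 422
```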
- ) - try: # 一時ファイルを作成 temp_file = tempfile.NamedTemporaryFile( @@ -344,3 +366,471 @@ def download_sql_dump( if 'temp_path' in locals() and os.path.exists(temp_path): os.unlink(temp_path) raise HTTPException(status_code=500, detail=f"Failed to create SQL dump: {str(e)}") + + +# ==================== HAL-based Batch Download ==================== + +MAX_BATCH_SIZE = 500 * 1024 * 1024 # 500MB + + +def _collect_all_files_recursive(hal: HybridAccessLayer, run_id: int, prefix: str = "") -> List[dict]: + """ + HALを使用してRun内の全ファイルを再帰的に収集 + + Args: + hal: HybridAccessLayer instance + run_id: Run ID + prefix: 検索プレフィックス + + Returns: + ファイル情報のリスト + """ + files = [] + items = hal.list_contents(run_id, prefix) + + for item in items: + if item.type == "file": + files.append({ + "path": item.path, + "size": item.size or 0, + "source": item.source.value if item.source else "unknown" + }) + elif item.type == "directory": + # ディレクトリは再帰的に探索 + sub_prefix = item.path if item.path.endswith('/') else item.path + '/' + files.extend(_collect_all_files_recursive(hal, run_id, sub_prefix)) + + return files + + +@router.post("/batch-download") +def batch_download_v2( + request: BatchDownloadV2Request = Body(...), + db: Session = Depends(get_db) +): + """ + HALを使用した複数ランの一括ダウンロード(Storage Browser相当のフォルダ構成) + + 全ストレージモード(S3/local/hybrid/unknown)に対応。 + HALを通じてStorage Browserと同じフォルダ構成でZIPを生成。 + + Args: + request: BatchDownloadV2Request(run_ids) + db: Database session + + Returns: + StreamingResponse: ZIPファイル + """ + if not request.run_ids: + raise HTTPException(status_code=400, detail="run_ids is required") + + hal = HybridAccessLayer(db) + + # ランを取得 + runs = db.query(Run).filter(Run.id.in_(request.run_ids)).all() + if not runs: + raise HTTPException(status_code=404, detail="No runs found") + + # ZIPをメモリ上で作成 + zip_buffer = io.BytesIO() + manifest_data = { + "generated_at": datetime.utcnow().isoformat() + "Z", + "runs": [], + "total_files": 0, + "total_size": 0, + "errors": [] + } + + try: + with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zf: + for run in runs: + run_prefix = f"run_{run.id}/" + run_file_count = 0 + run_size = 0 + + try: + # HALを使用して全ファイルを再帰収集 + files = _collect_all_files_recursive(hal, run.id) + + for file_info in files: + file_path = file_info["path"] + file_size = file_info["size"] + + # コンテンツを読み込み + content = hal.load_content(run.id, file_path) + if content: + # ZIPに追加(run_{id}/相対パス の構造) + zip_path = run_prefix + file_path + zf.writestr(zip_path, content) + + run_file_count += 1 + run_size += len(content) + + # メタデータダンプも追加(オプション) + try: + dump_content = _generate_metadata_dump(db, run.id) + if dump_content: + zf.writestr(f"{run_prefix}_metadata.db", dump_content) + except Exception as e: + logger.warning(f"Failed to generate metadata dump for run {run.id}: {e}") + + manifest_data["runs"].append({ + "run_id": run.id, + "storage_mode": run.storage_mode, + "file_count": run_file_count, + "total_size": run_size + }) + manifest_data["total_files"] += run_file_count + manifest_data["total_size"] += run_size + + except Exception as e: + logger.error(f"Error processing run {run.id}: {e}") + manifest_data["errors"].append({ + "run_id": run.id, + "error": str(e) + }) + + # マニフェストを追加 + zf.writestr("manifest.json", json.dumps(manifest_data, indent=2, ensure_ascii=False)) + + # バッファを先頭に戻す + zip_buffer.seek(0) + + # ファイル名を生成 + timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S') + filename = f"labcode_runs_{timestamp}.zip" + + return StreamingResponse( + iter([zip_buffer.getvalue()]), + 
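One design note on the batch handler above: the archive is assembled in a single `io.BytesIO`, so the whole ZIP (up to the 500MB cap) sits in memory before the first byte is sent. `requirements.txt` already pins `zipstream-new`, which would allow a streamed variant along these lines (a sketch, assuming `write_iter` behaves as that package documents):

```python
import zipstream  # provided by the zipstream-new package

def zip_stream(files):
    """files: iterable of (arcname, iterable-of-byte-chunks). Yields ZIP bytes lazily."""
    zs = zipstream.ZipFile(mode="w", compression=zipstream.ZIP_DEFLATED)
    for arcname, chunks in files:
        # write_iter never concatenates the payload, so peak memory stays
        # around one chunk instead of one whole archive
        zs.write_iter(arcname, chunks)
    yield from zs

# FastAPI side: StreamingResponse(zip_stream(...), media_type="application/zip")
```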
media_type="application/zip", + headers={ + "Content-Disposition": f'attachment; filename="{filename}"' + } + ) + + except Exception as e: + logger.error(f"Error creating batch download ZIP: {e}") + raise HTTPException(status_code=500, detail=f"Failed to create ZIP: {str(e)}") + + +def _generate_metadata_dump(db: Session, run_id: int) -> Optional[bytes]: + """ + メタデータダンプをバイト列として生成(SQLite形式) + + Args: + db: Database session + run_id: Run ID + + Returns: + SQLiteデータベースのバイト列 + """ + run = db.query(Run).filter(Run.id == run_id).first() + if not run: + return None + + # 一時ファイルに作成 + temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.db') + temp_path = temp_file.name + temp_file.close() + + try: + conn = sqlite3.connect(temp_path) + cursor = conn.cursor() + + # テーブル作成 + cursor.execute(''' + CREATE TABLE runs ( + id INTEGER PRIMARY KEY, + project_id INTEGER, + file_name TEXT, + checksum TEXT, + user_id INTEGER, + added_at TEXT, + started_at TEXT, + finished_at TEXT, + status TEXT, + storage_address TEXT, + storage_mode TEXT, + deleted_at TEXT, + display_visible INTEGER + ) + ''') + + cursor.execute(''' + CREATE TABLE processes ( + id INTEGER PRIMARY KEY, + name TEXT, + run_id INTEGER, + storage_address TEXT, + process_type TEXT, + FOREIGN KEY (run_id) REFERENCES runs(id) + ) + ''') + + cursor.execute(''' + CREATE TABLE operations ( + id INTEGER PRIMARY KEY, + process_id INTEGER, + name TEXT, + parent_id INTEGER, + started_at TEXT, + finished_at TEXT, + status TEXT, + storage_address TEXT, + is_transport INTEGER, + is_data INTEGER, + log TEXT, + FOREIGN KEY (process_id) REFERENCES processes(id) + ) + ''') + + cursor.execute(''' + CREATE TABLE edges ( + id INTEGER PRIMARY KEY, + run_id INTEGER, + from_id INTEGER, + to_id INTEGER, + FOREIGN KEY (run_id) REFERENCES runs(id) + ) + ''') + + cursor.execute(''' + CREATE TABLE ports ( + id INTEGER PRIMARY KEY, + process_id INTEGER, + port_name TEXT, + port_type TEXT, + data_type TEXT, + position INTEGER, + is_required INTEGER, + default_value TEXT, + description TEXT, + FOREIGN KEY (process_id) REFERENCES processes(id) + ) + ''') + + # データを挿入 + cursor.execute(''' + INSERT INTO runs VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ''', ( + run.id, run.project_id, run.file_name, run.checksum, run.user_id, + run.added_at.isoformat() if run.added_at else None, + run.started_at.isoformat() if run.started_at else None, + run.finished_at.isoformat() if run.finished_at else None, + run.status, run.storage_address, run.storage_mode, + run.deleted_at.isoformat() if run.deleted_at else None, + 1 if run.display_visible else 0 + )) + + processes = db.query(Process).filter(Process.run_id == run_id).all() + process_ids = [p.id for p in processes] + + for p in processes: + cursor.execute(''' + INSERT INTO processes VALUES (?, ?, ?, ?, ?) + ''', (p.id, p.name, p.run_id, p.storage_address, p.process_type)) + + if process_ids: + operations = db.query(Operation).filter( + Operation.process_id.in_(process_ids) + ).all() + + for op in operations: + cursor.execute(''' + INSERT INTO operations VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ ''', ( + op.id, op.process_id, op.name, op.parent_id, + op.started_at.isoformat() if op.started_at else None, + op.finished_at.isoformat() if op.finished_at else None, + op.status, op.storage_address, + 1 if op.is_transport else 0, + 1 if op.is_data else 0, + op.log + )) + + ports = db.query(Port).filter( + Port.process_id.in_(process_ids) + ).all() + + for port in ports: + cursor.execute(''' + INSERT INTO ports VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + ''', ( + port.id, port.process_id, port.port_name, port.port_type, + port.data_type, port.position, + 1 if port.is_required else 0, + getattr(port, 'default_value', None), + getattr(port, 'description', None) + )) + + edges = db.query(Edge).filter(Edge.run_id == run_id).all() + for e in edges: + cursor.execute(''' + INSERT INTO edges VALUES (?, ?, ?, ?) + ''', (e.id, e.run_id, e.from_id, e.to_id)) + + conn.commit() + conn.close() + + # ファイルを読み込み + with open(temp_path, 'rb') as f: + content = f.read() + + return content + + finally: + # 一時ファイルを削除 + if os.path.exists(temp_path): + os.unlink(temp_path) + + +@router.post("/batch-dump") +def batch_dump_metadata( + request: BatchDownloadV2Request = Body(...), + db: Session = Depends(get_db) +): + """ + 複数ランのメタデータダンプを一括ダウンロード + + 各ランごとに個別の.dbファイルを生成し、ZIPにまとめて返却。 + ファイル実体なしでメタデータのみダウンロードしたい場合に使用。 + + Args: + request: BatchDownloadV2Request(run_ids) + db: Database session + + Returns: + StreamingResponse: ZIPファイル + """ + if not request.run_ids: + raise HTTPException(status_code=400, detail="run_ids is required") + + runs = db.query(Run).filter(Run.id.in_(request.run_ids)).all() + if not runs: + raise HTTPException(status_code=404, detail="No runs found") + + # ZIPをメモリ上で作成 + zip_buffer = io.BytesIO() + manifest_data = { + "generated_at": datetime.utcnow().isoformat() + "Z", + "type": "metadata_dumps", + "runs": [], + "errors": [] + } + + try: + with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zf: + for run in runs: + try: + dump_content = _generate_metadata_dump(db, run.id) + if dump_content: + zf.writestr(f"run_{run.id}_dump.db", dump_content) + manifest_data["runs"].append({ + "run_id": run.id, + "storage_mode": run.storage_mode, + "dump_size": len(dump_content) + }) + except Exception as e: + logger.error(f"Error generating dump for run {run.id}: {e}") + manifest_data["errors"].append({ + "run_id": run.id, + "error": str(e) + }) + + # マニフェストを追加 + zf.writestr("manifest.json", json.dumps(manifest_data, indent=2, ensure_ascii=False)) + + zip_buffer.seek(0) + + timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S') + filename = f"labcode_metadata_dumps_{timestamp}.zip" + + return StreamingResponse( + iter([zip_buffer.getvalue()]), + media_type="application/zip", + headers={ + "Content-Disposition": f'attachment; filename="{filename}"' + } + ) + + except Exception as e: + logger.error(f"Error creating batch dump ZIP: {e}") + raise HTTPException(status_code=500, detail=f"Failed to create ZIP: {str(e)}") + + +@router.post("/batch-download/estimate", response_model=BatchDownloadV2Estimate) +def estimate_batch_download_v2( + request: BatchDownloadV2Request = Body(...), + db: Session = Depends(get_db) +): + """ + HAL対応バッチダウンロードの推定サイズを取得 + + Args: + request: BatchDownloadV2Request(run_ids) + db: Database session + + Returns: + BatchDownloadV2Estimate + """ + if not request.run_ids: + return BatchDownloadV2Estimate( + run_count=0, + total_files=0, + estimated_size_bytes=0, + estimated_size_mb=0.0, + can_download=False, + message="run_ids is required" + ) + + hal = HybridAccessLayer(db) + runs = 
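Since `_generate_metadata_dump` returns a complete, self-contained SQLite database as bytes, a consumer can verify an exported `_metadata.db` with nothing beyond the standard library; a small sketch:

```python
import os
import sqlite3
import tempfile

def list_dump_tables(dump: bytes) -> list:
    """Round-trip the bytes through a temp file and list the tables inside."""
    fd, path = tempfile.mkstemp(suffix=".db")
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(dump)
        conn = sqlite3.connect(path)
        try:
            rows = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
            ).fetchall()
        finally:
            conn.close()
        return [name for (name,) in rows]
    finally:
        os.unlink(path)

# Expected for a dump from this series:
# ['edges', 'operations', 'ports', 'processes', 'runs']
```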
db.query(Run).filter(Run.id.in_(request.run_ids)).all() + + if not runs: + return BatchDownloadV2Estimate( + run_count=0, + total_files=0, + estimated_size_bytes=0, + estimated_size_mb=0.0, + can_download=False, + message="No runs found" + ) + + total_size = 0 + total_files = 0 + runs_detail = [] + + for run in runs: + try: + files = _collect_all_files_recursive(hal, run.id) + run_size = sum(f["size"] for f in files) + run_files = len(files) + + total_size += run_size + total_files += run_files + + runs_detail.append({ + "run_id": run.id, + "storage_mode": run.storage_mode, + "file_count": run_files, + "estimated_size": run_size + }) + except Exception as e: + logger.warning(f"Error estimating run {run.id}: {e}") + runs_detail.append({ + "run_id": run.id, + "error": str(e) + }) + + can_download = total_size <= MAX_BATCH_SIZE + message = None if can_download else f"Estimated size ({total_size // (1024*1024)}MB) exceeds limit (500MB)" + + return BatchDownloadV2Estimate( + run_count=len(runs), + total_files=total_files, + estimated_size_bytes=total_size, + estimated_size_mb=round(total_size / (1024 * 1024), 2), + can_download=can_download, + message=message, + runs_detail=runs_detail + ) diff --git a/app/main.py b/app/main.py index 7d9e6b7..ecc507b 100644 --- a/app/main.py +++ b/app/main.py @@ -1,5 +1,5 @@ from fastapi import FastAPI -from api.route import users, projects, runs, processes, operations, edges, ports, storage, storage_v2 +from api.route import users, projects, runs, processes, operations, edges, ports, storage, storage_v2, process_operations from fastapi.middleware.cors import CORSMiddleware app = FastAPI() @@ -24,5 +24,6 @@ app.include_router(edges.router, prefix="/api") app.include_router(ports.router, prefix="/api") app.include_router(storage.router, prefix="/api") +app.include_router(process_operations.router, prefix="/api") # HAL (Hybrid Access Layer) を使用した新API app.include_router(storage_v2.router) diff --git a/app/services/hal/hybrid_access_layer.py b/app/services/hal/hybrid_access_layer.py index 72cf8e5..2fae861 100644 --- a/app/services/hal/hybrid_access_layer.py +++ b/app/services/hal/hybrid_access_layer.py @@ -120,7 +120,7 @@ def _infer_storage_mode(self, run) -> StorageMode: return inferred - def _do_infer_storage_mode(self, run) -> StorageMode: + def _do_infer_storage_mode(self, run, persist: bool = True) -> StorageMode: """実際の推論処理 推論優先順位(重要): @@ -130,6 +130,10 @@ def _do_infer_storage_mode(self, run) -> StorageMode: この優先順位は、レガシーデータ(storage_mode=null)が S3に保存されている可能性が高いことに基づいています。 + + Args: + run: Runエンティティ + persist: TrueならDBに永続化する(デフォルト: True) """ from define_db.models import Operation, Process @@ -140,7 +144,10 @@ def _do_infer_storage_mode(self, run) -> StorageMode: result = s3_backend.list_objects_with_dirs(storage_address) has_s3_files = bool(result.get('contents', [])) if has_s3_files: - return StorageMode.S3 + inferred = StorageMode.S3 + if persist: + self._persist_inferred_mode(run, inferred) + return inferred except Exception as e: logger.debug(f"S3 check failed for Run {run.id}: {e}") @@ -152,11 +159,100 @@ def _do_infer_storage_mode(self, run) -> StorageMode: ).first() is not None if has_db_logs: - return StorageMode.LOCAL + inferred = StorageMode.LOCAL + if persist: + self._persist_inferred_mode(run, inferred) + return inferred # Step 3: どちらにもデータがない場合 return StorageMode.UNKNOWN + def _persist_inferred_mode(self, run, mode: StorageMode): + """推論結果をDBに永続化する""" + try: + run.storage_mode = mode.value + self._db.commit() + logger.info(f"Persisted inferred 
storage_mode for Run {run.id}: {mode.value}") + except Exception as e: + logger.warning(f"Failed to persist storage_mode for Run {run.id}: {e}") + self._db.rollback() + + def batch_infer_storage_modes(self, runs: list) -> Dict[int, StorageMode]: + """ + 複数RunのストレージモードをバッチInfer + + パフォーマンス最適化: + 1. S3存在確認は個別に実行(list_objects APIの制限) + 2. DBクエリをIN句で一括取得 + + Args: + runs: Runエンティティのリスト + + Returns: + run_id -> StorageMode のマッピング + """ + from define_db.models import Operation, Process + + result: Dict[int, StorageMode] = {} + runs_to_infer = [] + + # Step 1: キャッシュチェック & 明示設定済みの除外 + for run in runs: + if run.storage_mode: + result[run.id] = StorageMode.from_string(run.storage_mode) + elif run.id in self._inferred_mode_cache: + result[run.id] = self._inferred_mode_cache[run.id] + else: + runs_to_infer.append(run) + + if not runs_to_infer: + return result + + # Step 2: S3一括確認(各prefixを確認) + s3_exists: Dict[int, bool] = {} + try: + s3_backend = self._get_s3_backend() + for run in runs_to_infer: + prefix = run.storage_address or f"runs/{run.id}/" + try: + res = s3_backend.list_objects_with_dirs(prefix) + s3_exists[run.id] = bool(res.get('contents', [])) + except Exception: + s3_exists[run.id] = False + except Exception as e: + logger.warning(f"S3 batch check failed: {e}") + for run in runs_to_infer: + s3_exists[run.id] = False + + # Step 3: DB一括確認(IN句で効率化) + run_ids = [r.id for r in runs_to_infer] + db_log_exists = set( + row[0] for row in self._db.query(Process.run_id).join(Operation).filter( + Process.run_id.in_(run_ids), + Operation.log.isnot(None), + Operation.log != '' + ).distinct() + ) + + # Step 4: モード判定 & キャッシュ更新 + for run in runs_to_infer: + if s3_exists.get(run.id): + mode = StorageMode.S3 + elif run.id in db_log_exists: + mode = StorageMode.LOCAL + else: + mode = StorageMode.UNKNOWN + + result[run.id] = mode + self._inferred_mode_cache[run.id] = mode + + # UNKNOWNでなければDBに永続化 + if mode != StorageMode.UNKNOWN: + self._persist_inferred_mode(run, mode) + + logger.info(f"Batch inferred storage_modes for {len(runs_to_infer)} runs") + return result + def _get_file_backend(self, mode: StorageMode): """モードに対応するファイルバックエンドを取得(レジストリパターン)""" return self._get_backend(mode.value) @@ -574,6 +670,13 @@ def get_storage_info(self, run_id: int) -> StorageInfo: is_hybrid = has_s3_data and has_local_data + # アクセス可能性を判定 + is_accessible = has_s3_data or has_local_data + + # ハイブリッドモードの場合はStorageMode.HYBRIDを使用 + if is_hybrid: + mode = StorageMode.HYBRID + if mode == StorageMode.UNKNOWN: # 推論してもUNKNOWNの場合: 警告付きで返却 return StorageInfo( @@ -587,20 +690,29 @@ def get_storage_info(self, run_id: int) -> StorageInfo: }, warning="Storage mode is not set and could not be inferred. 
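A sketch of how a caller might consume `batch_infer_storage_modes`: one S3/DB pass for a whole listing instead of a probe per row. The wiring is illustrative; `users.py` in this series does the equivalent through the module-level `batch_infer_storage_modes` helper:

```python
from services.hal import HybridAccessLayer  # import path assumed from this series

def storage_modes_for(db, runs) -> dict:
    """Map run_id -> 's3' | 'local' | 'unknown' using a single batched pass."""
    hal = HybridAccessLayer(db)
    modes = hal.batch_infer_storage_modes(runs)
    # Non-UNKNOWN results were persisted inside the call; UNKNOWN stays null
    # in the DB so it is re-checked once data actually appears.
    return {run_id: mode.value for run_id, mode in modes.items()}
```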
Data may not be displayed correctly.", is_hybrid=is_hybrid, + is_accessible=is_accessible, s3_path=s3_path, local_path=local_path ) + elif mode == StorageMode.HYBRID: + # ハイブリッドモード: S3とDBの両方にデータあり + full_path = f"hybrid://{run.storage_address or f'runs/{run_id}/'}" + data_sources = { + "logs": "hybrid", + "yaml": "s3", + "data": "hybrid" + } elif mode == StorageMode.S3: full_path = s3_path or f"s3://labcode-dev-artifacts/{run.storage_address}" data_sources = { - "logs": "s3" if not has_local_data else "hybrid", + "logs": "s3", "yaml": "s3", "data": "s3" } else: full_path = local_path or f"db://sqlite/runs/{run_id}/" data_sources = { - "logs": "database" if not has_s3_data else "hybrid", + "logs": "database", "yaml": "database_or_none", "data": "database_or_none" } @@ -612,6 +724,7 @@ def get_storage_info(self, run_id: int) -> StorageInfo: data_sources=data_sources, inferred=is_inferred, is_hybrid=is_hybrid, + is_accessible=is_accessible, s3_path=s3_path, local_path=local_path ) diff --git a/app/services/hal/models.py b/app/services/hal/models.py index 715daee..6453a39 100644 --- a/app/services/hal/models.py +++ b/app/services/hal/models.py @@ -13,6 +13,7 @@ class StorageMode(Enum): """ストレージモード""" S3 = "s3" LOCAL = "local" + HYBRID = "hybrid" # S3とDBの両方にデータがある場合 UNKNOWN = "unknown" # storage_mode=nullの場合 @classmethod @@ -81,6 +82,7 @@ class StorageInfo: warning: Optional[str] = None # 警告メッセージ(UNKNOWNモード時など) inferred: bool = False # モードが推論されたかどうか is_hybrid: bool = False # ハイブリッドモードかどうか(S3+DB両方にデータあり) + is_accessible: bool = True # ストレージにアクセス可能かどうか s3_path: Optional[str] = None # S3パス(ハイブリッド時) local_path: Optional[str] = None # ローカルパス(ハイブリッド時) @@ -90,7 +92,8 @@ def to_dict(self) -> Dict[str, Any]: "mode": self.mode.value, "storage_address": self.storage_address, "full_path": self.full_path, - "data_sources": self.data_sources + "data_sources": self.data_sources, + "isAccessible": self.is_accessible } if self.warning: result["warning"] = self.warning From 33c8e7426b6f470e4ea7d3a10b7847e30384a852 Mon Sep 17 00:00:00 2001 From: Ayumu-Nono Date: Wed, 24 Dec 2025 09:30:17 +0900 Subject: [PATCH 3/6] save --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index bbe810e..14b478f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ .venv __pycache__ *.db -.env.env.s3.backup +.env # Backup files (not to be tracked) app/scripts_backup_local/ From 1ef6e8fa406cac3602ff55e94b8feb47459b64d5 Mon Sep 17 00:00:00 2001 From: Ayumu-Nono Date: Wed, 24 Dec 2025 16:07:08 +0900 Subject: [PATCH 4/6] feat: Add admin API endpoints - GET /users/list - List all users - GET /users/{id}/projects - Get user's projects - GET /projects/list - List all projects with owner info - Add ProjectResponseWithOwner response model --- app/api/response_model.py | 22 +++++++++++++++++++- app/api/route/projects.py | 41 +++++++++++++++++++++++++++++++++++-- app/api/route/users.py | 43 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 101 insertions(+), 5 deletions(-) diff --git a/app/api/response_model.py b/app/api/response_model.py index 5cc9004..81eb93c 100644 --- a/app/api/response_model.py +++ b/app/api/response_model.py @@ -234,4 +234,24 @@ class ProcessListResponse(BaseModel): - items: プロセスリスト(ProcessResponseEnhanced) """ total: int - items: List[ProcessResponseEnhanced] \ No newline at end of file + items: List[ProcessResponseEnhanced] + + +# ============================================================ +# Admin API用の新規レスポンスモデル +# 
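For reference, a hybrid run rendered through `to_dict()` produces the snake_case fields plus the deliberately camelCased `isAccessible` for the frontend; values below are illustrative and assume the plain dataclass constructor:

```python
info = StorageInfo(
    mode=StorageMode.HYBRID,
    storage_address="runs/42/",
    full_path="hybrid://runs/42/",
    data_sources={"logs": "hybrid", "yaml": "s3", "data": "hybrid"},
    is_hybrid=True,
    s3_path="s3://labcode-dev-artifacts/runs/42/",
    local_path="db://sqlite/runs/42/",
)
payload = info.to_dict()
assert payload["mode"] == "hybrid"
assert payload["isAccessible"] is True  # camelCase on the wire only
```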
============================================================ + +class ProjectResponseWithOwner(BaseModel): + """プロジェクト情報(オーナー情報含む)のレスポンスモデル + + 管理画面のプロジェクト一覧で使用。 + オーナーのメールアドレスを含む。 + """ + model_config = ConfigDict(from_attributes=True) + + id: int + name: str + user_id: int + owner_email: Optional[str] = None + created_at: Optional[datetime] = None + updated_at: Optional[datetime] = None \ No newline at end of file diff --git a/app/api/route/projects.py b/app/api/route/projects.py index e7a6f50..13f14df 100644 --- a/app/api/route/projects.py +++ b/app/api/route/projects.py @@ -1,14 +1,51 @@ from define_db.models import Project, User from define_db.database import SessionLocal -from api.response_model import ProjectResponse -from fastapi import APIRouter +from api.response_model import ProjectResponse, ProjectResponseWithOwner +from fastapi import APIRouter, Query from fastapi import Form from fastapi import HTTPException +from sqlalchemy.orm import joinedload +from typing import List import datetime as dt router = APIRouter() +# ============================================================ +# Admin API: プロジェクト一覧取得(オーナー情報含む) +# ============================================================ + +@router.get("/projects/list", tags=["projects"], response_model=List[ProjectResponseWithOwner]) +def list_all( + limit: int = Query(default=100, ge=1, le=1000, description="Maximum number of projects to return"), + offset: int = Query(default=0, ge=0, description="Number of projects to skip") +): + """ + 全プロジェクト一覧を取得(オーナー情報含む) + + 管理画面のプロジェクト一覧表示で使用。 + オーナーのメールアドレスを含む。 + ページネーション対応。 + """ + with SessionLocal() as session: + projects = session.query(Project).options( + joinedload(Project.user) + ).offset(offset).limit(limit).all() + + result = [] + for p in projects: + resp = ProjectResponseWithOwner( + id=p.id, + name=p.name, + user_id=p.user_id, + owner_email=p.user.email if p.user else None, + created_at=p.created_at, + updated_at=p.updated_at + ) + result.append(resp) + return result + + @router.post("/projects/", tags=["projects"], response_model=ProjectResponse) def create(name: str = Form(), user_id: int = Form()): with SessionLocal() as session: diff --git a/app/api/route/users.py b/app/api/route/users.py index af0ded1..f42f8ee 100644 --- a/app/api/route/users.py +++ b/app/api/route/users.py @@ -1,8 +1,8 @@ from define_db.models import User, Run, Project from define_db.database import SessionLocal -from api.response_model import UserResponse, RunResponseWithProjectName +from api.response_model import UserResponse, RunResponseWithProjectName, ProjectResponse from services.hal import batch_infer_storage_modes -from fastapi import Form +from fastapi import Form, Query from fastapi import APIRouter from fastapi import HTTPException from sqlalchemy.orm import joinedload, selectinload @@ -11,6 +11,45 @@ router = APIRouter() +# ============================================================ +# Admin API: ユーザー一覧取得 +# ============================================================ + +@router.get("/users/list", tags=["users"], response_model=List[UserResponse]) +def list_all( + limit: int = Query(default=100, ge=1, le=1000, description="Maximum number of users to return"), + offset: int = Query(default=0, ge=0, description="Number of users to skip") +): + """ + 全ユーザー一覧を取得 + + 管理画面のユーザー一覧表示で使用。 + ページネーション対応。 + """ + with SessionLocal() as session: + users = session.query(User).offset(offset).limit(limit).all() + return [UserResponse.model_validate(u) for u in users] + + +# 
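The `joinedload` in `list_all` above is what keeps the listing at a single SELECT; reading `p.user.email` in the loop without it would lazy-load one extra query per project (the classic N+1). Side by side, as a sketch against the same models:

```python
from sqlalchemy.orm import joinedload

# N+1: every p.user access issues its own SELECT
projects = session.query(Project).all()
emails = [p.user.email for p in projects if p.user]

# One round trip: users arrive via an outer join with the projects
projects = session.query(Project).options(joinedload(Project.user)).all()
emails = [p.user.email for p in projects if p.user]
```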
============================================================
+# Admin API: ユーザーのプロジェクト一覧取得
+# ============================================================
+
+@router.get("/users/{id}/projects", tags=["users"], response_model=List[ProjectResponse])
+def read_user_projects(id: int):
+    """
+    ユーザーのプロジェクト一覧を取得
+
+    指定されたユーザーが所有するプロジェクトの一覧を返す。
+    """
+    with SessionLocal() as session:
+        user = session.query(User).filter(User.id == id).first()
+        if not user:
+            raise HTTPException(status_code=404, detail="User not found")
+        projects = session.query(Project).filter(Project.user_id == id).all()
+        return [ProjectResponse.model_validate(p) for p in projects]
+
+
 @router.post("/users/", tags=["users"], response_model=UserResponse)
 def create(email: str = Form()) -> User:
     with SessionLocal() as session:

From 5530a252bd35a542d39694b43ada22c664487695 Mon Sep 17 00:00:00 2001
From: Ayumu-Nono
Date: Wed, 24 Dec 2025 22:12:57 +0900
Subject: [PATCH 5/6] ignore db file

---
 .gitignore                             |   1 +
 data/sql_app.db.backup_20251120_164403 | Bin 81920 -> 0 bytes
 data/sql_app.db.backup_20251121_142331 | Bin 86016 -> 0 bytes
 3 files changed, 1 insertion(+)
 delete mode 100644 data/sql_app.db.backup_20251120_164403
 delete mode 100644 data/sql_app.db.backup_20251121_142331

diff --git a/.gitignore b/.gitignore
index 14b478f..a8fe400 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 __pycache__
 *.db
 .env
+*.db*
 
 # Backup files (not to be tracked)
 app/scripts_backup_local/
diff --git a/data/sql_app.db.backup_20251120_164403 b/data/sql_app.db.backup_20251120_164403
deleted file mode 100644
index e7fe624d64d2edfb3a71f255b4b3f6032ab0b2bf..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

[base85 binary patch data omitted: deletion of the 81920-byte SQLite backup]

diff --git a/data/sql_app.db.backup_20251121_142331 b/data/sql_app.db.backup_20251121_142331
deleted file mode 100644
index 393998211c1f950afb931321fbd14a5bfba0ec4b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

[base85 binary patch data omitted: deletion of the 86016-byte SQLite backup]

From c4dd9f91b84400a9b453f0e65ae7861839ade67e Mon Sep 17 00:00:00 2001
From: Ayumu-Nono
Date: Wed, 24 Dec 2025 23:24:30 +0900
Subject: [PATCH 6/6] feat: Add database auto-initialization and improve
 migration idempotency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add init_db.py for automatic DB initialization on startup
- Integrate lifespan into main.py for DB readiness check
- Add duplicate check to yaml_importer.py (skip_existing parameter)
- Improve migrate_ports.py with skip count reporting

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5

---
 app/init_db.py                | 229 ++++++++++++++++++++++++++++++++++
 app/main.py                   |  33 ++++-
 app/scripts/migrate_ports.py  |  53 +++++---
 app/services/yaml_importer.py | 127 ++++++++++++++-----
 4 files changed, 395 insertions(+), 47 deletions(-)
 create mode 100644 app/init_db.py

diff --git a/app/init_db.py b/app/init_db.py
new file mode 100644
index 0000000..42a16a2
--- /dev/null
+++ b/app/init_db.py
@@ -0,0 +1,229 @@
+#!/usr/bin/env python3
+"""
+データベース自動初期化・マイグレーションモジュール
+
+FastAPI起動時に呼び出され:
+1. データベースの状態をチェック
+2. 必要に応じてテーブルを自動作成(既存データは保持)
+3. 必要に応じてカスタムマイグレーションを実行
+"""
+import logging
+from pathlib import Path
+from sqlalchemy import text
+from define_db.database import engine
+from define_db.models import Base
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+DB_PATH = Path("/data/sql_app.db")
+
+REQUIRED_TABLES = [
+    'users', 'projects', 'runs', 'processes',
+    'operations', 'edges', 'ports', 'port_connections',
+    'process_operations'
+]
+
+# ============================================
+# カスタムマイグレーション定義
+# ============================================
+MIGRATIONS = [
+    {
+        "version": "001",
+        "description": "Ensure storage_mode column in runs",
+        "check": "SELECT 1 FROM pragma_table_info('runs') WHERE name='storage_mode'",
+        "sql": "ALTER TABLE runs ADD COLUMN storage_mode VARCHAR(10)"
+    },
+    {
+        "version": "002",
+        "description": "Ensure process_type column in processes",
+        "check": "SELECT 1 FROM pragma_table_info('processes') WHERE name='process_type'",
+        "sql": "ALTER TABLE processes ADD COLUMN process_type VARCHAR(256)"
+    },
+    {
+        "version": "003",
+        "description": "Ensure display_visible column in runs",
+        "check": "SELECT 1 FROM pragma_table_info('runs') WHERE name='display_visible'",
+        "sql": "ALTER TABLE runs ADD COLUMN display_visible BOOLEAN DEFAULT 1 NOT NULL"
+    },
+]
+
+
+def check_database_file() -> dict:
+    """データベースファイルの状態をチェック"""
+    result = {
+        'exists': False,
+        'size': 0,
+        'is_empty': True,
+        'is_readable': False
+    }
+
+    if DB_PATH.exists():
+        result['exists'] = True
+        result['size'] = DB_PATH.stat().st_size
+        result['is_empty'] = result['size'] == 0
+
+        try:
+            with open(DB_PATH, 'rb') as f:
+                f.read(16)
+            result['is_readable'] = True
+        except (IOError, PermissionError):
+            result['is_readable'] = False
+
+    return result
+
+
+def check_tables() -> dict:
+    """データベース内のテーブル存在をチェック"""
+    result = {
+        'existing_tables': [],
+        'missing_tables': [],
+        'all_present': False
+    }
+
+    try:
+        with engine.connect() as conn:
+            query = text("SELECT name FROM sqlite_master WHERE 
type='table'") + tables = [row[0] for row in conn.execute(query)] + result['existing_tables'] = tables + result['missing_tables'] = [t for t in REQUIRED_TABLES if t not in tables] + result['all_present'] = len(result['missing_tables']) == 0 + except Exception as e: + logger.warning(f"テーブルチェック中にエラー: {e}") + result['missing_tables'] = REQUIRED_TABLES + + return result + + +def create_tables(): + """ + 全テーブルを作成 + + 重要: create_all()は既存テーブルのデータを削除しない + 既存テーブルはスキップされ、新規テーブルのみ作成される + """ + logger.info("テーブル作成を開始(既存テーブルはスキップ)...") + Base.metadata.create_all(engine) + logger.info("テーブル作成完了") + + +def run_custom_migrations(): + """ + カスタムマイグレーションを実行 + + 既存テーブルへのカラム追加など、create_all()で対応できない + スキーマ変更を実行する。既存データは保持される。 + """ + logger.info("カスタムマイグレーションをチェック...") + + with engine.connect() as conn: + applied_count = 0 + skipped_count = 0 + + for migration in MIGRATIONS: + version = migration["version"] + description = migration["description"] + + try: + result = conn.execute(text(migration["check"])) + if result.fetchone(): + logger.debug(f"Migration {version} already applied: {description}") + skipped_count += 1 + continue + except Exception: + skipped_count += 1 + continue + + logger.info(f"Applying migration {version}: {description}") + try: + conn.execute(text(migration["sql"])) + conn.commit() + logger.info(f"Migration {version} completed") + applied_count += 1 + except Exception as e: + logger.error(f"Migration {version} failed: {e}") + + if applied_count > 0: + logger.info(f"マイグレーション完了: {applied_count}件適用, {skipped_count}件スキップ") + else: + logger.info(f"マイグレーション: 全て適用済み ({skipped_count}件)") + + +def ensure_database_ready() -> dict: + """ + データベースが使用可能な状態であることを保証 + + Returns: + dict: 実行結果サマリー + """ + summary = { + 'action': None, + 'file_status': None, + 'table_status': None, + 'migrations_run': False, + 'success': False + } + + # Step 1: ファイル状態チェック + file_status = check_database_file() + summary['file_status'] = file_status + + need_create = False + + if not file_status['exists']: + logger.info(f"[DB Init] データベースファイルが存在しません: {DB_PATH}") + need_create = True + summary['action'] = 'create_new' + + elif file_status['is_empty']: + logger.warning(f"[DB Init] データベースファイルが空です (0 bytes)") + need_create = True + summary['action'] = 'initialize_empty' + + if need_create: + logger.info("[DB Init] テーブルを作成します...") + create_tables() + + table_status = check_tables() + summary['table_status'] = table_status + + if table_status['all_present']: + logger.info("[DB Init] 初期化完了") + summary['success'] = True + return summary + + # Step 2: テーブル存在チェック + table_status = check_tables() + summary['table_status'] = table_status + + if not table_status['all_present']: + missing = ', '.join(table_status['missing_tables']) + logger.info(f"[DB Init] 不足テーブル: {missing}") + logger.info("[DB Init] 不足テーブルを作成します(既存データは保持)...") + summary['action'] = 'create_missing' + + create_tables() + + table_status = check_tables() + summary['table_status'] = table_status + + # Step 3: カスタムマイグレーション + run_custom_migrations() + summary['migrations_run'] = True + + if summary['action'] is None: + summary['action'] = 'none' + + summary['success'] = True + logger.info(f"[DB Init] データベース準備完了 (テーブル数: {len(table_status['existing_tables'])})") + + return summary + + +if __name__ == "__main__": + result = ensure_database_ready() + print(f"\n=== 実行結果 ===") + print(f"アクション: {result['action']}") + print(f"成功: {result['success']}") + if result['table_status']: + print(f"テーブル数: {len(result['table_status']['existing_tables'])}") diff --git a/app/main.py b/app/main.py 
index ecc507b..b9e6a45 100644 --- a/app/main.py +++ b/app/main.py @@ -1,8 +1,39 @@ from fastapi import FastAPI from api.route import users, projects, runs, processes, operations, edges, ports, storage, storage_v2, process_operations from fastapi.middleware.cors import CORSMiddleware +from contextlib import asynccontextmanager +import logging -app = FastAPI() +logger = logging.getLogger(__name__) + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """ + FastAPIライフサイクル管理 + + 起動時: データベース初期化チェック + 終了時: リソースクリーンアップ + """ + # === 起動時処理 === + logger.info("=== FastAPI Starting ===") + + # DB初期化 + from init_db import ensure_database_ready + result = ensure_database_ready() + + if not result['success'] and result['action'] != 'none': + logger.error("データベース初期化に失敗しました") + + logger.info("=== FastAPI Ready ===") + + yield # アプリケーション実行 + + # === 終了時処理 === + logger.info("=== FastAPI Shutting Down ===") + + +app = FastAPI(lifespan=lifespan) # CORSミドルウェアの設定 app.add_middleware( CORSMiddleware, diff --git a/app/scripts/migrate_ports.py b/app/scripts/migrate_ports.py index 1094145..6ab8df4 100644 --- a/app/scripts/migrate_ports.py +++ b/app/scripts/migrate_ports.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 """ -既存YAMLデータをPorts/PortConnectionsテーブルに移行するスクリプト +既存YAMLデータをPorts/PortConnectionsテーブルに移行するスクリプト(冪等性対応) + +★冪等性: 何度実行しても安全です。既存データはスキップされます。 使用方法: # 全Run移行 @@ -26,13 +28,15 @@ def migrate_all_runs(dry_run: bool = False): - """全Runのポート情報をマイグレーション""" + """全Runのポート情報をマイグレーション(冪等性対応)""" with SessionLocal() as session: runs = session.query(Run).filter(Run.deleted_at.is_(None)).all() - total_ports = 0 - total_connections = 0 - skipped_count = 0 + total_ports_created = 0 + total_ports_skipped = 0 + total_connections_created = 0 + total_connections_skipped = 0 + run_skipped_count = 0 print(f"Found {len(runs)} runs to process.\n") @@ -42,7 +46,7 @@ def migrate_all_runs(dry_run: bool = False): # storage_addressがGoogle Drive URLの場合はスキップ if run.storage_address.startswith("http"): print(f" ⏭️ Skipping (Google Drive URL): {run.storage_address}") - skipped_count += 1 + run_skipped_count += 1 continue # YAMLファイル存在確認 @@ -51,7 +55,7 @@ def migrate_all_runs(dry_run: bool = False): if not protocol_path.exists() or not manipulate_path.exists(): print(f" ⏭️ Skipping (YAML not found): {run.storage_address}") - skipped_count += 1 + run_skipped_count += 1 continue if dry_run: @@ -61,23 +65,33 @@ def migrate_all_runs(dry_run: bool = False): try: importer = YAMLPortImporter(session) result = importer.import_from_run(run.id, run.storage_address) - total_ports += result['ports_created'] - total_connections += result['connections_created'] - print(f" ✅ Ports: {result['ports_created']}, Connections: {result['connections_created']}") + total_ports_created += result['ports_created'] + total_ports_skipped += result['ports_skipped'] + total_connections_created += result['connections_created'] + total_connections_skipped += result['connections_skipped'] + + # 結果表示 + if result['ports_skipped'] > 0 or result['connections_skipped'] > 0: + print(f" ✅ Created: {result['ports_created']} ports, {result['connections_created']} connections") + print(f" Skipped: {result['ports_skipped']} ports, {result['connections_skipped']} connections (already exist)") + else: + print(f" ✅ Ports: {result['ports_created']}, Connections: {result['connections_created']}") except Exception as e: print(f" ❌ Error: {e}") print(f"\n{'[DRY RUN] ' if dry_run else ''}Summary:") print(f" Total Runs: {len(runs)}") - print(f" Processed: {len(runs) - skipped_count}") - 
print(f" Skipped: {skipped_count}") + print(f" Processed: {len(runs) - run_skipped_count}") + print(f" Skipped (no YAML/remote): {run_skipped_count}") if not dry_run: - print(f" Ports Created: {total_ports}") - print(f" Connections Created: {total_connections}") + print(f" Ports: {total_ports_created} created, {total_ports_skipped} skipped") + print(f" Connections: {total_connections_created} created, {total_connections_skipped} skipped") + if total_ports_skipped > 0 or total_connections_skipped > 0: + print(f"\n✅ This migration is idempotent - skipped items already existed.") def migrate_single_run(run_id: int, dry_run: bool = False): - """特定のRunのポート情報をマイグレーション""" + """特定のRunのポート情報をマイグレーション(冪等性対応)""" with SessionLocal() as session: run = session.query(Run).filter(Run.id == run_id).first() if not run: @@ -104,7 +118,14 @@ def migrate_single_run(run_id: int, dry_run: bool = False): try: importer = YAMLPortImporter(session) result = importer.import_from_run(run.id, run.storage_address) - print(f" ✅ Ports: {result['ports_created']}, Connections: {result['connections_created']}") + + # 結果表示 + if result['ports_skipped'] > 0 or result['connections_skipped'] > 0: + print(f" ✅ Created: {result['ports_created']} ports, {result['connections_created']} connections") + print(f" Skipped: {result['ports_skipped']} ports, {result['connections_skipped']} connections (already exist)") + print(f"\n✅ This migration is idempotent - skipped items already existed.") + else: + print(f" ✅ Ports: {result['ports_created']}, Connections: {result['connections_created']}") except Exception as e: print(f" ❌ Error: {e}") diff --git a/app/services/yaml_importer.py b/app/services/yaml_importer.py index 35465e5..c31914a 100644 --- a/app/services/yaml_importer.py +++ b/app/services/yaml_importer.py @@ -15,16 +15,17 @@ class YAMLPortImporter: def __init__(self, session: Session): self.session = session - def import_from_run(self, run_id: int, storage_address: str) -> Dict[str, int]: + def import_from_run(self, run_id: int, storage_address: str, skip_existing: bool = True) -> Dict[str, int]: """ - Runのポート情報をYAMLから一括インポート + Runのポート情報をYAMLから一括インポート(冪等性対応) Args: run_id: Run ID storage_address: YAMLファイルのあるディレクトリパス + skip_existing: True=既存データはスキップ(デフォルト)、False=エラー Returns: - {"ports_created": 10, "connections_created": 5} + {"ports_created": 10, "ports_skipped": 5, "connections_created": 5, "connections_skipped": 2} Raises: FileNotFoundError: YAML不存在 @@ -49,35 +50,42 @@ def import_from_run(self, run_id: int, storage_address: str) -> Dict[str, int]: ).all() ports_created = 0 + ports_skipped = 0 connections_created = 0 + connections_skipped = 0 # 各ProcessのPorts作成 for process in processes: - created = self._import_ports_for_process( - process, protocol_data, manipulate_data + result = self._import_ports_for_process( + process, protocol_data, manipulate_data, skip_existing ) - ports_created += created + ports_created += result['created'] + ports_skipped += result['skipped'] # Connections作成 - created = self._import_connections( - run_id, processes, protocol_data + result = self._import_connections( + run_id, processes, protocol_data, skip_existing ) - connections_created += created + connections_created += result['created'] + connections_skipped += result['skipped'] self.session.commit() return { "ports_created": ports_created, - "connections_created": connections_created + "ports_skipped": ports_skipped, + "connections_created": connections_created, + "connections_skipped": connections_skipped } def _import_ports_for_process( self, 
diff --git a/app/services/yaml_importer.py b/app/services/yaml_importer.py
index 35465e5..c31914a 100644
--- a/app/services/yaml_importer.py
+++ b/app/services/yaml_importer.py
@@ -15,16 +15,17 @@
 class YAMLPortImporter:
     def __init__(self, session: Session):
         self.session = session
 
-    def import_from_run(self, run_id: int, storage_address: str) -> Dict[str, int]:
+    def import_from_run(self, run_id: int, storage_address: str, skip_existing: bool = True) -> Dict[str, int]:
         """
-        Bulk-import a run's port information from YAML
+        Bulk-import a run's port information from YAML (idempotent)
 
         Args:
             run_id: Run ID
             storage_address: path of the directory containing the YAML files
+            skip_existing: True = skip existing records (default), False = raise on duplicates
 
         Returns:
-            {"ports_created": 10, "connections_created": 5}
+            {"ports_created": 10, "ports_skipped": 5, "connections_created": 5, "connections_skipped": 2}
 
         Raises:
             FileNotFoundError: YAML file not found
@@ -49,35 +50,42 @@
         ).all()
 
         ports_created = 0
+        ports_skipped = 0
         connections_created = 0
+        connections_skipped = 0
 
         # Create ports for each process
         for process in processes:
-            created = self._import_ports_for_process(
-                process, protocol_data, manipulate_data
+            result = self._import_ports_for_process(
+                process, protocol_data, manipulate_data, skip_existing
             )
-            ports_created += created
+            ports_created += result['created']
+            ports_skipped += result['skipped']
 
         # Create connections
-        created = self._import_connections(
-            run_id, processes, protocol_data
+        result = self._import_connections(
+            run_id, processes, protocol_data, skip_existing
         )
-        connections_created += created
+        connections_created += result['created']
+        connections_skipped += result['skipped']
 
         self.session.commit()
 
         return {
             "ports_created": ports_created,
-            "connections_created": connections_created
+            "ports_skipped": ports_skipped,
+            "connections_created": connections_created,
+            "connections_skipped": connections_skipped
         }
 
     def _import_ports_for_process(
         self,
         process: Process,
         protocol_data: Dict,
-        manipulate_data: List[Dict]
-    ) -> int:
-        """Create the ports for a single process"""
+        manipulate_data: List[Dict],
+        skip_existing: bool = True
+    ) -> Dict[str, int]:
+        """Create the ports for a single process (with duplicate check)"""
 
         # Get the process type from protocol.yaml
         process_type = None
         for op in protocol_data.get('operations', []):
@@ -87,7 +95,7 @@
         if not process_type:
             print(f"Warning: Process type not found for {process.name}")
-            return 0
+            return {'created': 0, 'skipped': 0}
 
         # ★NEW: persist the process type on the Process record as well
         if not process.process_type:
@@ -102,15 +110,32 @@
         if not process_def:
             print(f"Warning: Process definition not found for type {process_type}")
-            return 0
+            return {'created': 0, 'skipped': 0}
 
         created_count = 0
+        skipped_count = 0
 
         # Create input ports
         for idx, port_def in enumerate(process_def.get('input', [])):
+            port_name = port_def.get('id')
+
+            # ★Duplicate check: does this port already exist?
+            existing_port = self.session.query(Port).filter(
+                Port.process_id == process.id,
+                Port.port_type == 'input',
+                Port.port_name == port_name
+            ).first()
+
+            if existing_port:
+                if skip_existing:
+                    skipped_count += 1
+                    continue
+                else:
+                    raise ValueError(f"Port already exists: process_id={process.id}, port_name={port_name}, port_type=input")
+
             port = Port(
                 process_id=process.id,
-                port_name=port_def.get('id'),
+                port_name=port_name,
                 port_type='input',
                 data_type=port_def.get('type'),
                 position=idx,
@@ -123,9 +148,25 @@
         # Create output ports
         for idx, port_def in enumerate(process_def.get('output', [])):
+            port_name = port_def.get('id')
+
+            # ★Duplicate check: does this port already exist?
+            existing_port = self.session.query(Port).filter(
+                Port.process_id == process.id,
+                Port.port_type == 'output',
+                Port.port_name == port_name
+            ).first()
+
+            if existing_port:
+                if skip_existing:
+                    skipped_count += 1
+                    continue
+                else:
+                    raise ValueError(f"Port already exists: process_id={process.id}, port_name={port_name}, port_type=output")
+
             port = Port(
                 process_id=process.id,
-                port_name=port_def.get('id'),
+                port_name=port_name,
                 port_type='output',
                 data_type=port_def.get('type'),
                 position=idx,
@@ -136,17 +177,19 @@
             self.session.add(port)
             created_count += 1
 
-        return created_count
+        return {'created': created_count, 'skipped': skipped_count}
 
     def _import_connections(
         self,
         run_id: int,
         processes: List[Process],
-        protocol_data: Dict
-    ) -> int:
-        """Create PortConnection rows"""
+        protocol_data: Dict,
+        skip_existing: bool = True
+    ) -> Dict[str, int]:
+        """Create PortConnection rows (with duplicate check)"""
         connections = protocol_data.get('connections', [])
         created_count = 0
+        skipped_count = 0
 
         # Map from process name to Process object
         process_map = {p.name: p for p in processes}
@@ -187,6 +230,20 @@
             if not source_port or not target_port:
                 continue
 
+            # ★Duplicate check: does this connection already exist?
+            existing_connection = self.session.query(PortConnection).filter(
+                PortConnection.run_id == run_id,
+                PortConnection.source_port_id == source_port.id,
+                PortConnection.target_port_id == target_port.id
+            ).first()
+
+            if existing_connection:
+                if skip_existing:
+                    skipped_count += 1
+                    continue
+                else:
+                    raise ValueError(f"Connection already exists: run_id={run_id}, source_port_id={source_port.id}, target_port_id={target_port.id}")
+
             # Create the connection
             connection = PortConnection(
                 run_id=run_id,
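Note: the query-then-insert duplicate check above is race-prone if two importers ever run concurrently against the same database, and this patch adds no schema-level protection. A hypothetical hardening sketch (not part of this change) using a SQLAlchemy unique constraint:

    from sqlalchemy import Column, Integer, String, UniqueConstraint
    from sqlalchemy.orm import declarative_base

    Base = declarative_base()

    class Port(Base):
        # Sketch: mirrors only the columns the duplicate check relies on.
        __tablename__ = 'ports'
        __table_args__ = (
            UniqueConstraint('process_id', 'port_type', 'port_name', name='uq_port_identity'),
        )
        id = Column(Integer, primary_key=True)
        process_id = Column(Integer, nullable=False)
        port_type = Column(String, nullable=False)   # 'input' or 'output'
        port_name = Column(String, nullable=False)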
@@ -196,17 +253,19 @@
             self.session.add(connection)
             created_count += 1
 
-        return created_count
+        return {'created': created_count, 'skipped': skipped_count}
 
 
 def import_ports_for_all_runs():
-    """Import port information for all runs (for migrating existing data)"""
+    """Import port information for all runs (for migrating existing data, idempotent)"""
     with SessionLocal() as session:
         runs = session.query(Run).filter(Run.deleted_at.is_(None)).all()
 
         importer = YAMLPortImporter(session)
 
-        total_ports = 0
-        total_connections = 0
+        total_ports_created = 0
+        total_ports_skipped = 0
+        total_connections_created = 0
+        total_connections_skipped = 0
         success_count = 0
         failure_count = 0
 
@@ -221,9 +280,15 @@
             try:
                 print(f"Processing Run {run.id}...")
                 result = importer.import_from_run(run.id, run.storage_address)
-                print(f"  Created {result['ports_created']} ports, {result['connections_created']} connections")
-                total_ports += result['ports_created']
-                total_connections += result['connections_created']
+                created_msg = f"Created: {result['ports_created']} ports, {result['connections_created']} connections"
+                skipped_msg = f"Skipped: {result['ports_skipped']} ports, {result['connections_skipped']} connections"
+                print(f"  {created_msg}")
+                if result['ports_skipped'] > 0 or result['connections_skipped'] > 0:
+                    print(f"  {skipped_msg} (already exist)")
+                total_ports_created += result['ports_created']
+                total_ports_skipped += result['ports_skipped']
+                total_connections_created += result['connections_created']
+                total_connections_skipped += result['connections_skipped']
                 success_count += 1
             except Exception as e:
                 print(f"  Error: {e}")
@@ -233,8 +298,10 @@
         print(f"Total runs: {len(runs)}")
         print(f"Success: {success_count}")
         print(f"Failure: {failure_count}")
-        print(f"Total ports imported: {total_ports}")
-        print(f"Total connections imported: {total_connections}")
+        print(f"Ports created: {total_ports_created}, skipped: {total_ports_skipped}")
+        print(f"Connections created: {total_connections_created}, skipped: {total_connections_skipped}")
+        if total_ports_skipped > 0 or total_connections_skipped > 0:
+            print(f"\n✅ This migration is idempotent - skipped items already existed in the database.")
 
 
 if __name__ == "__main__":