class OutlineImportJobViewSet(viewsets.ReadOnlyModelViewSet):
    """
    Read-only API used by clients to poll the state of their Outline imports.

    Only list and retrieve are exposed, and every query is restricted to the
    jobs owned by the requesting user.

    Endpoints:
    - GET /api/v1.0/imports/outline/jobs/ - List user's import jobs
    - GET /api/v1.0/imports/outline/jobs/{id}/ - Get specific job status
    """

    queryset = models.OutlineImportJob.objects.all()
    permission_classes = [permissions.IsAuthenticated]
    serializer_class = serializers.OutlineImportJobSerializer

    def get_queryset(self):
        """Return the import jobs that belong to the authenticated user."""
        requesting_user = self.request.user
        return self.queryset.filter(user=requesting_user)
class OutlineImportUploadView(drf.views.APIView):
    """
    Upload an Outline export zip file for asynchronous processing.

    This endpoint:
    1. Validates the uploaded zip file
    2. Saves it to the default storage backend
    3. Creates an OutlineImportJob to track the import
    4. Schedules a malware scan of the zip once the job is committed
    5. Returns a polling URL for checking import status

    The actual import processing happens asynchronously after malware scanning.
    """

    authentication_classes = [drf.authentication.SessionAuthentication]
    parser_classes = [drf.parsers.MultiPartParser]
    permission_classes = [drf.permissions.IsAuthenticated]

    def post(self, request):
        """Store the uploaded archive and register an import job for it.

        Returns a 202 response carrying ``job_id``, ``status`` and the absolute
        ``status_url`` to poll. Validation failures are raised by the
        serializer (``raise_exception=True``).
        """
        serializer = OutlineImportSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        uploaded_file = serializer.validated_data["file"]

        requested_key = f"imports/outline/{request.user.id}/{uuid.uuid4()}.zip"
        # Storage.save() returns the name that was actually used, which a
        # backend is allowed to alter; the job must reference that name.
        key = default_storage.save(requested_key, uploaded_file)

        with transaction.atomic():
            try:
                job = models.OutlineImportJob.objects.create(
                    user=request.user,
                    zip_file_key=key,
                    status=models.OutlineImportJob.Status.PENDING,
                )
            except Exception:
                # No job will ever reference the stored archive: remove it
                # instead of leaving an orphan behind, then re-raise.
                default_storage.delete(key)
                raise

            # Scan only after commit so the job row is visible to whatever
            # handles the scan result. The callback receives import_job_id.
            transaction.on_commit(
                lambda: malware_detection.analyse_file(key, import_job_id=str(job.id))
            )

        status_url = reverse("outline-import-job-detail", kwargs={"pk": job.id})

        return drf.response.Response(
            {
                "job_id": str(job.id),
                "status": job.status,
                "status_url": request.build_absolute_uri(status_url),
            },
            status=drf.status.HTTP_202_ACCEPTED,  # 202 Accepted for async processing
        )
class OutlineImportSerializer(serializers.Serializer):
    """Serializer for validating Outline export zip uploads."""

    file = serializers.FileField()

    def validate_file(self, file):
        """Validate that the uploaded file is a readable zip archive.

        Raises ``serializers.ValidationError`` when the name does not end in
        ``.zip`` (case-insensitive) or when the zip structure cannot be read.
        The file pointer is always rewound before returning or raising, so the
        caller can store the upload afterwards.
        """
        # Function-scope import, as in the original implementation.
        import zipfile

        # ``name`` may be missing or None on some file-like objects.
        name = getattr(file, "name", "") or ""
        if not name.lower().endswith(".zip"):
            raise serializers.ValidationError("Must be a .zip file")

        try:
            # Opening parses the central directory, which is enough to reject
            # non-zip content. Passing the upload itself avoids copying it
            # into a second in-memory buffer; closing the ZipFile does not
            # close a file object it did not open.
            with zipfile.ZipFile(file):
                pass
        except zipfile.BadZipFile as exc:
            raise serializers.ValidationError("Invalid zip archive") from exc
        finally:
            file.seek(0)

        return file


class OutlineImportJobSerializer(serializers.ModelSerializer):
    """Read-only representation of an Outline import job for status polling."""

    class Meta:
        model = models.OutlineImportJob
        fields = [
            "id",
            "status",
            "created_document_ids",
            "error_message",
            "created_at",
            "updated_at",
        ]
        # Every exposed field is read-only: jobs are never edited via the API.
        read_only_fields = fields
def _handle_outline_import_scan(job_id, file_path, status, error_info):
    """React to the malware scan result of an uploaded Outline zip.

    A safe archive moves the job to SCANNING and queues the import task once
    the current transaction commits. Any other scan status marks the job as
    FAILED, records the error and removes the archive from storage.
    A missing job is logged and ignored.
    """
    # Imported here rather than at module level, as in the original code.
    from core.tasks.outline_import import process_outline_import_task

    try:
        import_job = OutlineImportJob.objects.get(id=job_id)
    except OutlineImportJob.DoesNotExist:
        logger.error("OutlineImportJob %s not found for malware callback", job_id)
        return

    if status != ReportStatus.SAFE:
        security_logger.warning(
            "Outline import zip %s contains malware. Job %s marked as failed. Error: %s",
            file_path,
            job_id,
            error_info,
        )
        import_job.error_message = f"Malware detected in uploaded file: {error_info}"
        import_job.status = OutlineImportJob.Status.FAILED
        import_job.save(update_fields=["status", "error_message", "updated_at"])

        # Best effort: a failed deletion must not hide the scan verdict.
        try:
            default_storage.delete(file_path)
        except Exception as exc:
            logger.warning("Failed to delete infected zip file %s: %s", file_path, exc)
        return

    logger.info("Outline import zip %s is safe, triggering import task", file_path)
    import_job.status = OutlineImportJob.Status.SCANNING
    import_job.save(update_fields=["status", "updated_at"])

    # Queue the task only after commit so it sees the status written above.
    transaction.on_commit(
        lambda: process_outline_import_task.delay(str(import_job.id))
    )
class OutlineImportJob(BaseModel):
    """Database record describing one asynchronous Outline import."""

    class Status(models.TextChoices):
        # Values stored in the ``status`` column; PENDING is the default.
        PENDING = "pending", _("Pending")
        SCANNING = "scanning", _("Scanning")
        PROCESSING = "processing", _("Processing")
        COMPLETED = "completed", _("Completed")
        FAILED = "failed", _("Failed")

    # Owner of the import; deleting the user deletes their jobs (CASCADE).
    user = models.ForeignKey(
        User,
        on_delete=models.CASCADE,
        related_name="outline_import_jobs",
        verbose_name=_("user"),
        help_text=_("user who initiated the import"),
    )
    status = models.CharField(
        max_length=20,
        choices=Status.choices,
        default=Status.PENDING,
        verbose_name=_("status"),
        help_text=_("current status of the import job"),
    )
    # Storage key under which the uploaded archive was saved.
    zip_file_key = models.CharField(
        max_length=255,
        verbose_name=_("zip file key"),
        help_text=_("S3 key of the uploaded zip file"),
    )
    created_document_ids = ArrayField(
        models.UUIDField(),
        default=list,
        blank=True,
        verbose_name=_("created document IDs"),
        help_text=_("list of document IDs created during import"),
    )
    error_message = models.TextField(
        blank=True,
        verbose_name=_("error message"),
        help_text=_("error message if import failed"),
    )

    class Meta:
        db_table = "core_outline_import_job"
        ordering = ["-created_at"]
        verbose_name = _("Outline import job")
        verbose_name_plural = _("Outline import jobs")

    def __str__(self):
        return "OutlineImportJob {} ({})".format(self.id, self.status)
class OutlineImportError(Exception):
    """Raised when the Outline archive is invalid or unsafe."""


def _create_document(
    user, parent: models.Document | None, title: str
) -> models.Document:
    """Create a document titled ``title`` under ``parent``, or as a new root.

    A new root is created with restricted link reach and ``user`` as owner.
    """
    if parent is not None:
        return parent.add_child(creator=user, title=title)

    doc = models.Document.add_root(
        depth=1,
        creator=user,
        title=title,
        link_reach=models.LinkReachChoices.RESTRICTED,
    )
    models.DocumentAccess.objects.create(
        document=doc,
        user=user,
        role=models.RoleChoices.OWNER,
    )
    return doc


def _ensure_dir_documents(
    user, dir_path: str, dir_docs: dict[str, models.Document]
) -> models.Document | None:
    """Ensure each path segment in dir_path has a container document.

    ``dir_docs`` maps archive directory paths to the document representing
    them; it is read for existing containers and updated with new ones.

    Returns the deepest parent document or None when dir_path is empty.
    """
    parent: models.Document | None = None
    current = ""
    for part in (segment for segment in dir_path.split("/") if segment):
        current = f"{current}/{part}" if current else part
        doc = dir_docs.get(current)
        if doc is None:
            doc = _create_document(user, parent, part)
            dir_docs[current] = doc
        parent = doc

    return parent


def _upload_attachment(user, doc: models.Document, arcname: str, data: bytes) -> str:
    """Upload a binary asset into object storage and return its media URL.

    The object is stored under the document's attachments folder with a fresh
    UUID name, recorded in ``doc.attachments`` and submitted for malware
    analysis.
    """
    mime = magic.Magic(mime=True)
    content_type = mime.from_buffer(data[:1024]) if data else None

    # Take the extension from the basename only: splitting the whole link on
    # "." returns the full path (separators included) when the file has no
    # extension, or when a dot only appears in a directory name.
    suffix = posixpath.splitext(posixpath.basename(arcname))[1]
    ext = suffix.lstrip(".").lower() or "bin"

    file_id = uuid.uuid4()
    key = f"{doc.key_base}/{enums.ATTACHMENTS_FOLDER:s}/{file_id!s}.{ext}"
    extra_args = {
        "Metadata": {
            "owner": str(user.id),
            "status": enums.DocumentAttachmentStatus.PROCESSING,
        },
    }
    if content_type:
        extra_args["ContentType"] = content_type

    default_storage.connection.meta.client.upload_fileobj(
        io.BytesIO(data), default_storage.bucket_name, key, ExtraArgs=extra_args
    )
    doc.attachments.append(key)
    doc.save(update_fields=["attachments", "updated_at"])
    # NOTE(review): process_outline_zip calls this inside its atomic block, so
    # the scan is requested before the document is committed - confirm that
    # the scan callback tolerates this.
    malware_detection.analyse_file(key, document_id=doc.id)
    return f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}"


def _reject_unsafe_names(names) -> None:
    """Raise OutlineImportError if any archive member path escapes the root."""
    for name in names:
        if name.startswith("/") or "\\" in name:
            raise OutlineImportError("Unsafe path in archive")
        normalized = posixpath.normpath(name)
        # Compare whole path segments so that a legitimate name which merely
        # begins with two dots (e.g. "..notes.md") is not rejected.
        if normalized == ".." or normalized.startswith(("../", "/")):
            raise OutlineImportError("Unsafe path in archive")


def _resolve_asset_path(dir_path: str, url: str) -> str | None:
    """Map an image link found in ``dir_path`` to an archive member path.

    Returns None for http(s) links and for links that point outside the
    archive root; such links are left untouched by the caller.
    """
    if url.startswith(("http://", "https://")):
        return None
    joined = f"{dir_path}/{url}" if dir_path else url
    # Collapse duplicate slashes, then resolve "." and ".." segments so that
    # links such as "../attachments/a.png" can be found in the archive.
    candidate = posixpath.normpath(re.sub(r"/+", "/", joined))
    if candidate == ".." or candidate.startswith(("/", "../")):
        return None
    return candidate


@transaction.atomic
def process_outline_zip(user, zip_bytes: bytes) -> list[str]:
    """Process an Outline export zip and create Docs documents.

    This function runs within an atomic transaction, ensuring that either all
    documents are created successfully or none are (rollback on any error).
    Objects already uploaded to storage are not removed by that rollback.

    Raises OutlineImportError when a member path is absolute, contains a
    backslash or escapes the archive root.

    Returns the list of created document IDs (stringified UUIDs) corresponding
    to markdown-backed documents. Container folders used to rebuild hierarchy
    are not listed.
    """
    # Function-scope import, mirroring the original local use of logging.
    import logging

    logger = logging.getLogger(__name__)
    img_pattern = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")
    converter = YdocConverter()
    created_ids: list[str] = []
    dir_docs: dict[str, models.Document] = {}

    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as archive:
        names = archive.namelist()
        _reject_unsafe_names(names)

        # Markdown members, skipping macOS resource forks and hidden paths.
        # Sorting puts "Doc.md" before "Doc/...", so a parent document exists
        # before its nested children are processed.
        md_files = sorted(
            name
            for name in names
            if name.lower().endswith(".md")
            and not name.startswith("__MACOSX/")
            and not any(part.startswith(".") for part in name.split("/"))
        )

        # Every directory prefix present in the archive, computed once so the
        # "does Doc.md have a Doc/ directory" check below is a set lookup.
        dir_prefixes: set[str] = set()
        for name in names:
            segments = name.split("/")[:-1]
            for depth in range(1, len(segments) + 1):
                dir_prefixes.add("/".join(segments[:depth]))

        # Slice the extension off instead of rsplit(".md"): the filter above
        # is case-insensitive, so "Doc.MD" must also map to "Doc".
        md_with_dirs = {path[:-3] for path in md_files if path[:-3] in dir_prefixes}

        for md_path in md_files:
            dir_path, _, file_name = md_path.rpartition("/")
            parent_doc = _ensure_dir_documents(user, dir_path, dir_docs)

            try:
                raw_md = archive.read(md_path).decode("utf-8", errors="ignore")
            except Exception as exc:  # noqa: BLE001
                # Best effort: still create the document, but leave a trace
                # of why it ended up empty.
                logger.warning("Failed to read %s from archive: %s", md_path, exc)
                raw_md = ""

            heading = re.search(r"^#\s+(.+)$", raw_md, flags=re.MULTILINE)
            title = heading.group(1).strip() if heading else file_name.rsplit(".", 1)[0]

            doc = _create_document(user, parent_doc, title)

            # If this md file has a corresponding directory, register it as a
            # container so nested children use this doc as parent instead of
            # creating a duplicate.
            base_path = md_path[:-3]
            if base_path in md_with_dirs:
                dir_docs[base_path] = doc

            def replace_img_link(match: re.Match[str]) -> str:
                """Upload the linked asset and point the image at its media URL."""
                url = match.group(1)
                asset_path = _resolve_asset_path(dir_path, url)
                if asset_path is None:
                    return match.group(0)
                try:
                    data = archive.read(asset_path)
                except KeyError:
                    # The link does not point at an archive member.
                    return match.group(0)
                media_url = _upload_attachment(user, doc, arcname=url, data=data)
                # Replace only the captured target: str.replace() would also
                # rewrite the alt text whenever it repeats the path.
                whole = match.group(0)
                offset = match.start(1) - match.start(0)
                return f"{whole[:offset]}{media_url}{whole[offset + len(url):]}"

            rewritten_md = img_pattern.sub(replace_img_link, raw_md)

            try:
                ydoc_b64 = converter.convert(
                    rewritten_md.encode("utf-8"),
                    content_type="text/markdown",
                    accept="application/vnd.yjs.doc",
                )
            except Exception as exc:  # noqa: BLE001
                # Keep doc without content on conversion error but continue
                # import.
                logger.warning(
                    "Failed to convert markdown for document %s: %s", doc.id, exc
                )
            else:
                # Saved outside the try: a database error inside the atomic
                # block must propagate and roll back, not be swallowed.
                doc.content = ydoc_b64
                doc.save()

            created_ids.append(str(doc.id))

    return created_ids
@app.task
def process_outline_import_task(job_id):
    """
    Process an Outline import job asynchronously.

    This task is triggered after the uploaded zip file has been scanned for
    malware and deemed safe. It reads the zip from storage, processes it to
    create documents, and updates the job status accordingly.

    A job that is already COMPLETED is left untouched, so a redelivered task
    cannot turn a finished import into a failed one.

    Args:
        job_id: UUID of the OutlineImportJob to process
    """
    try:
        job = models.OutlineImportJob.objects.get(id=job_id)
    except models.OutlineImportJob.DoesNotExist:
        logger.error("OutlineImportJob %s not found", job_id)
        return

    if job.status == models.OutlineImportJob.Status.COMPLETED:
        # The archive was deleted after the successful run; re-running would
        # fail on the download and overwrite the status with FAILED.
        logger.info("Outline import job %s already completed, skipping", job_id)
        return

    logger.info("Starting Outline import job %s", job_id)
    job.status = models.OutlineImportJob.Status.PROCESSING
    job.save(update_fields=["status", "updated_at"])

    try:
        logger.debug("Downloading zip file from S3: %s", job.zip_file_key)
        try:
            # The context manager closes the handle even if read() raises.
            with default_storage.open(job.zip_file_key, "rb") as zip_file:
                zip_bytes = zip_file.read()
        except Exception as e:
            raise RuntimeError(f"Failed to download zip file from S3: {e}") from e

        # Documents and the job update are committed together: if any error
        # occurs, all database changes are rolled back.
        with transaction.atomic():
            created_ids = process_outline_zip(job.user, zip_bytes)
            job.created_document_ids = created_ids
            job.status = models.OutlineImportJob.Status.COMPLETED
            job.save(update_fields=["created_document_ids", "status", "updated_at"])
    except Exception as e:
        logger.exception("Outline import job %s failed: %s", job_id, str(e))
        job.status = models.OutlineImportJob.Status.FAILED
        # NOTE(review): the raw exception text is exposed through the job
        # serializer's error_message field - confirm that is acceptable.
        job.error_message = str(e)
        job.save(update_fields=["status", "error_message", "updated_at"])
        # Keep the zip file in S3 for debugging purposes when import fails
        return

    logger.info(
        "Outline import job %s completed successfully. Created %d documents.",
        job_id,
        len(created_ids),
    )

    # Delete the zip file after a successful import. Best effort: a leftover
    # archive must not fail an import that already succeeded.
    try:
        default_storage.delete(job.zip_file_key)
        logger.debug("Deleted zip file from S3: %s", job.zip_file_key)
    except Exception as e:
        logger.warning("Failed to delete zip file %s: %s", job.zip_file_key, e)
# The route is registered with a trailing slash ("imports/outline/upload/").
UPLOAD_URL = "/api/v1.0/imports/outline/upload/"


def _build_zip(entries):
    """Return the bytes of a zip archive holding ``entries`` (path -> content)."""
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, mode="w") as zf:
        for path, content in entries.items():
            zf.writestr(path, content)
    return buf.getvalue()


def test_outline_import_upload_anonymous_forbidden():
    """Anonymous users must not be able to use the import endpoint."""
    client = APIClient()
    upload = SimpleUploadedFile(
        name="export.zip", content=_build_zip({}), content_type="application/zip"
    )

    response = client.post(UPLOAD_URL, {"file": upload}, format="multipart")

    # The view only enables SessionAuthentication, which sends no
    # WWW-Authenticate header, so DRF answers 403 rather than 401.
    assert response.status_code == 403
    assert response.json()["detail"] == "Authentication credentials were not provided."


def test_outline_import_upload_authenticated_success(django_capture_on_commit_callbacks):
    """A valid upload is stored, tracked by a pending job and sent for scanning."""
    user = factories.UserFactory()
    client = APIClient()
    client.force_login(user)

    upload = SimpleUploadedFile(
        name="export.zip",
        content=_build_zip({"Folder1/page.md": "# Imported Title\n\nSome text.\n"}),
        content_type="application/zip",
    )

    # Storage is replaced so the test does not need a reachable bucket. The
    # scan is scheduled with on_commit, hence the capture fixture.
    with (
        patch(
            "core.api.imports.default_storage.save",
            side_effect=lambda name, content: name,
        ) as mock_save,
        patch.object(malware_detection, "analyse_file") as mock_analyse_file,
        django_capture_on_commit_callbacks(execute=True),
    ):
        response = client.post(UPLOAD_URL, {"file": upload}, format="multipart")

    # The import is asynchronous: the endpoint only acknowledges the upload.
    assert response.status_code == 202
    data = response.json()
    assert set(data) == {"job_id", "status", "status_url"}
    assert data["status"] == "pending"
    assert data["status_url"].endswith(f"/imports/outline/jobs/{data['job_id']}/")

    job = user.outline_import_jobs.get()
    assert str(job.id) == data["job_id"]
    assert job.zip_file_key.startswith(f"imports/outline/{user.id}/")
    assert job.zip_file_key.endswith(".zip")

    mock_save.assert_called_once()
    mock_analyse_file.assert_called_once_with(
        job.zip_file_key, import_job_id=str(job.id)
    )


def test_outline_import_upload_invalid_zip_returns_validation_error():
    """Invalid archives are rejected with a validation error instead of crashing."""
    user = factories.UserFactory()
    client = APIClient()
    client.force_login(user)

    upload = SimpleUploadedFile(
        name="export.zip",
        content=b"not-a-zip",
        content_type="application/zip",
    )

    response = client.post(UPLOAD_URL, {"file": upload}, format="multipart")

    assert response.status_code == 400
    assert response.json() == {"file": ["Invalid zip archive"]}
    assert not user.outline_import_jobs.exists()


def test_outline_import_upload_non_zip_extension_returns_validation_error():
    """Files whose name does not end in .zip are rejected before any storage."""
    user = factories.UserFactory()
    client = APIClient()
    client.force_login(user)

    upload = SimpleUploadedFile(
        name="export.txt",
        content=_build_zip({"doc.md": "# Title"}),
        content_type="application/zip",
    )

    response = client.post(UPLOAD_URL, {"file": upload}, format="multipart")

    assert response.status_code == 400
    assert response.json() == {"file": ["Must be a .zip file"]}
    assert not user.outline_import_jobs.exists()
# Load task modules from all registered Django apps.
# autodiscover_tasks() imports "<app>.tasks" for each package by default. That
# default call must stay: passing related_name="mail" for every installed app
# would look for "<app>.mail" instead and stop discovering "<app>.tasks".
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)

# core.tasks is a package, so the submodules that define tasks have to be
# named explicitly. The package list is built on its own rather than by
# concatenating with INSTALLED_APPS, which would fail if that setting were a
# tuple.
# NOTE(review): "mail" is assumed to be core/tasks/mail.py - confirm.
app.autodiscover_tasks(lambda: ["core.tasks"], related_name="mail")
app.autodiscover_tasks(lambda: ["core.tasks"], related_name="outline_import")
+ ), ] ), ), diff --git a/src/backend/impress/celery_app.py b/src/backend/impress/celery_app.py index e38c57071c..ccf9abba4e 100644 --- a/src/backend/impress/celery_app.py +++ b/src/backend/impress/celery_app.py @@ -23,4 +23,7 @@ app.config_from_object("django.conf:settings", namespace="CELERY") # Load task modules from all registered Django apps. -app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) +# autodiscover_tasks looks for "tasks.py" in each app by default +# We also need to discover tasks in subdirectories like core/tasks/ +app.autodiscover_tasks(lambda: settings.INSTALLED_APPS + ["core.tasks"], related_name="mail") +app.autodiscover_tasks(lambda: ["core.tasks"], related_name="outline_import") diff --git a/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx b/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx index bcb7dbfa51..c973db0395 100644 --- a/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx +++ b/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx @@ -3,7 +3,7 @@ import { useRouter } from 'next/router'; import { useState } from 'react'; import { useTranslation } from 'react-i18next'; -import { Icon } from '@/components'; +import { Box, DropdownMenu, Icon } from '@/components'; import { useCreateDoc } from '@/features/docs/doc-management'; import { useSkeletonStore } from '@/features/skeletons'; @@ -48,14 +48,28 @@ export const LeftPanelHeaderButton = () => { const isLoading = isDocCreating || isNavigating; return ( - + + + void router.push('/import/outline'), + showSeparator: false, + }, + ]} + /> + ); }; diff --git a/src/frontend/apps/impress/src/i18n/translations.json b/src/frontend/apps/impress/src/i18n/translations.json index 0199fed2ee..0790147d6f 100644 --- a/src/frontend/apps/impress/src/i18n/translations.json +++ b/src/frontend/apps/impress/src/i18n/translations.json @@ -529,6 +529,26 @@ "Shared 
with {{count}} users_many": "Shared with {{count}} users", "Shared with {{count}} users_one": "Shared with {{count}} user", "Shared with {{count}} users_other": "Shared with {{count}} users", + "Import from Outline": "Import from Outline", + "Import Outline archive": "Import Outline archive", + "Select a .zip file": "Select a .zip file", + "Import": "Import", + "Upload your Outline export (.zip) to import your documents": "Upload your Outline export (.zip) to import your documents", + "Drag and drop your file here": "Drag and drop your file here", + "or click to browse": "or click to browse", + "Accepts .zip files only": "Accepts .zip files only", + "Click or drag to select a file": "Click or drag to select a file", + "Uploading archive...": "Uploading archive...", + "Preparing import...": "Preparing import...", + "Scanning for security...": "Scanning for security...", + "Creating documents...": "Creating documents...", + "Import completed! Redirecting...": "Import completed! Redirecting...", + "Import is taking too long. Please try again.": "Import is taking too long. Please try again.", + "Import failed. Please try again.": "Import failed. Please try again.", + "Failed to check import status. Please try again.": "Failed to check import status. Please try again.", + "Something went wrong. Please try again.": "Something went wrong. 
Please try again.", + "Importing...": "Importing...", + "Cancel": "Cancel", "days_many": "days", "days_one": "day", "days_other": "days" @@ -877,6 +897,26 @@ "Open root document": "Ouvrir le document racine", "Open the document options": "Ouvrir les options du document", "Open the header menu": "Ouvrir le menu d'en-tête", + "Import from Outline": "Importer depuis Outline", + "Import Outline archive": "Importer une archive Outline", + "Select a .zip file": "Sélectionnez un fichier .zip", + "Import": "Importer", + "Upload your Outline export (.zip) to import your documents": "Téléversez votre export Outline (.zip) pour importer vos documents", + "Drag and drop your file here": "Glissez-déposez votre fichier ici", + "or click to browse": "ou cliquez pour parcourir", + "Accepts .zip files only": "Accepte uniquement les fichiers .zip", + "Click or drag to select a file": "Cliquez ou glissez pour sélectionner un fichier", + "Uploading archive...": "Téléversement de l'archive...", + "Preparing import...": "Préparation de l'import...", + "Scanning for security...": "Analyse de sécurité...", + "Creating documents...": "Création des documents...", + "Import completed! Redirecting...": "Import terminé ! Redirection...", + "Import is taking too long. Please try again.": "L'import prend trop de temps. Veuillez réessayer.", + "Import failed. Please try again.": "L'import a échoué. Veuillez réessayer.", + "Failed to check import status. Please try again.": "Impossible de vérifier le statut de l'import. Veuillez réessayer.", + "Something went wrong. Please try again.": "Une erreur s'est produite. 
Veuillez réessayer.", + "Importing...": "Import en cours...", + "Cancel": "Annuler", "Open the menu of actions for the document: {{title}}": "Ouvrir le menu des actions du document : {{title}}", "Open the sharing settings for the document": "Ouvrir les paramètres de partage pour le document", "Organize": "Organiser", diff --git a/src/frontend/apps/impress/src/pages/import/outline/index.tsx b/src/frontend/apps/impress/src/pages/import/outline/index.tsx new file mode 100644 index 0000000000..538ef2f315 --- /dev/null +++ b/src/frontend/apps/impress/src/pages/import/outline/index.tsx @@ -0,0 +1,393 @@
+import { Alert, Button, Loader, VariantType } from '@openfun/cunningham-react';
+import Head from 'next/head';
+import { useRouter } from 'next/router';
+import { ReactElement, useCallback, useEffect, useRef, useState } from 'react';
+import { useTranslation } from 'react-i18next';
+import styled, { css } from 'styled-components';
+
+import { fetchAPI } from '@/api';
+import { Box, Card, Icon, Text, TextErrors } from '@/components';
+import { useCunninghamTheme } from '@/cunningham';
+import { MainLayout } from '@/layouts';
+import { NextPageWithLayout } from '@/types/next';
+
+// Client-side state of the import flow, from file selection to the final job outcome.
+type ImportStatus =
+  | 'idle'
+  | 'uploading'
+  | 'pending'
+  | 'scanning'
+  | 'processing'
+  | 'completed'
+  | 'failed';
+
+// Shape of the JSON returned by `imports/outline/jobs/{id}/`, as read by the poller.
+interface ImportJob {
+  id: string;
+  status: string;
+  created_document_ids: string[];
+  error_message: string;
+}
+
+// Dashed drop target whose border and background follow the drag / file-selected flags.
+const DropZone = styled(Box)<{ $isDragging: boolean; $hasFile: boolean }>`
+  border: 2px dashed
+    ${({ $isDragging, $hasFile, theme }) =>
+      $isDragging
+        ? 'var(--c--theme--colors--primary-500)'
+        : $hasFile
+          ? 'var(--c--theme--colors--success-500)'
+          : 'var(--c--theme--colors--greyscale-300)'};
+  border-radius: 8px;
+  transition: all 0.2s ease;
+  cursor: pointer;
+
+  &:hover {
+    border-color: var(--c--theme--colors--primary-400);
+    background-color: var(--c--theme--colors--primary-100);
+  }
+
+  ${({ $isDragging }) =>
+    $isDragging &&
+    css`
+      background-color: var(--c--theme--colors--primary-100);
+    `}
+
+  ${({ $hasFile }) =>
+    $hasFile &&
+    css`
+      background-color: var(--c--theme--colors--success-100);
+    `}
+`;
+
+const HiddenInput = styled.input`
+  display: none;
+`;
+
+const Page: NextPageWithLayout = () => {
+  const { t } = useTranslation();
+  const router = useRouter();
+  const { colorsTokens } = useCunninghamTheme();
+  const fileInputRef = useRef<HTMLInputElement>(null);
+
+  const [file, setFile] = useState<File | null>(null);
+  const [isDragging, setIsDragging] = useState(false);
+  const [status, setStatus] = useState<ImportStatus>('idle');
+  const [error, setError] = useState<string | null>(null);
+
+  // The poll loop outlives a single render. Track the pending timer and whether
+  // the page was left, so a late poll cannot update state or redirect.
+  const isUnmountedRef = useRef(false);
+  const pollTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+
+  useEffect(() => {
+    // Re-arm in case React re-runs the effect after a cleanup (StrictMode in development).
+    isUnmountedRef.current = false;
+    return () => {
+      isUnmountedRef.current = true;
+      if (pollTimerRef.current !== null) {
+        clearTimeout(pollTimerRef.current);
+      }
+    };
+  }, []);
+
+  // Poll the job with a one-second pause between attempts, up to `maxAttempts`:
+  // redirect when it completes, show the error when it fails, otherwise mirror its status.
+  const pollJobStatus = useCallback(
+    async (id: string) => {
+      const maxAttempts = 120; // 2 minutes max
+      let attempts = 0;
+
+      const poll = async (): Promise<void> => {
+        if (isUnmountedRef.current) return;
+        attempts++;
+        if (attempts > maxAttempts) {
+          setError(t('Import is taking too long. Please try again.'));
+          setStatus('failed');
+          return;
+        }
+
+        try {
+          const response = await fetchAPI(`imports/outline/jobs/${id}/`);
+          if (!response.ok) {
+            throw new Error('Failed to fetch job status');
+          }
+
+          const job = (await response.json()) as ImportJob;
+
+          // The page may have been left while the request was in flight.
+          if (isUnmountedRef.current) return;
+
+          if (job.status === 'completed') {
+            setStatus('completed');
+            const firstDocId = job.created_document_ids?.[0];
+            if (firstDocId) {
+              void router.replace(`/docs/${firstDocId}`);
+            } else {
+              void router.replace('/');
+            }
+            return;
+          }
+
+          if (job.status === 'failed') {
+            setError(job.error_message || t('Import failed. Please try again.'));
+            setStatus('failed');
+            return;
+          }
+
+          // Update status based on job status
+          if (job.status === 'scanning') {
+            setStatus('scanning');
+          } else if (job.status === 'processing') {
+            setStatus('processing');
+          } else {
+            setStatus('pending');
+          }
+
+          // Continue polling
+          pollTimerRef.current = setTimeout(() => void poll(), 1000);
+        } catch {
+          if (isUnmountedRef.current) return;
+          setError(t('Failed to check import status. Please try again.'));
+          setStatus('failed');
+        }
+      };
+
+      await poll();
+    },
+    [router, t],
+  );
+
+  // Upload the selected archive, then poll the job id returned by the API.
+  const handleUpload = async () => {
+    if (!file) return;
+
+    setError(null);
+    setStatus('uploading');
+
+    try {
+      const form = new FormData();
+      form.append('file', file);
+
+      const response = await fetchAPI('imports/outline/upload/', {
+        method: 'POST',
+        body: form,
+        withoutContentType: true,
+      });
+
+      if (!response.ok) {
+        const errorData = await response.json();
+        throw new Error(errorData?.file?.[0] || 'Upload failed');
+      }
+
+      const data = (await response.json()) as { job_id: string; status: string };
+      setStatus('pending');
+
+      // Start polling for job status
+      await pollJobStatus(data.job_id);
+    } catch (err) {
+      setError(
+        err instanceof Error
+          ? err.message
+          : t('Something went wrong.
Please try again.'),
+      );
+      setStatus('failed');
+    }
+  };
+
+  const handleDragOver = (e: React.DragEvent) => {
+    e.preventDefault();
+    setIsDragging(true);
+  };
+
+  const handleDragLeave = (e: React.DragEvent) => {
+    e.preventDefault();
+    setIsDragging(false);
+  };
+
+  // Shared by drop and browse: only .zip archives are accepted. A rejected
+  // file also sets status to 'failed', because the error alert is rendered
+  // only in that state.
+  const selectFile = (candidate: File | undefined) => {
+    const isZip = candidate?.name.endsWith('.zip') ?? false;
+    if (candidate && isZip) setFile(candidate);
+    setError(isZip ? null : t('Select a .zip file'));
+    setStatus(isZip ? 'idle' : 'failed');
+  };
+
+  const handleDrop = (e: React.DragEvent) => {
+    e.preventDefault();
+    setIsDragging(false);
+    selectFile(e.dataTransfer.files[0]);
+  };
+
+  const handleFileSelect = (e: React.ChangeEvent<HTMLInputElement>) => {
+    // Cancelling the dialog yields no file: keep the current selection.
+    const selectedFile = e.target.files?.[0];
+    if (selectedFile) selectFile(selectedFile);
+  };
+
+  const handleZoneClick = () => {
+    fileInputRef.current?.click();
+  };
+
+  const getStatusMessage = () => {
+    switch (status) {
+      case 'uploading':
+        return t('Uploading archive...');
+      case 'pending':
+        return t('Preparing import...');
+      case 'scanning':
+        return t('Scanning for security...');
+      case 'processing':
+        return t('Creating documents...');
+      case 'completed':
+        return t('Import completed! Redirecting...');
+      default:
+        return null;
+    }
+  };
+
+  const isProcessing = ['uploading', 'pending', 'scanning', 'processing', 'completed'].includes(
+    status,
+  );
+
+ return ( + <> + + {t('Import from Outline')} - {t('Docs')} + + + + + {/* Header */} + + + + {t('Import from Outline')} + + + {t('Upload your Outline export (.zip) to import your documents')} + + + + {/* Drop Zone */} + + + + {file ?
( + <> + + {file.name} + + {(file.size / 1024 / 1024).toFixed(2)} MB + + + ) : ( + <> + + {t('Drag and drop your file here')} + + {t('or click to browse')} + + + {t('Accepts .zip files only')} + + + )} + + + {/* Status */} + {isProcessing && ( + + + + {getStatusMessage()} + + + )} + + {/* Error */} + {error && status === 'failed' && ( + + )} + + {/* Success */} + {status === 'completed' && ( + + {t('Import completed! Redirecting...')} + + )} + + {/* Actions */} + + + + + + + + + ); +}; + +Page.getLayout = function getLayout(page: ReactElement) { + return {page}; +}; + +export default Page;