Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
6dbd910
outline-import: backend upload endpoint + frontend upload page\n\nBac…
NicolasRitouet Sep 13, 2025
1fd4406
frontend(import-outline): fix baseApiUrl import path
NicolasRitouet Sep 13, 2025
becc514
outline-import: run malware scan on uploaded assets
NicolasRitouet Sep 13, 2025
9f4fb06
tests(outline-import): add API tests for upload (.zip) flow\n- Anonym…
NicolasRitouet Sep 13, 2025
4f3b62d
refactor(outline-import): move import logic to core/services/outline_…
NicolasRitouet Sep 13, 2025
fa65c45
outline-import: reinforce safety and tests\n- Zip Slip protection (re…
NicolasRitouet Sep 13, 2025
cce6c96
Add Outline import API view
NicolasRitouet Sep 16, 2025
6146a48
Remove legacy Outline import viewset
NicolasRitouet Sep 16, 2025
453b153
Improve Outline import validation and UI
NicolasRitouet Sep 17, 2025
b7a7663
feat(outline-import): Add markdown preprocessing for unsupported Bloc…
NicolasRitouet Sep 24, 2025
06d9c2b
Revert "feat(outline-import): Add markdown preprocessing for unsuppor…
NicolasRitouet Sep 26, 2025
95fa210
Merge main into feature/outline-import
NicolasRitouet Oct 9, 2025
68e58b2
fix(outline-import): Fix CSRF token and nested documents handling
NicolasRitouet Oct 12, 2025
538c641
Cleanup imports
NicolasRitouet Oct 12, 2025
619b624
Fix import outline
NicolasRitouet Oct 12, 2025
e1f5a13
add new line
NicolasRitouet Oct 12, 2025
7d6f055
es-lint fixes
NicolasRitouet Oct 13, 2025
1d65ca3
fix(outline-import): Address PR review comments
NicolasRitouet Nov 29, 2025
600672d
Merge upstream/main into feature/outline-import
NicolasRitouet Nov 29, 2025
be6a2cb
fix(outline-import): Add async processing, improve UI, and address PR…
NicolasRitouet Nov 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .claude/settings.local.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"permissions": {
"allow": [
"WebSearch"
],
"deny": [],
"ask": []
}
}
27 changes: 27 additions & 0 deletions src/backend/core/api/import_viewsets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""ViewSets for import-related endpoints."""

from rest_framework import permissions, viewsets

from core import models
from core.api import serializers


class OutlineImportJobViewSet(viewsets.ReadOnlyModelViewSet):
    """
    Read-only ViewSet used to poll the status of Outline import jobs.

    Import jobs are processed asynchronously; this ViewSet only exposes
    list/retrieve so clients can check on them. Each user is restricted to
    the jobs they own.

    Endpoints:
    - GET /api/v1.0/imports/outline/jobs/ - List user's import jobs
    - GET /api/v1.0/imports/outline/jobs/{id}/ - Get specific job status
    """

    queryset = models.OutlineImportJob.objects.all()
    permission_classes = [permissions.IsAuthenticated]
    serializer_class = serializers.OutlineImportJobSerializer

    def get_queryset(self):
        """Return only the import jobs owned by the requesting user."""
        current_user = self.request.user
        return self.queryset.filter(user=current_user)
77 changes: 77 additions & 0 deletions src/backend/core/api/imports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""Import endpoints for Outline (zip upload)."""

from __future__ import annotations

import uuid

import rest_framework as drf
from django.core.files.storage import default_storage
from django.db import transaction
from django.urls import reverse

from lasuite.malware_detection import malware_detection

from core import models
from core.api.serializers import OutlineImportSerializer


# ---------- Outline (Zip Upload) ----------


class OutlineImportUploadView(drf.views.APIView):
    """
    Upload an Outline export zip file for asynchronous processing.

    This endpoint:
    1. Validates the uploaded zip file
    2. Saves it to the default storage backend
    3. Creates an OutlineImportJob to track the import
    4. Triggers malware scanning of the zip
    5. Returns a polling URL for checking import status

    The actual import processing happens asynchronously after malware scanning.
    """

    authentication_classes = [drf.authentication.SessionAuthentication]
    parser_classes = [drf.parsers.MultiPartParser]
    permission_classes = [drf.permissions.IsAuthenticated]

    def post(self, request):
        """
        Store the uploaded zip, register an import job and queue a malware scan.

        Returns a 202 response containing the job id, its current status and
        an absolute URL that can be polled for progress. Validation errors are
        raised by the serializer (``raise_exception=True``).
        """
        # Validate the uploaded file
        serializer = OutlineImportSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        uploaded_file = serializer.validated_data["file"]

        # Generate the storage key for the zip file
        file_id = uuid.uuid4()
        requested_key = f"imports/outline/{request.user.id}/{file_id}.zip"

        # Storage.save() returns the name the file was actually stored under,
        # which the backend is allowed to alter; keep that one so the job and
        # the malware scan always reference the real object.
        key = default_storage.save(requested_key, uploaded_file)

        # Create import job and trigger malware scan in a transaction
        with transaction.atomic():
            try:
                job = models.OutlineImportJob.objects.create(
                    user=request.user,
                    zip_file_key=key,
                    status=models.OutlineImportJob.Status.PENDING,
                )
            except Exception:
                # No job will ever reference this upload: remove it so it does
                # not linger in storage, then let the original error propagate
                # (which also rolls the transaction back).
                default_storage.delete(key)
                raise

            # Trigger malware scan of the zip file once the job row is committed.
            # The callback will trigger the import task if the file is safe.
            transaction.on_commit(
                lambda: malware_detection.analyse_file(key, import_job_id=str(job.id))
            )

        # Return job info and polling URL
        status_url = reverse("outline-import-job-detail", kwargs={"pk": job.id})

        return drf.response.Response(
            {
                "job_id": str(job.id),
                "status": job.status,
                "status_url": request.build_absolute_uri(status_url),
            },
            status=drf.status.HTTP_202_ACCEPTED,  # 202 Accepted for async processing
        )
41 changes: 41 additions & 0 deletions src/backend/core/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1013,3 +1013,44 @@ def get_abilities(self, thread):
if request:
return thread.get_abilities(request.user)
return {}


class OutlineImportSerializer(serializers.Serializer):
    """Serializer for validating Outline export zip uploads."""

    file = serializers.FileField()

    def validate_file(self, file):
        """
        Validate that the uploaded file is a readable zip archive.

        The check is done in two steps: the file name must end in ``.zip``
        (case-insensitively), and the content must be openable by
        ``zipfile.ZipFile``. The file pointer is rewound afterwards so the
        caller can read the upload from the beginning.

        Raises:
            serializers.ValidationError: if the extension is wrong or the
                content is not a valid zip archive.
        """
        import zipfile

        # ``name`` may be missing or None on some file objects.
        name = getattr(file, "name", "") or ""
        if not name.lower().endswith(".zip"):
            raise serializers.ValidationError("Must be a .zip file")

        # Let zipfile read the archive straight from the (seekable) upload
        # instead of copying the whole payload into memory first. Closing a
        # ZipFile built from a file object leaves that file object open.
        try:
            with zipfile.ZipFile(file):
                pass
        except zipfile.BadZipFile as exc:
            raise serializers.ValidationError("Invalid zip archive") from exc
        finally:
            file.seek(0)  # Reset file pointer after probing

        return file


class OutlineImportJobSerializer(serializers.ModelSerializer):
    """Read-only representation of an Outline import job's status."""

    class Meta:
        model = models.OutlineImportJob
        # The same list is reused for ``read_only_fields`` below, so every
        # exposed field is read-only.
        fields = [
            "id", "status", "created_document_ids",
            "error_message", "created_at", "updated_at",
        ]
        read_only_fields = fields
46 changes: 45 additions & 1 deletion src/backend/core/malware_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
import logging

from django.core.files.storage import default_storage
from django.db import transaction

from lasuite.malware_detection.enums import ReportStatus

from core.enums import DocumentAttachmentStatus
from core.models import Document
from core.models import Document, OutlineImportJob

logger = logging.getLogger(__name__)
security_logger = logging.getLogger("docs.security")
Expand All @@ -16,6 +17,13 @@
def malware_detection_callback(file_path, status, error_info, **kwargs):
"""Malware detection callback"""

# Handle Outline import jobs
import_job_id = kwargs.get("import_job_id")
if import_job_id:
_handle_outline_import_scan(import_job_id, file_path, status, error_info)
return

# Handle regular document attachments
if status == ReportStatus.SAFE:
logger.info("File %s is safe", file_path)
# Get existing metadata
Expand Down Expand Up @@ -50,3 +58,39 @@ def malware_detection_callback(file_path, status, error_info, **kwargs):

# Delete the file from the storage
default_storage.delete(file_path)


def _handle_outline_import_scan(job_id, file_path, status, error_info):
    """
    Apply a malware scan verdict to the Outline import job it belongs to.

    A SAFE verdict moves the job to SCANNING and schedules the import task
    for after the current transaction commits. Any other verdict marks the
    job FAILED, records the error and removes the zip from storage. If the
    job cannot be found the event is logged and nothing else happens.
    """
    from core.tasks.outline_import import process_outline_import_task

    try:
        import_job = OutlineImportJob.objects.get(id=job_id)
    except OutlineImportJob.DoesNotExist:
        logger.error("OutlineImportJob %s not found for malware callback", job_id)
        return

    if status != ReportStatus.SAFE:
        security_logger.warning(
            "Outline import zip %s contains malware. Job %s marked as failed. Error: %s",
            file_path,
            job_id,
            error_info,
        )
        import_job.error_message = f"Malware detected in uploaded file: {error_info}"
        import_job.status = OutlineImportJob.Status.FAILED
        import_job.save(update_fields=["status", "error_message", "updated_at"])

        # Removing the infected archive is best-effort: a storage failure is
        # logged but must not undo the FAILED status recorded above.
        try:
            default_storage.delete(file_path)
        except Exception as exc:
            logger.warning("Failed to delete infected zip file %s: %s", file_path, exc)
        return

    logger.info("Outline import zip %s is safe, triggering import task", file_path)
    import_job.status = OutlineImportJob.Status.SCANNING
    import_job.save(update_fields=["status", "updated_at"])

    def _enqueue_import():
        return process_outline_import_task.delay(str(import_job.id))

    # Defer the task until the transaction commits so the status update
    # is visible to the worker that picks it up.
    transaction.on_commit(_enqueue_import)
107 changes: 107 additions & 0 deletions src/backend/core/migrations/0025_outline_import_job.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Generated manually for Outline import feature

import django.contrib.postgres.fields
import django.db.models.deletion
import uuid
from django.conf import settings
from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the ``OutlineImportJob`` model that tracks Outline zip imports."""

    dependencies = [
        ("core", "0024_add_is_masked_field_to_link_trace"),
    ]

    operations = [
        migrations.CreateModel(
            name="OutlineImportJob",
            fields=[
                # UUID primary key, generated on the Python side.
                (
                    "id",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        help_text="primary key for the record as UUID",
                        primary_key=True,
                        serialize=False,
                        verbose_name="id",
                    ),
                ),
                # Creation / last-update timestamps maintained by Django.
                (
                    "created_at",
                    models.DateTimeField(
                        auto_now_add=True,
                        help_text="date and time at which a record was created",
                        verbose_name="created on",
                    ),
                ),
                (
                    "updated_at",
                    models.DateTimeField(
                        auto_now=True,
                        help_text="date and time at which a record was last updated",
                        verbose_name="updated on",
                    ),
                ),
                # Lifecycle state of the job; new jobs start as "pending".
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "Pending"),
                            ("scanning", "Scanning"),
                            ("processing", "Processing"),
                            ("completed", "Completed"),
                            ("failed", "Failed"),
                        ],
                        default="pending",
                        help_text="current status of the import job",
                        max_length=20,
                        verbose_name="status",
                    ),
                ),
                # Storage key under which the uploaded archive was saved.
                (
                    "zip_file_key",
                    models.CharField(
                        help_text="S3 key of the uploaded zip file",
                        max_length=255,
                        verbose_name="zip file key",
                    ),
                ),
                # ArrayField comes from django.contrib.postgres, so this
                # column requires a PostgreSQL database.
                (
                    "created_document_ids",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.UUIDField(),
                        blank=True,
                        default=list,
                        help_text="list of document IDs created during import",
                        size=None,
                        verbose_name="created document IDs",
                    ),
                ),
                (
                    "error_message",
                    models.TextField(
                        blank=True,
                        help_text="error message if import failed",
                        verbose_name="error message",
                    ),
                ),
                # Owner of the job; deleting the user deletes their jobs.
                (
                    "user",
                    models.ForeignKey(
                        help_text="user who initiated the import",
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="outline_import_jobs",
                        to=settings.AUTH_USER_MODEL,
                        verbose_name="user",
                    ),
                ),
            ],
            options={
                "verbose_name": "Outline import job",
                "verbose_name_plural": "Outline import jobs",
                "db_table": "core_outline_import_job",
                # Newest jobs first by default.
                "ordering": ["-created_at"],
            },
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Generated by Django 5.2.7 on 2025-11-29 06:18

from django.db import migrations


class Migration(migrations.Migration):
    """Depend on both 0025 and 0026 so the two branches converge; no schema change."""

    dependencies = [
        ("core", "0025_outline_import_job"),
        ("core", "0026_comments"),
    ]

    # Nothing to apply: this migration only links the two dependencies above.
    operations = []
Loading
Loading