From e5d909522c1392dee12d3a61e8c1b151ae5a29ed Mon Sep 17 00:00:00 2001 From: HeatCrab Date: Wed, 26 Nov 2025 09:36:47 +0800 Subject: [PATCH 1/9] Add syscall dispatcher abstraction layer The generic syscall dispatcher coupled privilege transition mechanisms with table lookup logic, preventing architecture-specific trap implementations from reusing the dispatch table. Introduce separate dispatcher for direct table lookup that trap handlers can invoke without triggering privilege transitions. Mark user-space interface as weak symbol to enable architecture overrides. Rename wrapper functions to match generated short names. --- kernel/syscall.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/kernel/syscall.c b/kernel/syscall.c index 6ad4493f..7aef52ee 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -16,19 +16,30 @@ SYSCALL_TABLE static const void *syscall_table[SYS_COUNT] = {SYSCALL_TABLE}; #undef _ -/* Weak, generic dispatcher */ -int syscall(int num, void *a1, void *a2, void *a3) +/* Core syscall execution via direct table lookup. + * Called by trap handlers to invoke syscall implementations without + * triggering privilege transitions. User space must not call this directly. + */ +int do_syscall(int num, void *a1, void *a2, void *a3) { if (unlikely(num <= 0 || num >= SYS_COUNT)) return -ENOSYS; - /* safety check for valid function pointer */ if (unlikely(!syscall_table[num])) return -ENOSYS; return ((int (*)(void *, void *, void *)) syscall_table[num])(a1, a2, a3); } +/* Generic user-space syscall interface. + * This weak symbol allows architecture-specific implementations to override + * with trap-based entry mechanisms. 
+ */ +__attribute__((weak)) int syscall(int num, void *a1, void *a2, void *a3) +{ + return do_syscall(num, a1, a2, a3); +} + static char *_env[1] = {0}; char **environ = _env; int errno = 0; @@ -239,7 +250,7 @@ static int _tadd(void *task, int stack_size) return mo_task_spawn(task, stack_size); } -int sys_task_add(void *task, int stack_size) +int sys_tadd(void *task, int stack_size) { return syscall(SYS_tadd, task, (void *) stack_size, 0); } @@ -252,7 +263,7 @@ static int _tcancel(int id) return mo_task_cancel(id); } -int sys_task_cancel(int id) +int sys_tcancel(int id) { return syscall(SYS_tcancel, (void *) id, 0, 0); } @@ -263,7 +274,7 @@ static int _tyield(void) return 0; } -int sys_task_yield(void) +int sys_tyield(void) { return syscall(SYS_tyield, 0, 0, 0); } @@ -277,7 +288,7 @@ static int _tdelay(int ticks) return 0; } -int sys_task_delay(int ticks) +int sys_tdelay(int ticks) { return syscall(SYS_tdelay, (void *) ticks, 0, 0); } @@ -290,7 +301,7 @@ static int _tsuspend(int id) return mo_task_suspend(id); } -int sys_task_suspend(int id) +int sys_tsuspend(int id) { return syscall(SYS_tsuspend, (void *) id, 0, 0); } @@ -303,7 +314,7 @@ static int _tresume(int id) return mo_task_resume(id); } -int sys_task_resume(int id) +int sys_tresume(int id) { return syscall(SYS_tresume, (void *) id, 0, 0); } @@ -316,7 +327,7 @@ static int _tpriority(int id, int priority) return mo_task_priority(id, priority); } -int sys_task_priority(int id, int priority) +int sys_tpriority(int id, int priority) { return syscall(SYS_tpriority, (void *) id, (void *) priority, 0); } @@ -326,7 +337,7 @@ static int _tid(void) return mo_task_id(); } -int sys_task_id(void) +int sys_tid(void) { return syscall(SYS_tid, 0, 0, 0); } @@ -337,7 +348,7 @@ static int _twfi(void) return 0; } -int sys_task_wfi(void) +int sys_twfi(void) { return syscall(SYS_twfi, 0, 0, 0); } @@ -347,7 +358,7 @@ static int _tcount(void) return mo_task_count(); } -int sys_task_count(void) +int sys_tcount(void) { return 
syscall(SYS_tcount, 0, 0, 0); } From 5be94813146baa33a6cbb3073d3cb5f6fe61f925 Mon Sep 17 00:00:00 2001 From: HeatCrab Date: Wed, 26 Nov 2025 09:37:06 +0800 Subject: [PATCH 2/9] Add ecall-based syscall entry point Architecture-specific implementations require direct linkage to override weak symbols. Archives extract objects only when symbols are unresolved, skipping strong overrides when weak symbols satisfy references. Introduce trap-based syscall entry using ecall instruction and modify build system to link entry point before archive, ensuring architecture override takes precedence at link time. --- Makefile | 6 +++--- arch/riscv/build.mk | 8 ++++++++ arch/riscv/entry.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 3 deletions(-) create mode 100644 arch/riscv/entry.c diff --git a/Makefile b/Makefile index 23ef4d06..68175601 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ deps += $(LIB_OBJS:%.o=%.o.d) APPS := coop echo hello mqueues semaphore mutex cond \ pipes pipes_small pipes_struct prodcons progress \ rtsched suspend test64 timer timer_kill \ - cpubench test_libc + cpubench test_libc umode # Output files for __link target IMAGE_BASE := $(BUILD_DIR)/image @@ -66,9 +66,9 @@ $(APPS): %: rebuild $(BUILD_APP_DIR)/%.o linmo # Link target - creates all output files __link: $(IMAGE_FILES) -$(IMAGE_BASE).elf: $(BUILD_APP_DIR)/*.o $(BUILD_DIR)/liblinmo.a +$(IMAGE_BASE).elf: $(BUILD_APP_DIR)/*.o $(BUILD_DIR)/liblinmo.a $(ENTRY_OBJ) $(VECHO) " LD\t$@\n" - $(Q)$(LD) $(LDFLAGS) -T$(LDSCRIPT) -Map $(IMAGE_BASE).map -o $@ $(BUILD_APP_DIR)/*.o -L$(BUILD_DIR) -llinmo + $(Q)$(LD) $(LDFLAGS) -T$(LDSCRIPT) -Map $(IMAGE_BASE).map -o $@ $(BUILD_APP_DIR)/*.o $(ENTRY_OBJ) -L$(BUILD_DIR) -llinmo $(IMAGE_BASE).lst: $(IMAGE_BASE).elf $(VECHO) " DUMP\t$@\n" diff --git a/arch/riscv/build.mk b/arch/riscv/build.mk index c4dc46da..243a6ea2 100644 --- a/arch/riscv/build.mk +++ b/arch/riscv/build.mk @@ -74,6 +74,14 @@ HAL_OBJS := boot.o hal.o muldiv.o 
HAL_OBJS := $(addprefix $(BUILD_KERNEL_DIR)/,$(HAL_OBJS)) deps += $(HAL_OBJS:%.o=%.o.d) +# Architecture-specific syscall entry point requiring direct linkage. +# Archives only extract objects when symbols are unresolved. Since the generic +# syscall dispatcher provides a weak symbol, the archive mechanism would skip +# the strong override. Direct linking ensures the architecture-specific +# implementation takes precedence at link time. +ENTRY_OBJ := $(BUILD_KERNEL_DIR)/entry.o +deps += $(ENTRY_OBJ).d + $(BUILD_KERNEL_DIR)/%.o: $(ARCH_DIR)/%.c | $(BUILD_DIR) $(VECHO) " CC\t$@\n" $(Q)$(CC) $(CFLAGS) -o $@ -c -MMD -MF $@.d $< diff --git a/arch/riscv/entry.c b/arch/riscv/entry.c new file mode 100644 index 00000000..9956558e --- /dev/null +++ b/arch/riscv/entry.c @@ -0,0 +1,42 @@ +/* RISC-V Kernel Entry Points + * + * This file implements architecture-specific entry mechanisms into the kernel, + * primarily the system call trap interface using the RISC-V ecall instruction. + * + * System Call Calling Convention (RISC-V ABI): + * - a7 (x17): System call number + * - a0 (x10): Argument 1 / Return value + * - a1 (x11): Argument 2 + * - a2 (x12): Argument 3 + * + * The ecall instruction triggers an environment call exception that transfers + * control to the M-mode exception handler (hal.c), which then dispatches to + * the appropriate system call implementation via the syscall table. + */ + +#include + +/* Architecture-specific syscall implementation using ecall trap. + * This overrides the weak symbol defined in kernel/syscall.c. + */ +int syscall(int num, void *arg1, void *arg2, void *arg3) +{ + register int a0 asm("a0") = (int) arg1; + register int a1 asm("a1") = (int) arg2; + register int a2 asm("a2") = (int) arg3; + register int a7 asm("a7") = num; + + /* Execute ecall instruction to trap into M-mode. + * The M-mode exception handler will: + * 1. Save the current task context + * 2. Dispatch to the syscall handler based on a7 + * 3. Place the return value in a0 + * 4. 
Restore context and return to user mode via mret + */ + asm volatile("ecall" + : "+r"(a0) /* a0 is both input (arg1) and output (retval) */ + : "r"(a1), "r"(a2), "r"(a7) + : "memory", "cc"); + + return a0; +} From 40fc9f937b67385370bab1e09df88e5956f55658 Mon Sep 17 00:00:00 2001 From: HeatCrab Date: Wed, 26 Nov 2025 09:37:34 +0800 Subject: [PATCH 3/9] Handle user mode ecall for syscall dispatch User mode tasks require privilege escalation to invoke kernel services. Without proper trap frame preservation, context switches corrupt privilege state, preventing tasks from resuming at correct levels. Add trap handler for user mode environment calls to dispatch syscalls. Extend trap frame to preserve privilege mode across context switches. Correct frame layout to match actual register storage order in trap entry sequence. --- arch/riscv/boot.c | 30 ++++++---- arch/riscv/hal.c | 148 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 143 insertions(+), 35 deletions(-) diff --git a/arch/riscv/boot.c b/arch/riscv/boot.c index 37978025..8e46f4c9 100644 --- a/arch/riscv/boot.c +++ b/arch/riscv/boot.c @@ -94,10 +94,10 @@ __attribute__((naked, section(".text.prologue"))) void _entry(void) } /* Size of the full trap context frame saved on the stack by the ISR. - * 30 GPRs (x1, x3-x31) + mcause + mepc = 32 registers * 4 bytes = 128 bytes. - * This provides a 16-byte aligned full context save. + * 30 GPRs (x1, x3-x31) + mcause + mepc + mstatus = 33 words * 4 bytes = 132 + * bytes. Round up to 144 bytes for 16-byte alignment. */ -#define ISR_CONTEXT_SIZE 128 +#define ISR_CONTEXT_SIZE 144 /* Low-level Interrupt Service Routine (ISR) trampoline. 
* @@ -154,11 +154,15 @@ __attribute__((naked, aligned(4))) void _isr(void) "sw t6, 29*4(sp)\n" /* Save trap-related CSRs and prepare arguments for do_trap */ - "csrr a0, mcause\n" /* Arg 1: cause */ - "csrr a1, mepc\n" /* Arg 2: epc */ - "mv a2, sp\n" /* Arg 3: isr_sp (current stack frame) */ + "csrr a0, mcause\n" + "csrr a1, mepc\n" + "csrr a2, mstatus\n" /* For context switching in privilege change */ + "sw a0, 30*4(sp)\n" "sw a1, 31*4(sp)\n" + "sw a2, 32*4(sp)\n" + + "mv a2, sp\n" /* a2 = isr_sp */ /* Call the high-level C trap handler. * Returns: a0 = SP to use for restoring context (may be different @@ -169,9 +173,13 @@ __attribute__((naked, aligned(4))) void _isr(void) /* Use returned SP for context restore (enables context switching) */ "mv sp, a0\n" - /* Restore context. mepc might have been modified by the handler */ - "lw a1, 31*4(sp)\n" - "csrw mepc, a1\n" + /* Restore mstatus from frame[32] */ + "lw t0, 32*4(sp)\n" + "csrw mstatus, t0\n" + + /* Restore mepc from frame[31] (might have been modified by handler) */ + "lw t1, 31*4(sp)\n" + "csrw mepc, t1\n" "lw ra, 0*4(sp)\n" "lw gp, 1*4(sp)\n" "lw tp, 2*4(sp)\n" @@ -208,7 +216,7 @@ __attribute__((naked, aligned(4))) void _isr(void) /* Return from trap */ "mret\n" - : /* no outputs */ - : "i"(ISR_CONTEXT_SIZE) + : /* no outputs */ + : "i"(ISR_CONTEXT_SIZE) /* +16 for mcause, mepc, mstatus */ : "memory"); } diff --git a/arch/riscv/hal.c b/arch/riscv/hal.c index 04ecc7d0..774855af 100644 --- a/arch/riscv/hal.c +++ b/arch/riscv/hal.c @@ -35,12 +35,53 @@ #define CONTEXT_MSTATUS 16 /* Machine Status CSR */ /* Defines the size of the full trap frame saved by the ISR in 'boot.c'. - * The _isr routine saves 32 registers (30 GPRs + mcause + mepc), resulting - * in a 128-byte frame. This space MUST be reserved at the top of every task's - * stack (as a "red zone") to guarantee that an interrupt, even at peak stack - * usage, will not corrupt memory outside the task's stack bounds. 
+ * The _isr routine saves 33 words (30 GPRs + mcause + mepc + mstatus), + * resulting in a 144-byte frame with alignment padding. This space MUST be + * reserved at the top of every task's stack (as a "red zone") to guarantee + * that an interrupt, even at peak stack usage, will not corrupt memory + * outside the task's stack bounds. */ -#define ISR_STACK_FRAME_SIZE 128 +#define ISR_STACK_FRAME_SIZE 144 + +/* ISR frame register indices (as 32-bit word offsets from isr_sp). + * This layout matches the stack frame created by _isr in boot.c. + * Indices are in word offsets (divide byte offset by 4). + */ +enum { + FRAME_RA = 0, /* x1 - Return Address */ + FRAME_GP = 1, /* x3 - Global Pointer */ + FRAME_TP = 2, /* x4 - Thread Pointer */ + FRAME_T0 = 3, /* x5 - Temporary register 0 */ + FRAME_T1 = 4, /* x6 - Temporary register 1 */ + FRAME_T2 = 5, /* x7 - Temporary register 2 */ + FRAME_S0 = 6, /* x8 - Saved register 0 / Frame Pointer */ + FRAME_S1 = 7, /* x9 - Saved register 1 */ + FRAME_A0 = 8, /* x10 - Argument/Return 0 */ + FRAME_A1 = 9, /* x11 - Argument/Return 1 */ + FRAME_A2 = 10, /* x12 - Argument 2 */ + FRAME_A3 = 11, /* x13 - Argument 3 */ + FRAME_A4 = 12, /* x14 - Argument 4 */ + FRAME_A5 = 13, /* x15 - Argument 5 */ + FRAME_A6 = 14, /* x16 - Argument 6 */ + FRAME_A7 = 15, /* x17 - Argument 7 / Syscall Number */ + FRAME_S2 = 16, /* x18 - Saved register 2 */ + FRAME_S3 = 17, /* x19 - Saved register 3 */ + FRAME_S4 = 18, /* x20 - Saved register 4 */ + FRAME_S5 = 19, /* x21 - Saved register 5 */ + FRAME_S6 = 20, /* x22 - Saved register 6 */ + FRAME_S7 = 21, /* x23 - Saved register 7 */ + FRAME_S8 = 22, /* x24 - Saved register 8 */ + FRAME_S9 = 23, /* x25 - Saved register 9 */ + FRAME_S10 = 24, /* x26 - Saved register 10 */ + FRAME_S11 = 25, /* x27 - Saved register 11 */ + FRAME_T3 = 26, /* x28 - Temporary register 3 */ + FRAME_T4 = 27, /* x29 - Temporary register 4 */ + FRAME_T5 = 28, /* x30 - Temporary register 5 */ + FRAME_T6 = 29, /* x31 - Temporary register 
6 */ + FRAME_MCAUSE = 30, /* Machine Cause CSR */ + FRAME_EPC = 31, /* Machine Exception PC (mepc) */ + FRAME_MSTATUS = 32 /* Machine Status CSR */ +}; /* Global variable to hold the new stack pointer for pending context switch. * When a context switch is needed, hal_switch_stack() saves the current SP @@ -238,6 +279,21 @@ void hal_hardware_init(void) _stdout_install(__putchar); _stdin_install(__getchar); _stdpoll_install(__kbhit); + + /* Grant U-mode access to all memory for validation purposes. + * By default, RISC-V PMP denies all access to U-mode, which would cause + * instruction access faults immediately upon task switch. This minimal + * setup allows U-mode tasks to execute and serves as a placeholder until + * the full PMP driver is integrated. + */ + uint32_t pmpaddr = -1UL; /* Cover entire address space */ + uint8_t pmpcfg = 0x0F; /* TOR, R, W, X enabled */ + + asm volatile( + "csrw pmpaddr0, %0\n" + "csrw pmpcfg0, %1\n" + : + : "r"(pmpaddr), "r"(pmpcfg)); } /* Halts the system in an unrecoverable state */ @@ -321,6 +377,34 @@ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp) } else { /* Synchronous Exception */ uint32_t code = MCAUSE_GET_CODE(cause); + /* Handle ecall from U-mode - system calls */ + if (code == MCAUSE_ECALL_UMODE) { + /* Advance mepc past the ecall instruction (4 bytes) */ + uint32_t new_epc = epc + 4; + write_csr(mepc, new_epc); + + /* Extract syscall arguments from ISR frame */ + uint32_t *f = (uint32_t *) isr_sp; + + int syscall_num = f[FRAME_A7]; + void *arg1 = (void *) f[FRAME_A0]; + void *arg2 = (void *) f[FRAME_A1]; + void *arg3 = (void *) f[FRAME_A2]; + + /* Dispatch to syscall implementation via direct table lookup. + * Must use do_syscall here instead of syscall() to avoid recursive + * traps, as the user-space syscall() may be overridden with ecall. 
+ */ + extern int do_syscall(int num, void *arg1, void *arg2, void *arg3); + int retval = do_syscall(syscall_num, arg1, arg2, arg3); + + /* Store return value and updated PC */ + f[FRAME_A0] = (uint32_t) retval; + f[FRAME_EPC] = new_epc; + + return isr_sp; + } + /* Handle ecall from M-mode - used for yielding in preemptive mode */ if (code == MCAUSE_ECALL_MMODE) { /* Advance mepc past the ecall instruction (4 bytes) */ @@ -328,12 +412,11 @@ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp) write_csr(mepc, new_epc); /* Also update mepc in the ISR frame on the stack! - * The ISR epilogue will restore mepc from the frame (offset 31*4 = - * 124 bytes). If we don't update the frame, mret will jump back to - * the ecall instruction! + * The ISR epilogue will restore mepc from the frame. If we don't + * update the frame, mret will jump back to the ecall instruction! */ - uint32_t *isr_frame = (uint32_t *) isr_sp; - isr_frame[31] = new_epc; + uint32_t *f = (uint32_t *) isr_sp; + f[FRAME_EPC] = new_epc; /* Invoke dispatcher for context switch - parameter 0 = from ecall, * don't increment ticks. @@ -355,6 +438,7 @@ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp) uint32_t nibble = (epc >> i) & 0xF; _putchar(nibble < 10 ? '0' + nibble : 'A' + nibble - 10); } + trap_puts("\r\n"); hal_panic(); @@ -409,7 +493,9 @@ extern uint32_t _gp, _end; * 0: ra, 4: gp, 8: tp, 12: t0, ... 
116: t6 * 120: mcause, 124: mepc */ -void *hal_build_initial_frame(void *stack_top, void (*task_entry)(void)) +void *hal_build_initial_frame(void *stack_top, + void (*task_entry)(void), + int user_mode) { #define INITIAL_STACK_RESERVE \ 256 /* Reserve space below stack_top for task startup */ @@ -432,11 +518,16 @@ void *hal_build_initial_frame(void *stack_top, void (*task_entry)(void)) /* Initialize critical registers for proper task startup: * - frame[1] = gp: Global pointer, required for accessing global variables * - frame[2] = tp: Thread pointer, required for thread-local storage - * - frame[31] = mepc: Task entry point, where mret will jump to + * - frame[31] = mepc: Task entry point, where mret will jump to */ - frame[1] = (uint32_t) &_gp; /* gp - global pointer */ - frame[2] = tp_val; /* tp - thread pointer */ - frame[31] = (uint32_t) task_entry; /* mepc - entry point */ + frame[1] = (uint32_t) &_gp; /* gp - global pointer */ + frame[2] = tp_val; /* tp - thread pointer */ + + uint32_t mstatus_val = MSTATUS_MIE | MSTATUS_MPIE | + (user_mode ? MSTATUS_MPP_USER : MSTATUS_MPP_MACH); + frame[FRAME_MSTATUS] = mstatus_val; /* mstatus - enable interrupts */ + + frame[FRAME_EPC] = (uint32_t) task_entry; /* mepc - entry point */ return (void *) frame; } @@ -696,8 +787,9 @@ static void __attribute__((naked, used)) __dispatch_init(void) "lw gp, 12*4(a0)\n" "lw tp, 13*4(a0)\n" "lw sp, 14*4(a0)\n" - "lw ra, 15*4(a0)\n" - "ret\n"); /* Jump to the task's entry point */ + "lw t0, 15*4(a0)\n" + "csrw mepc, t0\n" /* Load task entry point into mepc */ + "mret\n"); /* Jump to the task's entry point */ } /* Transfers control from the kernel's main thread to the first task */ @@ -721,12 +813,19 @@ __attribute__((noreturn)) void hal_dispatch_init(jmp_buf env) } /* Builds an initial 'jmp_buf' context for a brand-new task. - * @ctx : Pointer to the 'jmp_buf' to initialize (must be valid). - * @sp : Base address of the task's stack (must be valid).
- * @ss : Total size of the stack in bytes (must be > ISR_STACK_FRAME_SIZE). - * @ra : The task's entry point function, used as the initial return address. + * @ctx : Pointer to the 'jmp_buf' to initialize (must be valid). + * @sp : Base address of the task's stack (must be valid). + * @ss : Total size of the stack in bytes (must be > + * ISR_STACK_FRAME_SIZE). + * @ra : The task's entry point function, used as the initial return + * address. + * @user_mode : Non-zero to initialize for user mode, zero for machine mode. */ -void hal_context_init(jmp_buf *ctx, size_t sp, size_t ss, size_t ra) +void hal_context_init(jmp_buf *ctx, + size_t sp, + size_t ss, + size_t ra, + int user_mode) { if (unlikely(!ctx || !sp || ss < (ISR_STACK_FRAME_SIZE + 64) || !ra)) hal_panic(); /* Invalid parameters - cannot safely initialize context */ @@ -759,12 +858,13 @@ void hal_context_init(jmp_buf *ctx, size_t sp, size_t ss, size_t ra) /* Set the essential registers for a new task: * - SP is set to the prepared top of the task's stack. * - RA is set to the task's entry point. - * - mstatus is set to enable interrupts and ensure machine mode. + * - mstatus is set to enable interrupts and configure privilege mode. * * When this context is first restored, the ret instruction will effectively * jump to this entry point, starting the task. */ (*ctx)[CONTEXT_SP] = (uint32_t) stack_top; (*ctx)[CONTEXT_RA] = (uint32_t) ra; - (*ctx)[CONTEXT_MSTATUS] = MSTATUS_MIE | MSTATUS_MPP_MACH; + (*ctx)[CONTEXT_MSTATUS] = MSTATUS_MIE | MSTATUS_MPIE | + (user_mode ? MSTATUS_MPP_USER : MSTATUS_MPP_MACH); } From cf21bdb378d972f6d2ecbcd989391ac9ef4c268c Mon Sep 17 00:00:00 2001 From: HeatCrab Date: Wed, 26 Nov 2025 09:38:06 +0800 Subject: [PATCH 4/9] Add user mode task spawning interface Kernel requires distinct privilege modes for kernel services and user applications. Return from trap instruction needs previous interrupt enable bit set to preserve interrupt state across privilege transitions. 
Parameterize context initialization to configure privilege mode during task creation. Set previous interrupt enable bit for correct interrupt behavior after mode transitions. Provide separate interface for spawning user mode tasks alongside existing kernel task interface. --- arch/riscv/hal.h | 18 ++++++++++++------ include/sys/task.h | 13 ++++++++++++- kernel/task.c | 19 ++++++++++++++++--- 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/arch/riscv/hal.h b/arch/riscv/hal.h index 45a16409..e91c5512 100644 --- a/arch/riscv/hal.h +++ b/arch/riscv/hal.h @@ -105,15 +105,21 @@ void hal_timer_irq_disable( void hal_interrupt_tick(void); /* Enable interrupts on first task run */ void *hal_build_initial_frame( void *stack_top, - void (*task_entry)(void)); /* Build ISR frame for preemptive mode */ + void (*task_entry)(void), + int user_mode); /* Build ISR frame for preemptive mode */ /* Initializes the context structure for a new task. - * @ctx : Pointer to jmp_buf to initialize (must be non-NULL). - * @sp : Base address of the task's stack (must be valid). - * @ss : Total size of the stack in bytes (must be >= MIN_STACK_SIZE). - * @ra : The task's entry point function (must be non-NULL). + * @ctx : Pointer to jmp_buf to initialize (must be non-NULL). + * @sp : Base address of the task's stack (must be valid). + * @ss : Total size of the stack in bytes (must be >= MIN_STACK_SIZE). + * @ra : The task's entry point function (must be non-NULL). + * @user_mode : Non-zero to initialize for user mode, zero for machine mode. 
*/ -void hal_context_init(jmp_buf *ctx, size_t sp, size_t ss, size_t ra); +void hal_context_init(jmp_buf *ctx, + size_t sp, + size_t ss, + size_t ra, + int user_mode); /* Halts the CPU in an unrecoverable error state, shutting down if possible */ void hal_panic(void); diff --git a/include/sys/task.h b/include/sys/task.h index 0d3aaa4d..ccf5f4fa 100644 --- a/include/sys/task.h +++ b/include/sys/task.h @@ -187,7 +187,7 @@ void _yield(void); /* Task Lifecycle Management */ -/* Creates and starts a new task. +/* Creates and starts a new task in machine mode. * @task_entry : Pointer to the task's entry function (void func(void)) * @stack_size : The desired stack size in bytes (minimum is enforced) * @@ -195,6 +195,17 @@ void _yield(void); */ int32_t mo_task_spawn(void *task_entry, uint16_t stack_size); +/* Creates and starts a new task in user mode. + * User mode tasks run with reduced privileges and must use syscalls to access + * kernel services. This provides privilege separation; PMP isolation is pending. + * + * @task_entry : Pointer to the task's entry function (void func(void)) + * @stack_size : The desired stack size in bytes (minimum is enforced) + * + * Returns the new task's ID on success. Panics on memory allocation failure. + */ +int32_t mo_task_spawn_user(void *task_entry, uint16_t stack_size); + /* Cancels and removes a task from the system. A task cannot cancel itself.
* @id : The ID of the task to cancel * diff --git a/kernel/task.c b/kernel/task.c index 1304a1ee..c9973e19 100644 --- a/kernel/task.c +++ b/kernel/task.c @@ -712,7 +712,10 @@ static bool init_task_stack(tcb_t *tcb, size_t stack_size) /* Task Management API */ -int32_t mo_task_spawn(void *task_entry, uint16_t stack_size_req) +/* Internal task spawning implementation with privilege mode control */ +static int32_t task_spawn_impl(void *task_entry, + uint16_t stack_size_req, + int user_mode) { if (!task_entry) panic(ERR_TCB_ALLOC); @@ -777,13 +780,13 @@ int32_t mo_task_spawn(void *task_entry, uint16_t stack_size_req) /* Initialize execution context outside critical section. */ hal_context_init(&tcb->context, (size_t) tcb->stack, new_stack_size, - (size_t) task_entry); + (size_t) task_entry, user_mode); /* Initialize SP for preemptive mode. * Build initial ISR frame on stack with mepc pointing to task entry. */ void *stack_top = (void *) ((uint8_t *) tcb->stack + new_stack_size); - tcb->sp = hal_build_initial_frame(stack_top, task_entry); + tcb->sp = hal_build_initial_frame(stack_top, task_entry, user_mode); printf("task %u: entry=%p stack=%p size=%u prio_level=%u time_slice=%u\n", tcb->id, task_entry, tcb->stack, (unsigned int) new_stack_size, @@ -796,6 +799,16 @@ int32_t mo_task_spawn(void *task_entry, uint16_t stack_size_req) return tcb->id; } +int32_t mo_task_spawn(void *task_entry, uint16_t stack_size_req) +{ + return task_spawn_impl(task_entry, stack_size_req, false); +} + +int32_t mo_task_spawn_user(void *task_entry, uint16_t stack_size_req) +{ + return task_spawn_impl(task_entry, stack_size_req, true); +} + int32_t mo_task_cancel(uint16_t id) { if (id == 0 || id == mo_task_id()) From 473a3d35f92706f4993d7018433b5c24b294bd4e Mon Sep 17 00:00:00 2001 From: HeatCrab Date: Wed, 26 Nov 2025 14:38:39 +0800 Subject: [PATCH 5/9] Fix preemptive mode task initialization The preemptive scheduler requires interrupt frame restoration during task startup to properly 
transition privilege modes. However, the dispatcher was initializing tasks using cooperative mode context structures, which lack the necessary state for privilege transitions. This mismatch caused privilege mode corruption and prevented tasks from executing correctly. The dispatcher initialization now selects the appropriate context type based on the active scheduler mode. For preemptive scheduling, the system restores the full interrupt frame and uses trap return instructions to transfer control with proper privilege level switching. The initial status register configuration has been adjusted to prevent interrupts from enabling prematurely during the restoration sequence, avoiding race conditions during task startup. --- arch/riscv/hal.c | 121 ++++++++++++++++++++++++++++++++++++++++------- arch/riscv/hal.h | 8 +++- kernel/main.c | 6 ++- 3 files changed, 116 insertions(+), 19 deletions(-) diff --git a/arch/riscv/hal.c b/arch/riscv/hal.c index 774855af..7ad5806f 100644 --- a/arch/riscv/hal.c +++ b/arch/riscv/hal.c @@ -523,9 +523,15 @@ void *hal_build_initial_frame(void *stack_top, frame[1] = (uint32_t) &_gp; /* gp - global pointer */ frame[2] = tp_val; /* tp - thread pointer */ - uint32_t mstatus_val = MSTATUS_MIE | MSTATUS_MPIE | - (user_mode ? MSTATUS_MPP_USER : MSTATUS_MPP_MACH); - frame[FRAME_MSTATUS] = mstatus_val; /* mstatus - enable interrupts */ + /* Initialize mstatus for new task: + * - MPIE=1: mret will copy this to MIE, enabling interrupts after task + * starts + * - MPP: Set privilege level (U-mode or M-mode) + * - MIE=0: Keep interrupts disabled during frame restoration + */ + uint32_t mstatus_val = + MSTATUS_MPIE | (user_mode ? 
MSTATUS_MPP_USER : MSTATUS_MPP_MACH); + frame[FRAME_MSTATUS] = mstatus_val; frame[FRAME_EPC] = (uint32_t) task_entry; /* mepc - entry point */ @@ -792,23 +798,102 @@ static void __attribute__((naked, used)) __dispatch_init(void) "mret\n"); /* Jump to the task's entry point */ } -/* Transfers control from the kernel's main thread to the first task */ -__attribute__((noreturn)) void hal_dispatch_init(jmp_buf env) +/* Low-level routine to restore context from ISR frame and jump to task. + * This is used in preemptive mode where tasks are managed via ISR frames. + */ +static void __attribute__((naked, used)) __dispatch_init_isr(void) { - if (unlikely(!env)) + asm volatile( + /* a0 contains the ISR frame pointer (sp value) */ + "mv sp, a0\n" + + /* Restore mstatus from frame[32] */ + "lw t0, 32*4(sp)\n" + "csrw mstatus, t0\n" + + /* Restore mepc from frame[31] */ + "lw t1, 31*4(sp)\n" + "csrw mepc, t1\n" + + /* Restore all general-purpose registers */ + "lw ra, 0*4(sp)\n" + "lw gp, 1*4(sp)\n" + "lw tp, 2*4(sp)\n" + "lw t0, 3*4(sp)\n" + "lw t1, 4*4(sp)\n" + "lw t2, 5*4(sp)\n" + "lw s0, 6*4(sp)\n" + "lw s1, 7*4(sp)\n" + "lw a0, 8*4(sp)\n" + "lw a1, 9*4(sp)\n" + "lw a2, 10*4(sp)\n" + "lw a3, 11*4(sp)\n" + "lw a4, 12*4(sp)\n" + "lw a5, 13*4(sp)\n" + "lw a6, 14*4(sp)\n" + "lw a7, 15*4(sp)\n" + "lw s2, 16*4(sp)\n" + "lw s3, 17*4(sp)\n" + "lw s4, 18*4(sp)\n" + "lw s5, 19*4(sp)\n" + "lw s6, 20*4(sp)\n" + "lw s7, 21*4(sp)\n" + "lw s8, 22*4(sp)\n" + "lw s9, 23*4(sp)\n" + "lw s10, 24*4(sp)\n" + "lw s11, 25*4(sp)\n" + "lw t3, 26*4(sp)\n" + "lw t4, 27*4(sp)\n" + "lw t5, 28*4(sp)\n" + "lw t6, 29*4(sp)\n" + + /* Deallocate stack frame */ + "addi sp, sp, %0\n" + + /* Return from trap - jump to task entry point */ + "mret\n" + : + : "i"(ISR_STACK_FRAME_SIZE) + : "memory"); +} + +/* Transfers control from the kernel's main thread to the first task. + * In preemptive mode, ctx should be the ISR frame pointer (void *sp). + * In cooperative mode, ctx should be the jmp_buf context. 
+ */ +__attribute__((noreturn)) void hal_dispatch_init(void *ctx) +{ + if (unlikely(!ctx)) hal_panic(); /* Cannot proceed without valid context */ - if (kcb->preemptive) - hal_timer_enable(); + if (kcb->preemptive) { + /* Preemptive mode: ctx is ISR frame pointer, restore from it. + * Enable timer before jumping to task. Global interrupts will be + * enabled by mret based on MPIE bit in restored mstatus. + */ + /* Save ctx before hal_timer_enable modifies registers */ + void *saved_ctx = ctx; - _ei(); /* Enable global interrupts just before launching the first task */ + hal_timer_enable(); - asm volatile( - "mv a0, %0\n" /* Move @env (the task's context) into 'a0' */ - "call __dispatch_init\n" /* Call the low-level restore routine */ - : - : "r"(env) - : "a0", "memory"); + /* Restore ISR frame pointer and call dispatch */ + asm volatile( + "mv a0, %0\n" /* Load ISR frame pointer into a0 */ + "call __dispatch_init_isr\n" /* Restore from ISR frame */ + : + : "r"(saved_ctx) + : "a0", "memory"); + } else { + /* Cooperative mode: ctx is jmp_buf, use standard dispatch */ + _ei(); /* Enable global interrupts */ + + asm volatile( + "mv a0, %0\n" /* Move @ctx (the task's context) into 'a0' */ + "call __dispatch_init\n" /* Call the low-level restore routine */ + : + : "r"(ctx) + : "a0", "memory"); + } __builtin_unreachable(); } @@ -865,6 +950,8 @@ void hal_context_init(jmp_buf *ctx, */ (*ctx)[CONTEXT_SP] = (uint32_t) stack_top; (*ctx)[CONTEXT_RA] = (uint32_t) ra; - (*ctx)[CONTEXT_MSTATUS] = MSTATUS_MIE | MSTATUS_MPIE | - (user_mode ? MSTATUS_MPP_USER : MSTATUS_MPP_MACH); + /* Note: CONTEXT_MSTATUS not used in cooperative mode (setjmp/longjmp), + * but set it for consistency with ISR frame initialization */ + (*ctx)[CONTEXT_MSTATUS] = + MSTATUS_MPIE | (user_mode ?
MSTATUS_MPP_USER : MSTATUS_MPP_MACH); } diff --git a/arch/riscv/hal.h b/arch/riscv/hal.h index e91c5512..7946a0fe 100644 --- a/arch/riscv/hal.h +++ b/arch/riscv/hal.h @@ -74,7 +74,13 @@ void longjmp(jmp_buf env, int32_t val); /* HAL context switching routines for complete context management */ int32_t hal_context_save(jmp_buf env); void hal_context_restore(jmp_buf env, int32_t val); -void hal_dispatch_init(jmp_buf env); + +/* Transfers control from kernel main thread to the first task. + * In preemptive mode, ctx should be the ISR frame pointer (void *sp). + * In cooperative mode, ctx should be the jmp_buf context. + * @ctx : ISR frame pointer (preemptive) or jmp_buf (cooperative). + */ +void hal_dispatch_init(void *ctx); /* Stack switching for preemptive context switch. * Saves current SP to *old_sp and loads new SP from new_sp. diff --git a/kernel/main.c b/kernel/main.c index ce0dc08a..0015dca7 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -72,7 +72,11 @@ int32_t main(void) */ scheduler_started = true; - hal_dispatch_init(first_task->context); + /* In preemptive mode, tasks are managed via ISR frames (sp). + * In cooperative mode, tasks are managed via jmp_buf (context). + */ + void *ctx = kcb->preemptive ? first_task->sp : first_task->context; + hal_dispatch_init(ctx); /* This line should be unreachable. */ panic(ERR_UNKNOWN); From 7cfa23be410c03be556f00634ab9930a068c9f3c Mon Sep 17 00:00:00 2001 From: HeatCrab Date: Wed, 26 Nov 2025 15:08:21 +0800 Subject: [PATCH 6/9] Add U-mode safe output syscall User mode tasks cannot directly use the standard output functions because the logger system requires privileged operations for synchronization. When user mode code attempts these operations, the processor triggers illegal instruction exceptions that prevent normal execution. To address this limitation, a new system call interface provides safe output capabilities for user mode tasks. 
The implementation splits the work between user and machine modes: formatting occurs in user space using only unprivileged operations, while the actual output is performed through a system call that executes in machine mode where privileged operations are permitted. The kernel handles all synchronization and hardware access transparently, allowing user mode tasks to produce output without violating privilege boundaries. --- include/lib/libc.h | 6 ++++++ include/sys/syscall.h | 3 ++- kernel/syscall.c | 18 ++++++++++++++++++ lib/stdio.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 1 deletion(-) diff --git a/include/lib/libc.h b/include/lib/libc.h index 43ef61f5..12f6f35d 100644 --- a/include/lib/libc.h +++ b/include/lib/libc.h @@ -153,3 +153,9 @@ char *getline(char *s); int32_t printf(const char *fmt, ...); int32_t snprintf(char *str, size_t size, const char *fmt, ...); int vsnprintf(char *str, size_t size, const char *fmt, va_list args); + +/* User mode safe formatted output. + * Similar to printf but safe for U-mode tasks - formats in user space + * then uses syscall to output, avoiding privilege violations. + */ +int32_t umode_printf(const char *fmt, ...); diff --git a/include/sys/syscall.h b/include/sys/syscall.h index 6d62c1f9..218e4b66 100644 --- a/include/sys/syscall.h +++ b/include/sys/syscall.h @@ -48,7 +48,8 @@ _(twfi, 40, int, (void) ) \ _(tcount, 41, int, (void) ) \ _(ticks, 42, int, (void) ) \ - _(uptime, 43, int, (void) ) + _(uptime, 43, int, (void) ) \ + _(tputs, 44, int, (const char *str)) /* Generate enumeration of system call numbers */ #define _(name, num, rettype, arglist) SYS_##name = num, diff --git a/kernel/syscall.c b/kernel/syscall.c index 7aef52ee..7be66632 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -382,3 +382,21 @@ int sys_uptime(void) { return syscall(SYS_uptime, 0, 0, 0); } + +/* User mode safe output syscall. + * Outputs a string from user mode by executing puts() in kernel context. 
+ * This avoids privilege violations from printf's logger mutex operations. + */ +static int _tputs(const char *str) +{ + if (unlikely(!str)) + return -EINVAL; + + /* Use puts() which will handle logger enqueue or direct output */ + return puts(str); +} + +int sys_tputs(const char *str) +{ + return syscall(SYS_tputs, (void *) str, 0, 0); +} diff --git a/lib/stdio.c b/lib/stdio.c index f5eec7d7..5f02e14b 100644 --- a/lib/stdio.c +++ b/lib/stdio.c @@ -446,3 +446,32 @@ char *getline(char *s) return s; } + +/* User mode safe formatted output. + * Formats string in user space using vsnprintf, then outputs via syscall. + * This avoids privilege violations from printf's logger mutex operations. + * + * Safe for U-mode tasks because: + * 1. vsnprintf runs in user space (no privileged operations) + * 2. sys_tputs syscall transitions to M-mode for actual output + * 3. Kernel handles logger mutex/direct output decision + */ +int32_t umode_printf(const char *fmt, ...) +{ + char buf[256]; /* Stack buffer for formatted output */ + va_list args; + + va_start(args, fmt); + int32_t len = vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + /* Handle vsnprintf error */ + if (len < 0) + return len; + + /* Output via syscall - safe for U-mode */ + extern int sys_tputs(const char *str); + sys_tputs(buf); + + return len; +} From 3830d80b24775f1e25d2ea6259a33af38bbc0401 Mon Sep 17 00:00:00 2001 From: HeatCrab Date: Wed, 26 Nov 2025 15:16:05 +0800 Subject: [PATCH 7/9] Add U-mode validation test This test application validates both the system call interface and privilege isolation mechanisms in a two-phase approach. The first phase verifies that system calls execute correctly from user mode. It invokes several read-only system calls to confirm that the trap-based calling convention functions properly and that return values propagate correctly across privilege boundaries. 
All output uses the safe user mode output interface to avoid triggering privilege violations during the test itself. The second phase validates security isolation by deliberately attempting to execute a privileged instruction from user mode. The test expects this to trigger an illegal instruction exception, confirming that the hardware properly enforces privilege restrictions. When the exception occurs as expected, it demonstrates that user mode code cannot bypass the privilege system to access machine mode resources. This intentional test failure is the correct outcome and proves the isolation mechanism works as designed. --- app/umode.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 app/umode.c diff --git a/app/umode.c b/app/umode.c new file mode 100644 index 00000000..518e111d --- /dev/null +++ b/app/umode.c @@ -0,0 +1,75 @@ +#include <linmo.h> + +/* U-mode Validation Task + * + * Integrates two tests into a single task flow to ensure sequential execution: + * 1. Phase 1: Mechanism Check - Verify syscalls work. + * 2. Phase 2: Security Check - Verify privileged instructions trigger a trap. + */ +void umode_validation_task(void) +{ + /* --- Phase 1: Mechanism Check (Syscalls) --- */ + umode_printf("[umode] Phase 1: Testing Syscall Mechanism\n"); + + /* Test 1: sys_tid() - Simplest read-only syscall. */ + int my_tid = sys_tid(); + if (my_tid > 0) { + umode_printf("[umode] PASS: sys_tid() returned %d\n", my_tid); + } else { + umode_printf("[umode] FAIL: sys_tid() failed (ret=%d)\n", my_tid); + } + + /* Test 2: sys_uptime() - Verify value transmission is correct. */ + int uptime = sys_uptime(); + if (uptime >= 0) { + umode_printf("[umode] PASS: sys_uptime() returned %d\n", uptime); + } else { + umode_printf("[umode] FAIL: sys_uptime() failed (ret=%d)\n", uptime); + } + + /* Note: Skipping sys_tadd for now, as kernel user pointer checks might + * block function pointers in the .text segment, avoiding distraction.
+ */ + + /* --- Phase 2: Security Check (Privileged Access) --- */ + umode_printf("[umode] ========================================\n"); + umode_printf("[umode] Phase 2: Testing Security Isolation\n"); + umode_printf( + "[umode] Action: Attempting to read 'mstatus' CSR from U-mode.\n"); + umode_printf("[umode] Expect: Kernel Panic with 'Illegal instruction'.\n"); + umode_printf("[umode] ========================================\n"); + + /* CRITICAL: Delay before suicide to ensure logs are flushed from + * buffer to UART. + */ + sys_tdelay(10); + + /* Privileged Instruction Trigger */ + uint32_t mstatus; + asm volatile("csrr %0, mstatus" : "=r"(mstatus)); + + /* If execution reaches here, U-mode isolation failed (still has + * privileges). + */ + umode_printf( + "[umode] FAIL: Privileged instruction executed! (mstatus=0x%lx)\n", + (long) mstatus); + + /* Spin loop to prevent further execution. */ + while (1) + sys_tyield(); +} + +int32_t app_main(void) +{ + umode_printf("[Kernel] Spawning U-mode validation task...\n"); + + /* app_main is called from kernel context during bootstrap. + * Use mo_task_spawn_user to create the validation task in user mode. + * This ensures privilege isolation is properly tested. + */ + mo_task_spawn_user(umode_validation_task, DEFAULT_STACK_SIZE); + + /* Return 1 to enable preemptive scheduler */ + return 1; +} From f032cc026df6c3f88b3d0db3fae3a97ba0525a0c Mon Sep 17 00:00:00 2001 From: HeatCrab Date: Wed, 26 Nov 2025 21:00:37 +0800 Subject: [PATCH 8/9] Add U-mode test to functional test suite The user mode validation test intentionally triggers an illegal instruction exception to verify privilege isolation, which would normally be classified as a test failure in the standard application test suite. This test has been moved to the functional test suite where its expected behavior can be properly validated. The application test suite now excludes this test to avoid false negatives. 
The functional test suite has been updated to recognize the expected privilege violation as a valid success criterion alongside the syscall mechanism validation. The crash detection logic now permits expected exceptions for tests that intentionally verify security boundaries. --- .ci/run-app-tests.sh | 2 +- .ci/run-functional-tests.sh | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.ci/run-app-tests.sh b/.ci/run-app-tests.sh index dcf4f040..27db98c1 100755 --- a/.ci/run-app-tests.sh +++ b/.ci/run-app-tests.sh @@ -47,7 +47,7 @@ else fi # Filter excluded apps -EXCLUDED_APPS="" +EXCLUDED_APPS="umode" if [ -n "$EXCLUDED_APPS" ]; then FILTERED_APPS="" for app in $APPS; do diff --git a/.ci/run-functional-tests.sh b/.ci/run-functional-tests.sh index 14b1afed..e66de5af 100755 --- a/.ci/run-functional-tests.sh +++ b/.ci/run-functional-tests.sh @@ -11,6 +11,7 @@ TOOLCHAIN_TYPE=${TOOLCHAIN_TYPE:-gnu} declare -A FUNCTIONAL_TESTS FUNCTIONAL_TESTS["mutex"]="Fairness: PASS,Mutual Exclusion: PASS,Data Consistency: PASS,Overall: PASS" FUNCTIONAL_TESTS["semaphore"]="Overall: PASS" +FUNCTIONAL_TESTS["umode"]="PASS: sys_tid() returned,PASS: sys_uptime() returned,[EXCEPTION] Illegal instruction" #FUNCTIONAL_TESTS["test64"]="Unsigned Multiply: PASS,Unsigned Divide: PASS,Signed Multiply: PASS,Signed Divide: PASS,Left Shifts: PASS,Logical Right Shifts: PASS,Arithmetic Right Shifts: PASS,Overall: PASS" #FUNCTIONAL_TESTS["suspend"]="Suspend: PASS,Resume: PASS,Self-Suspend: PASS,Overall: PASS" @@ -75,7 +76,8 @@ test_functional_app() { IFS=',' read -ra PASS_CRITERIA <<< "$expected_passes" # Check for crashes first - if echo "$output" | grep -qiE "(trap|exception|fault|panic|illegal|segfault)"; then + # Special case: umode test expects an illegal instruction exception + if [ "$test" != "umode" ] && echo "$output" | grep -qiE "(trap|exception|fault|panic|illegal|segfault)"; then echo "[!] 
Crash detected" # Mark all criteria as crashed From 814b636cbbb63d788daab899401113cd99542078 Mon Sep 17 00:00:00 2001 From: HeatCrab Date: Tue, 2 Dec 2025 10:44:05 +0800 Subject: [PATCH 9/9] Update HAL documentation for dual-mode support The hardware abstraction layer now supports both cooperative and preemptive scheduling modes with distinct context management approaches. The documentation has been updated to reflect these architectural differences and their implications for task initialization and privilege management. The interrupt frame structure preserves complete trap context with 33 words for register state and control registers, plus 12 bytes of padding to maintain 16-byte alignment, totaling 144 bytes. This frame supports both interrupt handling and initial task setup for preemptive scheduling, where tasks launch through trap return rather than standard function calls. Task initialization varies between modes. Cooperative mode uses lightweight context structures containing only callee-saved registers for voluntary yielding. Preemptive mode builds complete interrupt frames with all registers initialized to zero, global and thread pointers configured, and processor state set for proper privilege transitions. The frame is positioned with a 256-byte initial stack reserve below the stack top to accommodate startup requirements. The dispatcher initialization process differs for each scheduling mode. Cooperative tasks transfer control through standard calling conventions with global interrupts enabled before execution. Preemptive tasks restore interrupt frames and execute trap return instructions, allowing hardware to transition to the configured privilege level and enable interrupts based on the saved processor state. The system call interface operates through the RISC-V trap mechanism for privilege boundary crossing. User mode tasks invoke kernel services using environment call instructions that trigger synchronous exceptions. 
The trap handler preserves all registers except the return value, maintaining standard calling convention semantics across the privilege boundary while the kernel validates parameters and mediates access to protected resources. --- Documentation/hal-calling-convention.md | 37 ++++++++---- Documentation/hal-riscv-context-switch.md | 74 +++++++++++++++++++++-- 2 files changed, 93 insertions(+), 18 deletions(-) diff --git a/Documentation/hal-calling-convention.md b/Documentation/hal-calling-convention.md index d29fa643..6df5017f 100644 --- a/Documentation/hal-calling-convention.md +++ b/Documentation/hal-calling-convention.md @@ -109,13 +109,14 @@ void hal_context_restore(jmp_buf env, int32_t val); /* Restore context + process The ISR in `boot.c` performs a complete context save of all registers: ``` -Stack Frame Layout (128 bytes, offsets from sp): +Stack Frame Layout (144 bytes: 33 words × 4 bytes + 12 bytes padding; offsets from sp): 0: ra, 4: gp, 8: tp, 12: t0, 16: t1, 20: t2 - 24: s0, 28: s1, 32: a0, 36: a1, 40: a2, 44: a3 + 24: s0, 28: s1, 32: a0, 36: a1, 40: a2, 44: a3 48: a4, 52: a5, 56: a6, 60: a7, 64: s2, 68: s3 72: s4, 76: s5, 80: s6, 84: s7, 88: s8, 92: s9 96: s10, 100:s11, 104:t3, 108: t4, 112: t5, 116: t6 -120: mcause, 124: mepc +120: mcause, 124: mepc, 128: mstatus +132-143: padding (12 bytes for 16-byte alignment) ``` Why full context save in ISR? @@ -128,7 +129,7 @@ Why full context save in ISR? Each task stack must reserve space for the ISR frame: ```c -#define ISR_STACK_FRAME_SIZE 128 /* 32 registers × 4 bytes */ +#define ISR_STACK_FRAME_SIZE 144 /* 33 words × 4 bytes + 12 bytes padding (16-byte aligned) */ ``` This "red zone" is reserved at the top of every task stack to guarantee ISR safety.
@@ -147,10 +148,20 @@ int32_t result = mo_task_spawn(task_function, 2048); ### System Call Interface -Linmo uses standard function calls (not trap instructions) for system services: -- Arguments passed in `a0-a7` registers -- Return values in `a0` -- No special calling convention required +Linmo provides system calls through the RISC-V trap mechanism for privilege +boundary crossing. User mode tasks invoke system calls using the environment +call instruction, which triggers a synchronous exception handled by the kernel. + +System call convention: +- Arguments passed in argument registers from `a0` upward before trap (`a7` excluded) +- System call number in `a7` register +- Trap handler preserves all registers except return value +- Return value delivered in `a0` register after trap return +- Standard RISC-V calling convention maintained across privilege boundary + +The trap-based interface allows user mode tasks to safely access kernel +services without requiring privileged instruction execution. The kernel +validates all parameters and mediates access to protected resources.
### Task Entry Points @@ -174,9 +185,9 @@ Each task has its own stack with this layout: ``` High Address -+------------------+ <- stack_base + stack_size -| ISR Red Zone | <- 128 bytes reserved for ISR -| (128 bytes) | ++------------------+ <- stack_base + stack_size +| ISR Red Zone | <- 144 bytes reserved for ISR +| (144 bytes) | +------------------+ <- Initial SP (16-byte aligned) | | | Task Stack | <- Grows downward @@ -251,8 +262,8 @@ Minimal context (jmp_buf): - 17 × 32-bit loads/stores = 68 bytes - Essential for cooperative scheduling -Full context (ISR): -- 32 × 32-bit loads/stores = 128 bytes +Full context (ISR): +- 33 × 32-bit loads/stores = 132 bytes (144-byte frame including alignment padding) - Required for preemptive interrupts ### Function Call Overhead diff --git a/Documentation/hal-riscv-context-switch.md b/Documentation/hal-riscv-context-switch.md index a7c120d7..f66a41f4 100644 --- a/Documentation/hal-riscv-context-switch.md +++ b/Documentation/hal-riscv-context-switch.md @@ -96,7 +96,11 @@ State Preservation: - Nested interrupts are handled correctly by hardware's automatic state stacking ### Task Initialization -New tasks are initialized with proper processor state: +Task initialization differs between cooperative and preemptive modes due to +their distinct context management approaches. + +In cooperative mode, tasks use lightweight context structures for voluntary +yielding. New tasks are initialized with execution context only: ```c void hal_context_init(jmp_buf *ctx, size_t sp, size_t ss, size_t ra) @@ -109,7 +113,58 @@ void hal_context_init(jmp_buf *ctx, size_t sp, size_t ss, size_t ra) } ``` -This ensures new tasks start with interrupts enabled in machine mode. +This lightweight approach uses standard calling conventions where tasks +return control through normal function returns. + +Preemptive mode requires interrupt frame structures to support trap-based +context switching and privilege mode transitions.
Task initialization builds +a complete interrupt service routine frame: + +```c +void *hal_build_initial_frame(void *stack_top, + void (*task_entry)(void), + int user_mode) +{ + /* Place frame in stack with initial reserve below for proper startup */ + uint32_t *frame = (uint32_t *) ((uint8_t *) stack_top - 256 - + ISR_STACK_FRAME_SIZE); + + /* Initialize all general purpose registers to zero */ + for (int i = 0; i < 32; i++) + frame[i] = 0; + + /* Compute thread pointer: aligned to 64 bytes from _end */ + uint32_t tp_val = ((uint32_t) &_end + 63) & ~63U; + + /* Set essential pointers */ + frame[FRAME_GP] = (uint32_t) &_gp; /* Global pointer */ + frame[FRAME_TP] = tp_val; /* Thread pointer */ + + /* Configure processor state for task entry: + * - MPIE=1: Interrupts will enable when task starts + * - MPP: Target privilege level (user or machine mode) + * - MIE=0: Keep interrupts disabled during frame restoration + */ + uint32_t mstatus_val = + MSTATUS_MPIE | (user_mode ? MSTATUS_MPP_USER : MSTATUS_MPP_MACH); + frame[FRAME_MSTATUS] = mstatus_val; + + /* Set entry point */ + frame[FRAME_EPC] = (uint32_t) task_entry; + + return frame; /* Return frame base as initial stack pointer */ +} +``` + +The interrupt frame layout reserves space for all register state, control +registers, and alignment padding. When the scheduler first dispatches this +task, the trap return mechanism restores the frame and transfers control to +the entry point with the configured privilege level. + +Key differences from cooperative mode include full register state allocation +rather than minimal callee-saved registers, trap return semantics rather than +function return, support for privilege level transitions through MPP +configuration, and proper interrupt state initialization through MPIE bit. ## Implementation Details @@ -168,10 +223,19 @@ New Task Creation: 4. Processor state initialized with interrupts enabled First Task Launch: -1. 
`hal_dispatch_init` transfers control from kernel to first task + +**Cooperative Mode**: +1. `hal_dispatch_init` receives lightweight context structure 2. Global interrupts enabled just before task execution -3. Timer interrupts activated for preemptive scheduling -4. Task begins execution at its entry point +3. Control transfers to first task through standard function call +4. Task begins execution and voluntarily yields control + +**Preemptive Mode**: +1. `hal_dispatch_init` receives interrupt frame pointer +2. Timer interrupt enabled for periodic preemption +3. Dispatcher loads frame and executes trap return instruction +4. Hardware restores registers and transitions to configured privilege level +5. Task begins execution and can be preempted by timer Context Switch Cycle: 1. Timer interrupt triggers scheduler entry