/**
 * MIT License
 *
 * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
| 24 | +#include "dump_queue.h" |
| 25 | +#include "logger/logger.h" |
| 26 | +#include "trans/device.h" |
| 27 | + |
| 28 | +namespace UC::CacheStore { |
| 29 | + |
| 30 | +static constexpr size_t spinThreshold = 1000; |
| 31 | +static constexpr auto tryPopInterval = std::chrono::microseconds(100); |
| 32 | + |
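// Signal both worker threads to stop and join them before destruction.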
DumpQueue::~DumpQueue()
{
    stop_.store(true);
    if (dispatcher_.joinable()) { dispatcher_.join(); }
    if (dumper_.joinable()) { dumper_.join(); }
}

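// Wire up the failure set, host transfer buffer and backend store, then start the
// backend-dump thread and the dispatch thread. The returned status is the dispatch
// thread's device/stream setup result, reported through a promise/future pair.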
Status DumpQueue::Setup(const Config& config, TaskIdSet* failureSet, TransBuffer* buffer)
{
    failureSet_ = failureSet;
    buffer_ = buffer;
    backend_ = static_cast<Store*>((void*)config.backend);
    dumper_ = std::thread{&DumpQueue::BackendDumpStage, this};
    std::promise<Status> started;
    auto fut = started.get_future();
    dispatcher_ = std::thread{&DumpQueue::DispatchStage, this, config.deviceId, config.tensorSize,
                              std::ref(started)};
    return fut.get();
}

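// Enqueue a dump task and raise the waiter count. If the waiting queue is full,
// the task is recorded as failed and the waiter is released immediately.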
void DumpQueue::Submit(TaskPtr task, WaiterPtr waiter)
{
    waiter->Up();
    auto success = waiting_.TryPush({task, waiter});
    if (success) { return; }
    UC_ERROR("Waiting queue full, submit dump task({}) failed.", task->id);
    failureSet_->Insert(task->id);
    waiter->Done();
}

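// Dispatch thread body: bind to the device, create a copy stream, report the setup
// status through `started`, then drain the waiting queue until stop is requested.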
void DumpQueue::DispatchStage(int32_t deviceId, size_t tensorSize, std::promise<Status>& started)
{
    Trans::Device device;
    auto s = device.Setup(deviceId);
    if (s.Failure()) [[unlikely]] {
        UC_ERROR("Failed({}) to setup device({}).", s, deviceId);
        started.set_value(s);
        return;
    }
    auto stream = device.MakeStream();
    if (!stream) [[unlikely]] {
        UC_ERROR("Failed to make stream on device({}).", deviceId);
        started.set_value(Status::Error());
        return;
    }
    started.set_value(Status::OK());
    size_t spinCount = 0;
    TaskPair pair;
    while (!stop_.load(std::memory_order_acquire)) {
        if (waiting_.TryPop(pair)) {
            spinCount = 0;
            DispatchOneTask(stream.get(), tensorSize, std::move(pair.first),
                            std::move(pair.second));
        } else {
            if (++spinCount < spinThreshold) {
                std::this_thread::yield();
            } else {
                std::this_thread::sleep_for(tryPopInterval);
                spinCount = 0;
            }
        }
    }
}

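// Dump a single task unless it is already marked as failed, record any new failure,
// and always release the waiter.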
void DumpQueue::DispatchOneTask(Trans::Stream* stream, size_t tensorSize, TaskPtr task,
                                WaiterPtr waiter)
{
    if (!failureSet_->Contains(task->id)) {
        auto s = DumpOneTask(stream, tensorSize, task);
        if (s.Failure()) [[unlikely]] { failureSet_->Insert(task->id); }
    }
    waiter->Done();
}

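// Stage the task's shards into the host transfer buffer, synchronize the stream,
// submit one aggregated dump to the backend store, and queue the shards for
// completion tracking.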
Status DumpQueue::DumpOneTask(Trans::Stream* stream, size_t tensorSize, TaskPtr task)
{
    Detail::TaskDesc backendTaskDesc;
    backendTaskDesc.brief = "Cache2Backend";
    const auto nShard = task->desc.size();
    std::vector<size_t> backendTaskIndex;
    backendTaskIndex.reserve(nShard);
    std::vector<ShardTask> shardTasks(nShard);
    // Stage every shard into the host-side transfer buffer.
    for (size_t i = 0; i < nShard; i++) {
        auto& shard = task->desc[i];
        auto& shardTask = shardTasks[i];
        shardTask.bufferHandle = buffer_->Get(shard.owner, shard.index);
        // Skip shards whose host buffer slot is not owned by this task.
        if (!shardTask.bufferHandle.Owner()) { continue; }
        if (!shardTask.bufferHandle.Ready()) {
            // Data is not staged yet: copy the shard's device tensors to the host buffer.
            auto s = stream->DeviceToHostAsync(shard.addrs.data(), shardTask.bufferHandle.Data(),
                                               tensorSize, shard.addrs.size());
            if (s.Failure()) [[unlikely]] {
                UC_ERROR("Failed({}) to do D2H({}) batch({}) async.", s, tensorSize,
                         shard.addrs.size());
                return s;
            }
        }
        backendTaskDesc.push_back(
            Detail::Shard{shard.owner, shard.index, {shardTask.bufferHandle.Data()}});
        backendTaskIndex.emplace_back(i);
    }
    if (backendTaskIndex.empty()) { return Status::OK(); }
    // Wait for all outstanding D2H copies before handing the host buffers to the backend.
    auto s = stream->Synchronized();
    if (s.Failure()) [[unlikely]] {
        UC_ERROR("Failed({}) to sync on stream.", s);
        return s;
    }
    for (const auto& i : backendTaskIndex) { shardTasks[i].bufferHandle.MarkReady(); }
    auto res = backend_->Dump(std::move(backendTaskDesc));
    if (!res) [[unlikely]] {
        UC_ERROR("Failed({}) to submit dump task to backend.", res.Error());
        return res.Error();
    }
    // Queue each staged shard for completion tracking by the backend-dump stage.
    for (const auto& i : backendTaskIndex) {
        auto& shardTask = shardTasks[i];
        shardTask.backendTaskHandle = res.Value();
        dumping_.Push(std::move(shardTask));
    }
    return Status::OK();
}

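// Backend-dump thread body: drain the dumping queue and wait for the backend to
// finish each shard's dump task until stop is requested.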
void DumpQueue::BackendDumpStage()
{
    size_t spinCount = 0;
    ShardTask task;
    while (!stop_.load(std::memory_order_acquire)) {
        if (dumping_.TryPop(task)) {
            spinCount = 0;
            HandleOneShardTask(task);
        } else {
            if (++spinCount < spinThreshold) {
                std::this_thread::yield();
            } else {
                std::this_thread::sleep_for(tryPopInterval);
                spinCount = 0;
            }
        }
    }
}

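// Wait for the backend task associated with a shard, skipping the wait when an
// equal or newer handle has already been observed as finished.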
void DumpQueue::HandleOneShardTask(ShardTask& task)
{
    // Highest backend task handle already waited on; the code assumes handles are
    // issued in increasing order, so shards sharing an older handle need no extra wait.
    static Detail::TaskHandle finishedBackendTaskHandle = 0;
    if (task.backendTaskHandle > finishedBackendTaskHandle) {
        auto s = backend_->Wait(task.backendTaskHandle);
        if (s.Failure()) {
            UC_ERROR("Failed({}) to wait backend task({}).", s, task.backendTaskHandle);
            return;
        }
        finishedBackendTaskHandle = task.backendTaskHandle;
    }
}

} // namespace UC::CacheStore