ModelEngine-Group
diff --git a/.github/workflows/unifiedcache_test.yml b/.github/workflows/unifiedcache_test.yml
Lines changed: 0 additions & 37 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 1 addition & 1 deletion
diff --git a/MANIFEST.in b/MANIFEST.in
Lines changed: 7 additions & 2 deletions
diff --git a/README.md b/README.md
Lines changed: 6 additions & 6 deletions
diff --git a/docs/source/getting-started/installation_gpu.md b/docs/source/getting-started/installation_gpu.md
Lines changed: 0 additions & 86 deletions
diff --git a/docs/source/getting-started/installation_npu.md b/docs/source/getting-started/installation_npu.md
Lines changed: 0 additions & 88 deletions
diff --git a/docs/source/getting-started/quick_start.md b/docs/source/getting-started/quick_start.md
Lines changed: 0 additions & 119 deletions
@@ -18,40 +18,3 @@ jobs:
 
   call-lint:
     uses: ./.github/workflows/pre-commit.yml
-
-  unit-test:
-    needs: call-lint
-    name: Run Unittests
-    runs-on: ubuntu-latest
-    steps:
-      - name: Free disk space
-        run: |
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /opt/ghc
-          sudo rm -rf "/usr/local/share/boost"
-          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-          docker system prune -af
-          df -h
-
-      - name: Checkout unified-cache-management repo
-        uses: actions/checkout@v4
-
-      - name: Run unit test inside vLLM container
-        run: |
-          docker run --rm \
-            -e VLLM_USE_PRECOMPILED=1 \
-            -e PLATFORM=cuda \
-            -v ${{ github.workspace }}:/workspace/unified-cache-management \
-            -w /workspace/unified-cache-management \
-            --entrypoint /bin/bash \
-            vllm/vllm-openai:v0.9.2 \
-            -c "
-              set -euo pipefail
-              pip install -v -e . --no-build-isolation
-              cd \$(pip show vllm | grep Location | awk '{print \$2}') &&
-              git apply /workspace/unified-cache-management/ucm/integration/vllm/patch/0.9.2/vllm-adapt-pc.patch
-              git apply /workspace/unified-cache-management/ucm/integration/vllm/patch/0.9.2/vllm-adapt-aggre.patch
-              git apply /workspace/unified-cache-management/ucm/integration/vllm/patch/0.9.2/vllm-adapt-sparse.patch
-              cd /workspace/unified-cache-management
-              python3 -m unittest discover -s test
-            "
@@ -7,7 +7,7 @@ set(CMAKE_CXX_EXTENSIONS OFF)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 option(BUILD_UCM_STORE "build ucm store module." ON)
-option(BUILD_UCM_SPARSE "build ucm sparse module." ON)
+option(BUILD_UCM_SPARSE "build ucm sparse module." OFF)
 option(BUILD_UNIT_TESTS "build all unit test suits." OFF)
 option(BUILD_NUMA "build numactl library." OFF)
 option(DOWNLOAD_DEPENDENCE "download dependence by cmake." ON)
 
@@ -1,2 +1,7 @@
-recursive-include ucm/csrc *
-exclude CMakeLists.txt
+include LICENSE
+include pyproject.toml
+include CMakeLists.txt
+include requirements.txt
+
+recursive-include examples *
+recursive-include benchmarks *
@@ -1,7 +1,7 @@
 <p align="center">
   <picture>
-    <source media="(prefers-color-scheme: dark)" srcset="docs/source/logos/UCM-dark.png">
-    <img alt="UCM" src="docs/source/logos/UCM-light.png" width=50%>
+    <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/main/docs/source/logos/UCM-dark.png">
+    <img alt="UCM" src="https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/main/docs/source/logos/UCM-light.png" width=50%>
   </picture>
 </p>
 
@@ -32,7 +32,7 @@ Sparse KV cache have many different choices. Recently paper point out that there
 and all models. So it is better to build a common framework so that different sparse algorithms can be plugged into it, like the KV
 connector for PC.
 
-![architecture.png](./docs/source/_static/images/idea.png)
+![architecture.png](https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/main/docs/source/_static/images/idea.png)
 
 All gray boxes in the diagram represent existing classes in vLLM version 0.9.2, while the green boxes indicate components newly added by UCM.
 The light green boxes demonstrate potential future subclass extensions based on this framework.
@@ -68,7 +68,7 @@ in either a local filesystem for single-machine scenarios or through NFS mount p
 
 ## Quick Start
 
-please refer to [Quick Start](./docs/source/getting-started/quick_start.md).
+Please refer to [Quick Start](https://ucm.readthedocs.io/en/latest/getting-started/quick_start.html).
 
 ---
 
@@ -85,8 +85,8 @@ please refer to [Quick Start](./docs/source/getting-started/quick_start.md).
 1. For technical questions and feature requests, please use GitHub [Issues](https://github.com/ModelEngine-Group/unified-cache-management/issues).
 2. WeChat technical discussion group: Scan the QR code below.
 
-<img src="docs/source/_static/images/qrcode_for_wechat.png" alt="wechat-gh" width="40%">
+<img src="https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/main/docs/source/_static/images/qrcode_for_wechat.png" alt="wechat-gh" width="40%">
 
 ## License
 
-UCM is licensed under the MIT with additional conditions. Please read the [LICENSE](./LICENSE) file for details.
+UCM is licensed under the MIT License with additional conditions. Please read the [LICENSE](https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/main/LICENSE) file for details.