Skip to content

Commit 914dcd4

Browse files
committed
v1.1
1 parent 46ce3fc commit 914dcd4

File tree

4 files changed

+41
-24
lines changed

4 files changed

+41
-24
lines changed

README.md

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
**Authors:** Junhao Zhuang, Shi Guo, Xin Cai, Xiaohui Li, Yihao Liu, Chun Yuan, Tianfan Xue
66

77
<a href='http://zhuang2002.github.io/FlashVSR'><img src='https://img.shields.io/badge/Project-Page-Green'></a> &nbsp;
8-
<a href="https://huggingface.co/JunhaoZhuang/FlashVSR"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue"></a> &nbsp;
8+
<a href="https://huggingface.co/JunhaoZhuang/FlashVSR"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model%20(v1)-blue"></a> &nbsp;
9+
<a href="https://huggingface.co/JunhaoZhuang/FlashVSR-v1.1"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model%20(v1.1)-blue"></a> &nbsp;
910
<a href="https://huggingface.co/datasets/JunhaoZhuang/VSR-120K"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Dataset-orange"></a> &nbsp;
1011
<a href="https://arxiv.org/abs/2510.12747"><img src="https://img.shields.io/badge/arXiv-2510.12747-b31b1b.svg"></a>
1112

@@ -23,7 +24,8 @@ Diffusion models have recently advanced video restoration, but applying them to
2324

2425
### 📰 News
2526

26-
- **Release Date:** October 2025 — Inference code and model weights are available now! 🎉
27+
- **Nov 2025 — 🎉 [FlashVSR v1.1](https://huggingface.co/JunhaoZhuang/FlashVSR-v1.1) released:** improved stability and fidelity.
28+
- **Oct 2025 — [FlashVSR v1](https://huggingface.co/JunhaoZhuang/FlashVSR) (initial release):** Inference code and model weights are available now! 🎉
2729
- **Bug Fix (October 21, 2025):** Fixed `local_attention_mask` update logic to prevent artifacts when switching between different aspect ratios during continuous inference.
2830
- **Coming Soon:** Dataset release (**VSR-120K**) for large-scale training.
2931

@@ -109,7 +111,8 @@ python setup.py install
109111

110112
#### 4️⃣ Download Model Weights from Hugging Face
111113

112-
Weights are hosted on **Hugging Face** via **Git LFS**. Please install Git LFS first:
114+
FlashVSR provides both **v1** and **v1.1** model weights on Hugging Face (via **Git LFS**).
115+
Please install Git LFS first:
113116

114117
```bash
115118
# From the repo root
@@ -118,34 +121,48 @@ cd examples/WanVSR
118121
# Install Git LFS (once per machine)
119122
git lfs install
120123
121-
# Clone the model repository into examples/WanVSR
122-
git lfs clone https://huggingface.co/JunhaoZhuang/FlashVSR
124+
# Clone v1 (original) or v1.1 (recommended)
125+
git lfs clone https://huggingface.co/JunhaoZhuang/FlashVSR # v1
126+
# or
127+
git lfs clone https://huggingface.co/JunhaoZhuang/FlashVSR-v1.1 # v1.1
123128
```
124129

125-
After cloning, you should have:
130+
After cloning, you should have one of the following folders (both share the same file layout):
126131

127132
```
128-
./examples/WanVSR/FlashVSR/
133+
./examples/WanVSR/FlashVSR/ # v1
134+
./examples/WanVSR/FlashVSR-v1.1/ # v1.1
129135
130-
├── LQ_proj_in.ckpt
131-
├── TCDecoder.ckpt
132-
├── Wan2.1_VAE.pth
133-
├── diffusion_pytorch_model_streaming_dmd.safetensors
136+
├── LQ_proj_in.ckpt
137+
├── TCDecoder.ckpt
138+
├── Wan2.1_VAE.pth
139+
├── diffusion_pytorch_model_streaming_dmd.safetensors
134140
└── README.md
135141
```
136142
137-
> The inference scripts will load weights from `./examples/WanVSR/FlashVSR/` by default.
143+
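> Each inference script loads weights from its matching folder: the v1 scripts read `./FlashVSR/` and the v1.1 scripts read `./FlashVSR-v1.1/` (both relative to `examples/WanVSR`).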
144+
145+
---
138146
139147
#### 5️⃣ Run Inference
140148
141149
```bash
142150
# From the repo root
143151
cd examples/WanVSR
144-
python infer_flashvsr_full.py # Full model
152+
153+
# v1 (original)
154+
python infer_flashvsr_full.py
155+
# or
156+
python infer_flashvsr_tiny.py
157+
# or
158+
python infer_flashvsr_tiny_long_video.py
159+
160+
# v1.1 (recommended)
161+
python infer_flashvsr_v1.1_full.py
145162
# or
146-
python infer_flashvsr_tiny.py # Tiny model
163+
python infer_flashvsr_v1.1_tiny.py
147164
# or
148-
python infer_flashvsr_tiny_long_video.py # Tiny model for long videos
165+
python infer_flashvsr_v1.1_tiny_long_video.py
149166
```
150167

151168
---

examples/WanVSR/infer_flashvsr_v1.1_full.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,12 +161,12 @@ def init_pipeline():
161161
print(torch.cuda.current_device(), torch.cuda.get_device_name(torch.cuda.current_device()))
162162
mm = ModelManager(torch_dtype=torch.bfloat16, device="cpu")
163163
mm.load_models([
164-
"./FlashVSR/diffusion_pytorch_model_streaming_dmd.safetensors",
165-
"./FlashVSR/Wan2.1_VAE.pth",
164+
"./FlashVSR-v1.1/diffusion_pytorch_model_streaming_dmd.safetensors",
165+
"./FlashVSR-v1.1/Wan2.1_VAE.pth",
166166
])
167167
pipe = FlashVSRFullPipeline.from_model_manager(mm, device="cuda")
168168
pipe.denoising_model().LQ_proj_in = Causal_LQ4x_Proj(in_dim=3, out_dim=1536, layer_num=1).to("cuda", dtype=torch.bfloat16)
169-
LQ_proj_in_path = "./FlashVSR/LQ_proj_in.ckpt"
169+
LQ_proj_in_path = "./FlashVSR-v1.1/LQ_proj_in.ckpt"
170170
if os.path.exists(LQ_proj_in_path):
171171
pipe.denoising_model().LQ_proj_in.load_state_dict(torch.load(LQ_proj_in_path, map_location="cpu"), strict=True)
172172

examples/WanVSR/infer_flashvsr_v1.1_tiny.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -175,18 +175,18 @@ def init_pipeline():
175175
print(torch.cuda.current_device(), torch.cuda.get_device_name(torch.cuda.current_device()))
176176
mm = ModelManager(torch_dtype=torch.bfloat16, device="cpu")
177177
mm.load_models([
178-
"./FlashVSR/diffusion_pytorch_model_streaming_dmd.safetensors",
178+
"./FlashVSR-v1.1/diffusion_pytorch_model_streaming_dmd.safetensors",
179179
])
180180
pipe = FlashVSRTinyPipeline.from_model_manager(mm, device="cuda")
181181
pipe.denoising_model().LQ_proj_in = Causal_LQ4x_Proj(in_dim=3, out_dim=1536, layer_num=1).to("cuda", dtype=torch.bfloat16)
182-
LQ_proj_in_path = "./FlashVSR/LQ_proj_in.ckpt"
182+
LQ_proj_in_path = "./FlashVSR-v1.1/LQ_proj_in.ckpt"
183183
if os.path.exists(LQ_proj_in_path):
184184
pipe.denoising_model().LQ_proj_in.load_state_dict(torch.load(LQ_proj_in_path, map_location="cpu"), strict=True)
185185
pipe.denoising_model().LQ_proj_in.to('cuda')
186186

187187
multi_scale_channels = [512, 256, 128, 128]
188188
pipe.TCDecoder = build_tcdecoder(new_channels=multi_scale_channels, new_latent_channels=16+768)
189-
mis = pipe.TCDecoder.load_state_dict(torch.load("./FlashVSR/TCDecoder.ckpt"), strict=False)
189+
mis = pipe.TCDecoder.load_state_dict(torch.load("./FlashVSR-v1.1/TCDecoder.ckpt"), strict=False)
190190
print(mis)
191191

192192
pipe.to('cuda'); pipe.enable_vram_management(num_persistent_param_in_dit=None)

examples/WanVSR/infer_flashvsr_v1.1_tiny_long_video.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,18 +179,18 @@ def init_pipeline():
179179
print(torch.cuda.current_device(), torch.cuda.get_device_name(torch.cuda.current_device()))
180180
mm = ModelManager(torch_dtype=torch.bfloat16, device="cpu")
181181
mm.load_models([
182-
"./FlashVSR/diffusion_pytorch_model_streaming_dmd.safetensors",
182+
"./FlashVSR-v1.1/diffusion_pytorch_model_streaming_dmd.safetensors",
183183
])
184184
pipe = FlashVSRTinyLongPipeline.from_model_manager(mm, device="cuda")
185185
pipe.denoising_model().LQ_proj_in = Causal_LQ4x_Proj(in_dim=3, out_dim=1536, layer_num=1).to("cuda", dtype=torch.bfloat16)
186-
LQ_proj_in_path = "./FlashVSR/LQ_proj_in.ckpt"
186+
LQ_proj_in_path = "./FlashVSR-v1.1/LQ_proj_in.ckpt"
187187
if os.path.exists(LQ_proj_in_path):
188188
pipe.denoising_model().LQ_proj_in.load_state_dict(torch.load(LQ_proj_in_path, map_location="cpu"), strict=True)
189189
pipe.denoising_model().LQ_proj_in.to('cuda')
190190

191191
multi_scale_channels = [512, 256, 128, 128]
192192
pipe.TCDecoder = build_tcdecoder(new_channels=multi_scale_channels, new_latent_channels=16+768)
193-
mis = pipe.TCDecoder.load_state_dict(torch.load("./FlashVSR/TCDecoder.ckpt"), strict=False)
193+
mis = pipe.TCDecoder.load_state_dict(torch.load("./FlashVSR-v1.1/TCDecoder.ckpt"), strict=False)
194194
print(mis)
195195

196196
pipe.to('cuda'); pipe.enable_vram_management(num_persistent_param_in_dit=None)

0 commit comments

Comments
 (0)