Skip to content

Commit 501a86c

Browse files
committed
fix: 跟随前端更新TTS等部分
1 parent 4b5fc2e commit 501a86c

File tree

4 files changed

+36
-13
lines changed

4 files changed

+36
-13
lines changed

docs/media-generation-guide.md

Lines changed: 30 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -54,8 +54,8 @@ payload = {
5454
response = requests.post(url, json=payload)
5555
data = response.json()
5656

57-
for i, image in enumerate(data['images']):
58-
image_bytes = base64.b64decode(image['data'])
57+
for i, img in enumerate(data['generatedImages']):
58+
image_bytes = base64.b64decode(img['image']['imageBytes'])
5959
with open(f'imagen_output_{i}.png', 'wb') as f:
6060
f.write(image_bytes)
6161
```
@@ -71,7 +71,7 @@ $body = @{
7171
} | ConvertTo-Json
7272
7373
$response = Invoke-RestMethod -Uri "http://localhost:2048/generate-image" -Method Post -ContentType "application/json" -Body $body
74-
$imageData = $response.images[0].data
74+
$imageData = $response.generatedImages[0].image.imageBytes
7575
[System.IO.File]::WriteAllBytes("C:\output.png", [Convert]::FromBase64String($imageData))
7676
```
7777

@@ -120,8 +120,8 @@ payload = {
120120
response = requests.post(url, json=payload, timeout=600)
121121
data = response.json()
122122

123-
for i, video in enumerate(data['videos']):
124-
video_bytes = base64.b64decode(video['data'])
123+
for i, vid in enumerate(data['generatedVideos']):
124+
video_bytes = base64.b64decode(vid['video']['videoBytes'])
125125
with open(f'veo_output_{i}.mp4', 'wb') as f:
126126
f.write(video_bytes)
127127
```
@@ -147,7 +147,7 @@ payload = {
147147
response = requests.post(url, json=payload, timeout=600)
148148
data = response.json()
149149

150-
video_bytes = base64.b64decode(data['videos'][0]['data'])
150+
video_bytes = base64.b64decode(data['generatedVideos'][0]['video']['videoBytes'])
151151
with open('output_video.mp4', 'wb') as f:
152152
f.write(video_bytes)
153153
```
@@ -256,17 +256,35 @@ for i, part in enumerate(parts):
256256

257257
## 响应格式
258258

259-
### Imagen / Veo 响应
259+
### Imagen 响应
260260

261261
```json
262262
{
263-
"images": [
263+
"generatedImages": [
264264
{
265-
"data": "<Base64 编码的图片数据>",
266-
"mimeType": "image/png",
267-
"index": 0
265+
"image": {
266+
"imageBytes": "<Base64 编码的图片数据>",
267+
"mimeType": "image/png"
268+
}
268269
}
269-
]
270+
],
271+
"modelVersion": "imagen-3.0-generate-002"
272+
}
273+
```
274+
275+
### Veo 响应
276+
277+
```json
278+
{
279+
"generatedVideos": [
280+
{
281+
"video": {
282+
"videoBytes": "<Base64 编码的视频数据>",
283+
"mimeType": "video/mp4"
284+
}
285+
}
286+
],
287+
"modelVersion": "veo-2.0-generate-001"
270288
}
271289
```
272290

src/config/tts_selectors.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
TTS_FOOTER_AUDIO_PLAYER_WRAPPER_SELECTOR = '.speech-prompt-footer-actions-left'
1111
TTS_AUDIO_PLAYER_SELECTOR = '.speech-prompt-footer audio[controls]'
1212
TTS_RUN_BUTTON_WRAPPER_SELECTOR = '.speech-prompt-footer .button-wrapper'
13-
TTS_RUN_BUTTON_SELECTOR = '.speech-prompt-footer button[aria-label="Run"].run-button'
13+
TTS_RUN_BUTTON_SELECTOR = 'ms-run-button button[aria-label="Run"]'
1414

1515
TTS_SINGLE_SPEAKER_BUILDER_SELECTOR = '.single-speaker-prompt-builder-wrapper'
1616
TTS_SINGLE_SPEAKER_STYLE_INPUT_SELECTOR = 'ms-autosize-textarea.style-instructions-textarea textarea'

src/media/imagen_controller.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ async def wait_for_images(self, expected_count: int, check_client_disconnected:
175175
for i in range(current_count):
176176
img = image_locator.nth(i)
177177
src = await img.get_attribute('src') or ''
178+
self.logger.info(f'[{self.req_id}] 图片 {i} src 类型: {src[:50] if src else "空"}...')
178179
if src.startswith('data:image/'):
179180
if ',' in src:
180181
header, base64_data = src.split(',', 1)

src/media/media_processor.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ async def process_image_request(
9090

9191
images = await controller.wait_for_images(config.number_of_images, check_client_disconnected)
9292

93+
logger.info(f'[{req_id}] 📦 处理 {len(images)} 张图片数据...')
94+
9395
generated_images = []
9496
for img in images:
9597
generated_images.append({
@@ -99,6 +101,8 @@ async def process_image_request(
99101
}
100102
})
101103

104+
logger.info(f'[{req_id}] ✅ 返回响应 (generatedImages: {len(generated_images)})')
105+
102106
return {
103107
'generatedImages': generated_images,
104108
'modelVersion': config.model

0 commit comments

Comments
 (0)