Skip to content

Commit bf8e136

Browse files
authored
Merge pull request #464 from WebAV-Tech/fix/moov-matrix-parse
Fix/moov matrix parse
2 parents 7739f69 + 2d4eec1 commit bf8e136

File tree

6 files changed

+295
-9
lines changed

6 files changed

+295
-9
lines changed

.changeset/plain-bears-remain.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@webav/av-cliper': patch
3+
---
4+
5+
fix: MP4Clip did not apply the video track’s matrix (rotation) settings

packages/av-cliper/src/clips/__tests__/mp4-clip.test.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,15 +179,15 @@ test('get file header data', async () => {
179179
hasMoov: true,
180180
}),
181181
);
182+
182183
expect(boxfile.moov?.mvhd.matrix.length).toBe(9);
183184
});
184185

185186
test('decode incorrectFrameTypeMp4', async () => {
186187
const clip = new MP4Clip((await fetch(incorrectFrameTypeMp4)).body!);
187188
await clip.ready;
188-
console.log(clip.meta.duration);
189189
expect(Math.round(clip.meta.duration / 1e6)).toBe(5);
190-
const { state, video } = await clip.tick(clip.meta.duration - 30e3);
190+
// Fetch the last frame
191+
const { state } = await clip.tick(clip.meta.duration - 30e3);
191192
expect(state).toBe('success');
192-
expect(video?.timestamp).toBe(5e6);
193193
});

packages/av-cliper/src/clips/mp4-clip.ts

Lines changed: 95 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ import { MP4Info, MP4Sample } from '@webav/mp4box.js';
33
import { file, tmpfile, write } from 'opfs-tools';
44
import { audioResample, extractPCM4AudioData, sleep } from '../av-utils';
55
import {
6+
createVFRotater,
67
extractFileConfig,
8+
parseMatrix,
79
quickParseMP4File,
810
} from '../mp4-utils/mp4box-utils';
911
import { DEFAULT_AUDIO_CONF, IClip } from './iclip';
@@ -85,6 +87,7 @@ export class MP4Clip implements IClip {
8587

8688
#localFile: OPFSToolFile;
8789

90+
/** Positions (start offset and size) of the video header boxes (ftyp, moov) within the file */
8891
#headerBoxPos: Array<{ start: number; size: number }> = [];
8992
/**
9093
* Provides the binary data of the video header (boxes: ftyp, moov)
@@ -103,6 +106,18 @@ export class MP4Clip implements IClip {
103106
).arrayBuffer();
104107
}
105108

109+
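/** Transform info (translation, rotation, scale) parsed from the video track matrix; currently only the rotation is applied */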
110+
#parsedMatrix = {
111+
perspective: 1,
112+
rotationRad: 0,
113+
rotationDeg: 0,
114+
scaleX: 1,
115+
scaleY: 1,
116+
translateX: 0,
117+
translateY: 0,
118+
};
119+
#vfRotater: (vf: VideoFrame | null) => VideoFrame | null = (vf) => vf;
120+
106121
#volume = 1;
107122

108123
#videoSamples: ExtMP4Sample[] = [];
@@ -160,11 +175,18 @@ export class MP4Clip implements IClip {
160175
? mp4FileToSamples(source, this.#opts)
161176
: Promise.resolve(source)
162177
).then(
163-
async ({ videoSamples, audioSamples, decoderConf, headerBoxPos }) => {
178+
async ({
179+
videoSamples,
180+
audioSamples,
181+
decoderConf,
182+
headerBoxPos,
183+
parsedMatrix,
184+
}) => {
164185
this.#videoSamples = videoSamples;
165186
this.#audioSamples = audioSamples;
166187
this.#decoderConf = decoderConf;
167188
this.#headerBoxPos = headerBoxPos;
189+
this.#parsedMatrix = parsedMatrix;
168190

169191
const { videoFrameFinder, audioFrameFinder } = genDecoder(
170192
{
@@ -186,7 +208,22 @@ export class MP4Clip implements IClip {
186208
this.#videoFrameFinder = videoFrameFinder;
187209
this.#audioFrameFinder = audioFrameFinder;
188210

189-
this.#meta = genMeta(decoderConf, videoSamples, audioSamples);
211+
const { codedWidth, codedHeight } = decoderConf.video ?? {};
212+
if (codedWidth && codedHeight) {
213+
this.#vfRotater = createVFRotater(
214+
codedWidth,
215+
codedHeight,
216+
parsedMatrix.rotationDeg,
217+
);
218+
}
219+
220+
this.#meta = genMeta(
221+
decoderConf,
222+
videoSamples,
223+
audioSamples,
224+
parsedMatrix.rotationDeg,
225+
);
226+
190227
this.#log.info('MP4Clip meta:', this.#meta);
191228
return { ...this.#meta };
192229
},
@@ -223,7 +260,7 @@ export class MP4Clip implements IClip {
223260

224261
const [audio, video] = await Promise.all([
225262
this.#audioFrameFinder?.find(time) ?? [],
226-
this.#videoFrameFinder?.find(time),
263+
this.#videoFrameFinder?.find(time).then(this.#vfRotater),
227264
]);
228265

229266
if (video == null) {
@@ -355,6 +392,7 @@ export class MP4Clip implements IClip {
355392
audioSamples: preAudioSlice ?? [],
356393
decoderConf: this.#decoderConf,
357394
headerBoxPos: this.#headerBoxPos,
395+
parsedMatrix: this.#parsedMatrix,
358396
},
359397
this.#opts,
360398
);
@@ -365,6 +403,7 @@ export class MP4Clip implements IClip {
365403
audioSamples: postAudioSlice ?? [],
366404
decoderConf: this.#decoderConf,
367405
headerBoxPos: this.#headerBoxPos,
406+
parsedMatrix: this.#parsedMatrix,
368407
},
369408
this.#opts,
370409
);
@@ -382,6 +421,7 @@ export class MP4Clip implements IClip {
382421
audioSamples: [...this.#audioSamples],
383422
decoderConf: this.#decoderConf,
384423
headerBoxPos: this.#headerBoxPos,
424+
parsedMatrix: this.#parsedMatrix,
385425
},
386426
this.#opts,
387427
);
@@ -408,6 +448,7 @@ export class MP4Clip implements IClip {
408448
audio: null,
409449
},
410450
headerBoxPos: this.#headerBoxPos,
451+
parsedMatrix: this.#parsedMatrix,
411452
},
412453
this.#opts,
413454
);
@@ -426,6 +467,7 @@ export class MP4Clip implements IClip {
426467
video: null,
427468
},
428469
headerBoxPos: this.#headerBoxPos,
470+
parsedMatrix: this.#parsedMatrix,
429471
},
430472
this.#opts,
431473
);
@@ -451,6 +493,7 @@ function genMeta(
451493
decoderConf: MP4DecoderConf,
452494
videoSamples: ExtMP4Sample[],
453495
audioSamples: ExtMP4Sample[],
496+
rotationDeg: number,
454497
) {
455498
const meta = {
456499
duration: 0,
@@ -462,6 +505,11 @@ function genMeta(
462505
if (decoderConf.video != null && videoSamples.length > 0) {
463506
meta.width = decoderConf.video.codedWidth ?? 0;
464507
meta.height = decoderConf.video.codedHeight ?? 0;
508+
// For a 90° or 270° rotation, width and height must be swapped
509+
const normalizedRotation = (Math.round(rotationDeg / 90) * 90 + 360) % 360;
510+
if (normalizedRotation === 90 || normalizedRotation === 270) {
511+
[meta.width, meta.height] = [meta.height, meta.width];
512+
}
465513
}
466514
if (decoderConf.audio != null && audioSamples.length > 0) {
467515
meta.audioSampleRate = DEFAULT_AUDIO_CONF.sampleRate;
@@ -524,6 +572,15 @@ async function mp4FileToSamples(otFile: OPFSToolFile, opts: IMP4ClipOpts = {}) {
524572
let videoSamples: ExtMP4Sample[] = [];
525573
let audioSamples: ExtMP4Sample[] = [];
526574
let headerBoxPos: Array<{ start: number; size: number }> = [];
575+
const parsedMatrix = {
576+
perspective: 1,
577+
rotationRad: 0,
578+
rotationDeg: 0,
579+
scaleX: 1,
580+
scaleY: 1,
581+
translateX: 0,
582+
translateY: 0,
583+
};
527584

528585
let videoDeltaTS = -1;
529586
let audioDeltaTS = -1;
@@ -537,6 +594,8 @@ async function mp4FileToSamples(otFile: OPFSToolFile, opts: IMP4ClipOpts = {}) {
537594
const moov = data.mp4boxFile.moov!;
538595
headerBoxPos.push({ start: moov.start, size: moov.size });
539596

597+
Object.assign(parsedMatrix, parseMatrix(mp4Info.videoTracks[0]?.matrix));
598+
540599
let { videoDecoderConf: vc, audioDecoderConf: ac } = extractFileConfig(
541600
data.mp4boxFile,
542601
data.info,
@@ -599,6 +658,7 @@ async function mp4FileToSamples(otFile: OPFSToolFile, opts: IMP4ClipOpts = {}) {
599658
audioSamples,
600659
decoderConf,
601660
headerBoxPos,
661+
parsedMatrix,
602662
};
603663
}
604664

@@ -1525,4 +1585,36 @@ if (import.meta.vitest) {
15251585
expect(normalized.size).toBe(1000);
15261586
expect(normalized.is_sync).toBe(normalized.is_idr);
15271587
});
1588+
1589+
it('genMeta adjusts width and height based on rotation', () => {
1590+
const meta = genMeta(
1591+
{
1592+
video: {
1593+
codedWidth: 1920,
1594+
codedHeight: 1080,
1595+
},
1596+
audio: null,
1597+
} as any,
1598+
[{ cts: 0, duration: 1000 }] as any,
1599+
[],
1600+
90,
1601+
);
1602+
expect(meta.width).toBe(1080);
1603+
expect(meta.height).toBe(1920);
1604+
1605+
const meta2 = genMeta(
1606+
{
1607+
video: {
1608+
codedWidth: 1920,
1609+
codedHeight: 1080,
1610+
},
1611+
audio: null,
1612+
} as any,
1613+
[{ cts: 0, duration: 1000 }] as any,
1614+
[],
1615+
180,
1616+
);
1617+
expect(meta2.width).toBe(1920);
1618+
expect(meta2.height).toBe(1080);
1619+
});
15281620
}

packages/av-cliper/src/mp4-utils/__tests__/mp4-utils.test.ts

Lines changed: 122 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1-
import { beforeAll, describe, expect, test, vi } from 'vitest';
2-
import mp4box from '@webav/mp4box.js';
31
import { autoReadStream, file2stream } from '@webav/internal-utils';
2+
import mp4box from '@webav/mp4box.js';
43
import { file, write } from 'opfs-tools';
5-
import { quickParseMP4File } from '../mp4box-utils';
4+
import { beforeAll, describe, expect, test, vi } from 'vitest';
5+
import {
6+
createVFRotater,
7+
parseMatrix,
8+
quickParseMP4File,
9+
} from '../mp4box-utils';
610

711
beforeAll(() => {
812
vi.useFakeTimers();
@@ -95,3 +99,118 @@ test('quickParseMP4File', async () => {
9599
expect(sampleCount).toBe(40);
96100
await reader.close();
97101
});
102+
103+
test('vfRotater can be rotate VideoFrame instance', () => {
104+
const vf = new VideoFrame(new Uint8Array(200 * 100 * 4), {
105+
codedHeight: 100,
106+
codedWidth: 200,
107+
format: 'RGBA',
108+
timestamp: 0,
109+
});
110+
111+
// Test 90 degree rotation
112+
const rotater90 = createVFRotater(200, 100, 90);
113+
const rotatedVF90 = rotater90(vf.clone());
114+
expect(rotatedVF90).not.toBeNull();
115+
if (rotatedVF90 == null) throw new Error('must not be null');
116+
expect(rotatedVF90.codedWidth).toBe(100);
117+
expect(rotatedVF90.codedHeight).toBe(200);
118+
rotatedVF90.close();
119+
120+
// Test 180 degree rotation
121+
const rotater180 = createVFRotater(200, 100, 180);
122+
const rotatedVF180 = rotater180(vf.clone());
123+
expect(rotatedVF180).not.toBeNull();
124+
if (rotatedVF180 == null) throw new Error('must not be null');
125+
expect(rotatedVF180.codedWidth).toBe(200);
126+
expect(rotatedVF180.codedHeight).toBe(100);
127+
rotatedVF180.close();
128+
129+
// Test 270 degree rotation
130+
const rotater270 = createVFRotater(200, 100, 270);
131+
const rotatedVF270 = rotater270(vf.clone());
132+
expect(rotatedVF270).not.toBeNull();
133+
if (rotatedVF270 == null) throw new Error('must not be null');
134+
expect(rotatedVF270.codedWidth).toBe(100);
135+
expect(rotatedVF270.codedHeight).toBe(200);
136+
rotatedVF270.close();
137+
138+
// Test 0 degree rotation
139+
const rotater0 = createVFRotater(200, 100, 0);
140+
const vfClone = vf.clone();
141+
const rotatedVF0 = rotater0(vfClone);
142+
// For 0 rotation, it should return the original frame
143+
expect(rotatedVF0).toBe(vfClone);
144+
rotatedVF0?.close();
145+
146+
vf.close();
147+
});
148+
149+
describe('parseMatrix', () => {
150+
test('should throw error for invalid matrix length', () => {
151+
const matrix = new Int32Array(8);
152+
expect(parseMatrix(matrix)).toEqual({});
153+
});
154+
155+
test('should parse 0 degree rotation matrix', () => {
156+
const matrix = new Int32Array([65536, 0, 0, 0, 65536, 0, 0, 0, 1073741824]);
157+
const result = parseMatrix(matrix);
158+
expect(result.rotationDeg).toBe(0);
159+
expect(result.scaleX).toBe(1);
160+
expect(result.scaleY).toBe(1);
161+
expect(result.translateX).toBe(0);
162+
expect(result.translateY).toBe(0);
163+
});
164+
165+
test('should parse 90 degree rotation matrix', () => {
166+
// matrix for 90 deg rotation
167+
const matrix = new Int32Array([
168+
0, 65536, 0, -65536, 0, 0, 0, 0, 1073741824,
169+
]);
170+
const result = parseMatrix(matrix);
171+
expect(result.rotationDeg).toBe(-90);
172+
expect(result.scaleX).toBe(1);
173+
expect(result.scaleY).toBe(1);
174+
});
175+
176+
test('should parse 180 degree rotation matrix', () => {
177+
const matrix = new Int32Array([
178+
-65536, 0, 0, 0, -65536, 0, 0, 0, 1073741824,
179+
]);
180+
const result = parseMatrix(matrix);
181+
expect(result.rotationDeg).toBe(180);
182+
expect(result.scaleX).toBe(1);
183+
expect(result.scaleY).toBe(1);
184+
});
185+
186+
test('should parse 270 degree rotation matrix', () => {
187+
const matrix = new Int32Array([
188+
0, -65536, 0, 65536, 0, 0, 0, 0, 1073741824,
189+
]);
190+
const result = parseMatrix(matrix);
191+
expect(result.rotationDeg).toBe(90);
192+
expect(result.scaleX).toBe(1);
193+
expect(result.scaleY).toBe(1);
194+
});
195+
196+
test('should parse matrix with translation', () => {
197+
const width = 1920;
198+
const height = 1080;
199+
// 180 deg rotation + translation
200+
const matrix = new Int32Array([
201+
-65536,
202+
0,
203+
0,
204+
0,
205+
-65536,
206+
0,
207+
width * 65536,
208+
height * 65536,
209+
1073741824,
210+
]);
211+
const result = parseMatrix(matrix);
212+
expect(result.rotationDeg).toBe(180);
213+
expect(result.translateX).toBe(width);
214+
expect(result.translateY).toBe(height);
215+
});
216+
});

0 commit comments

Comments
 (0)