import assert from "node:assert/strict";
import {test} from "node:test";
import {normalizeRhubarbJson, normalizeVoicevoxTiming} from "./lipsync-utils.js";
test("maps Rhubarb mouth shapes to Japanese mouth shapes", () => {
  // Three consecutive cues using the Rhubarb shapes X, D and F; the
  // assertion below expects them to come back as rest, a and u.
  const rhubarbOutput = {
    metadata: {duration: 1.2},
    mouthCues: [
      {start: 0, end: 0.1, value: "X"},
      {start: 0.1, end: 0.2, value: "D"},
      {start: 0.2, end: 0.3, value: "F"},
    ],
  };
  const options = {audio: "audio/example.wav"};

  const result = normalizeRhubarbJson(rhubarbOutput, options);
  const mouths = result.timeline.cues.map(({mouth}) => mouth);

  assert.deepEqual(mouths, ["rest", "a", "u"]);
});
test("uses rest for unknown shapes and reports a warning", () => {
  // "Z" is the shape this test treats as unknown: the cue is expected to
  // become "rest" and the first warning is expected to name the shape.
  const rhubarbOutput = {
    mouthCues: [{start: 0, end: 0.1, value: "Z"}],
  };
  const options = {audio: "audio/example.wav"};

  const result = normalizeRhubarbJson(rhubarbOutput, options);
  const firstCue = result.timeline.cues[0];

  assert.equal(firstCue.mouth, "rest");
  assert.match(result.warnings[0], /Unknown Rhubarb mouth shape "Z"/);
});
test("uses metadata duration when available", () => {
  // The only cue ends at 0.1 while metadata reports 2.5; the timeline is
  // expected to report the metadata value.
  const rhubarbOutput = {
    metadata: {duration: 2.5},
    mouthCues: [{start: 0, end: 0.1, value: "X"}],
  };

  // Called without an options argument, exactly one parameter is passed.
  const result = normalizeRhubarbJson(rhubarbOutput);

  assert.equal(result.timeline.duration, 2.5);
});
test("falls back to the last cue end for duration", () => {
  // No metadata block is supplied, so the expected duration is the end
  // time of the final cue (0.8).
  const rhubarbOutput = {
    mouthCues: [
      {start: 0, end: 0.4, value: "X"},
      {start: 0.4, end: 0.8, value: "D"},
    ],
  };

  // Called without an options argument, exactly one parameter is passed.
  const result = normalizeRhubarbJson(rhubarbOutput);

  assert.equal(result.timeline.duration, 0.8);
});
test("builds mouth cues from VOICEVOX vowels", () => {
  // One mora per vowel, each 0.2 long and without a consonant.
  const vowels = ["a", "i", "u", "e", "o", "N"];
  const moras = vowels.map((vowel) => ({vowel, vowelLength: 0.2}));
  const query = {
    prePhonemeLength: 0.1,
    postPhonemeLength: 0.1,
    accentPhrases: [{moras}],
  };
  const options = {audio: "audio/example.wav"};

  const timeline = normalizeVoicevoxTiming(query, options);
  const mouths = timeline.cues.map(({mouth}) => mouth);

  // Expected sequence: a leading and a trailing "rest" cue around the
  // vowels, with "N" showing up as "closed".
  assert.deepEqual(mouths, [
    "rest",
    "a",
    "i",
    "u",
    "e",
    "o",
    "closed",
    "rest",
  ]);
  assert.equal(timeline.source.engine, "voicevox-query");
});
test("maps selected VOICEVOX consonants to closed and others to rest", () => {
  // [consonant, vowel] pairs; every consonant and vowel lasts 0.1.
  const syllables = [
    ["p", "a"],
    ["m", "i"],
    ["k", "u"],
  ];
  const moras = syllables.map(([consonant, vowel]) => ({
    consonant,
    consonantLength: 0.1,
    vowel,
    vowelLength: 0.1,
  }));

  // Called without an options argument, exactly one parameter is passed.
  const timeline = normalizeVoicevoxTiming({accentPhrases: [{moras}]});
  const mouths = timeline.cues.map(({mouth}) => mouth);

  // Expected: "p" and "m" yield "closed", while "k" yields "rest".
  assert.deepEqual(mouths, ["closed", "a", "closed", "i", "rest", "u"]);
});
test("scales VOICEVOX timing duration and merges adjacent mouths", () => {
  // The five lengths listed in this query add up to 0.5, while the
  // requested durationSeconds is 1. The assertions expect a single "rest"
  // cue spanning the whole requested duration.
  const phrase = {
    moras: [
      {consonant: "k", consonantLength: 0.1, vowel: "x", vowelLength: 0.1},
    ],
    pauseMora: {vowel: "pau", vowelLength: 0.1},
  };
  const query = {
    prePhonemeLength: 0.1,
    postPhonemeLength: 0.1,
    accentPhrases: [phrase],
  };
  const options = {durationSeconds: 1};

  const {duration, cues} = normalizeVoicevoxTiming(query, options);

  assert.equal(duration, 1);

  const mouths = cues.map(({mouth}) => mouth);
  assert.deepEqual(mouths, ["rest"]);

  const onlyCue = cues[0];
  assert.equal(onlyCue.start, 0);
  assert.equal(onlyCue.end, 1);
});