import {
defineVQScenarioAssetWorkflow,
type VQScenarioAssetWorkflow,
} from "../../lib/VQRemotionLib/scenario";
import {timeline, type SpeechEvent, type TimelineEvent} from "./script";
import voicevoxManifest from "./voicevox-manifest.json";
/**
 * One record of the imported `voicevox-manifest.json`.
 *
 * Only `id`, `file` and `durationSeconds` are read in this module. The
 * optional fields are presumably VOICEVOX speaker metadata — TODO confirm
 * against whatever generates the manifest.
 */
type ManifestEntry = {
  /** Key that is matched against `SpeechEvent.id`. */
  id: string;
  character?: string;
  speakerName?: string;
  styleName?: string;
  speakerId?: number;
  /** Audio file reference handed out by `audioFileForSpeech`. */
  file: string;
  /** Length in seconds; multiplied by the fps in `durationForSpeech`. */
  durationSeconds: number;
};

// The JSON import is treated as a list of manifest entries (unchecked cast).
const manifestEntries = voicevoxManifest as ManifestEntry[];

// Index the entries by id. When an id occurs more than once, the later entry
// replaces the earlier one.
const manifestById = new Map<string, ManifestEntry>();
for (const manifestEntry of manifestEntries) {
  manifestById.set(manifestEntry.id, manifestEntry);
}
/** Frame rate the duration helpers in this module use when no `fps` argument is passed. */
export const ZUNDAMON_JIRON_FPS = 30;
/**
 * Frames added after each timeline-advancing event when the total duration is
 * summed, except for an event sitting at the last index of the timeline.
 */
export const ZUNDAMON_JIRON_GAP_FRAMES = 6;
/** Fallback length in seconds for timeline events that carry no `durationSeconds`. */
export const ZUNDAMON_JIRON_DEFAULT_SHOW_SECONDS = 1.5;
/**
 * Tells whether the manifest contains an entry for the given speech line.
 *
 * @param speech Speech event whose `id` is looked up in the manifest index.
 * @returns `true` when an entry with that id exists, otherwise `false`.
 */
export const hasAudioForSpeech = (speech: SpeechEvent): boolean => {
  const speechId = speech.id;
  return manifestById.has(speechId);
};
/**
 * Resolves the audio file reference for a speech line.
 *
 * @param speech Speech event whose `id` is looked up in the manifest index.
 * @returns The manifest entry's `file` when present; otherwise a conventional
 *   path built from the speech id.
 */
export const audioFileForSpeech = (speech: SpeechEvent): string => {
  const manifestFile = manifestById.get(speech.id)?.file;
  if (manifestFile !== undefined && manifestFile !== null) {
    return manifestFile;
  }
  // No manifest entry (or no file on it): fall back to the default location.
  return `audio/zundamon-jiron/lines/${speech.id}.wav`;
};
/**
 * Returns the number of frames a spoken line occupies.
 *
 * When the manifest has an entry for the line with a finite
 * `durationSeconds`, that value is converted to frames (at least 1).
 * Otherwise the length is estimated from the text: 0.11 seconds per
 * character (`String.length` units) of `readAs ?? text`, with a floor of
 * 1.2 seconds.
 *
 * @param speech Speech event to measure.
 * @param fps Frames per second; defaults to `ZUNDAMON_JIRON_FPS`.
 * @returns Duration in whole frames, rounded up.
 */
export const durationForSpeech = (
  speech: SpeechEvent,
  fps = ZUNDAMON_JIRON_FPS
) => {
  const recordedSeconds = manifestById.get(speech.id)?.durationSeconds;
  if (typeof recordedSeconds === "number" && Number.isFinite(recordedSeconds)) {
    return Math.max(1, Math.ceil(recordedSeconds * fps));
  }

  // No usable measurement: estimate from the text that will be read aloud.
  const secondsPerCharacter = 0.11;
  const minimumSeconds = 1.2;
  const spokenText = speech.readAs ?? speech.text;
  const estimatedSeconds = Math.max(
    minimumSeconds,
    spokenText.length * secondsPerCharacter
  );
  return Math.ceil(estimatedSeconds * fps);
};
/**
 * Converts an optional frame count / second count pair into whole frames.
 *
 * `durationFrames` wins over `durationSeconds`. A value that is missing,
 * zero, NaN or infinite counts as "not specified". When neither value is
 * usable the result is 0.
 */
const framesFromExplicitDuration = (
  durationFrames: number | undefined,
  durationSeconds: number | undefined,
  fps: number
): number => {
  const isUsable = (value: number | undefined): value is number =>
    typeof value === "number" && value !== 0 && Number.isFinite(value);

  if (isUsable(durationFrames)) {
    return Math.max(1, Math.ceil(durationFrames));
  }
  if (isUsable(durationSeconds)) {
    return Math.max(1, Math.ceil(durationSeconds * fps));
  }
  return 0;
};

/**
 * Returns the length of a timeline event in whole frames.
 *
 * - `say` events are measured by `durationForSpeech`.
 * - `audio` and `standeeShake` events use their explicit `durationFrames` or
 *   `durationSeconds`, and are 0 frames long when neither is usable.
 * - `still`, `clearStill`, `clearVideo`, `standeePosition`,
 *   `standeeFacingDirection` and `standeeVerticalOffset` are always 0 frames.
 * - Every other event uses its `durationSeconds`, falling back to
 *   `ZUNDAMON_JIRON_DEFAULT_SHOW_SECONDS`, and is at least 1 frame long.
 *
 * @param event Timeline event to measure.
 * @param fps Frames per second; defaults to `ZUNDAMON_JIRON_FPS`.
 * @returns Duration in frames; never NaN or infinite for a finite `fps`.
 */
export const durationForTimelineEvent = (
  event: TimelineEvent,
  fps = ZUNDAMON_JIRON_FPS
): number => {
  switch (event.type) {
    case "say":
      return durationForSpeech(event, fps);

    case "audio":
    case "standeeShake":
      return framesFromExplicitDuration(
        event.durationFrames,
        event.durationSeconds,
        fps
      );

    case "still":
    case "clearStill":
    case "clearVideo":
    case "standeePosition":
    case "standeeFacingDirection":
    case "standeeVerticalOffset":
      return 0;

    default: {
      const explicitSeconds =
        "durationSeconds" in event ? event.durationSeconds : undefined;
      // A NaN or infinite duration would otherwise turn into a NaN/Infinity
      // frame count, so non-finite values are treated like a missing one.
      const durationSeconds =
        typeof explicitSeconds === "number" && Number.isFinite(explicitSeconds)
          ? explicitSeconds
          : ZUNDAMON_JIRON_DEFAULT_SHOW_SECONDS;
      return Math.max(1, Math.ceil(durationSeconds * fps));
    }
  }
};
/**
 * Tells whether an event counts towards the summed timeline length.
 *
 * `still`, `clearStill`, `audio`, `standeeFacingDirection`,
 * `standeeVerticalOffset` and `standeeShake` events do not; every other
 * event type does.
 */
const doesTimelineEventAdvanceTimeline = (event: TimelineEvent) => {
  switch (event.type) {
    case "still":
    case "clearStill":
    case "audio":
    case "standeeFacingDirection":
    case "standeeVerticalOffset":
    case "standeeShake":
      return false;
    default:
      return true;
  }
};
// Used both as `voicevox.manifestPath` and as `rhubarb.sourceManifestPath`.
const zundamonJironVoicevoxManifestPath =
  "src/data/zundamon-jiron/voicevox-manifest.json";

/**
 * Asset workflow definition for this scenario, created through
 * `defineVQScenarioAssetWorkflow`.
 *
 * It lists the script, output directory and manifest path under `voicevox`,
 * and the source manifest, manifest and output directories under `rhubarb`.
 * NOTE(review): paths look relative to the project root — confirm against
 * the tooling that consumes this definition.
 */
export const ZundamonJironAssetWorkflow: VQScenarioAssetWorkflow =
  defineVQScenarioAssetWorkflow({
    voicevox: {
      scriptPath: "src/data/zundamon-jiron/script.ts",
      outputDir: "public/audio/zundamon-jiron/lines",
      manifestPath: zundamonJironVoicevoxManifestPath,
    },
    rhubarb: {
      sourceManifestPath: zundamonJironVoicevoxManifestPath,
      manifestPath: "src/generated/lipsync/manifest.json",
      outputDir: "src/generated/lipsync",
      rawOutputDir: "public/lipsync/raw",
    },
  });
/**
 * Sums the length of the whole timeline in frames.
 *
 * Only events that advance the timeline and have a duration that is not
 * `<= 0` contribute. Each contributing event adds its own frames plus
 * `ZUNDAMON_JIRON_GAP_FRAMES`. The gap is left out only for an event at the
 * last index of `timeline`, so a contributing event that is followed solely
 * by non-contributing entries still gets its gap.
 *
 * @param fps Frames per second; defaults to `ZUNDAMON_JIRON_FPS`.
 * @returns Total duration in frames.
 */
export const totalZundamonJironDurationInFrames = (
  fps = ZUNDAMON_JIRON_FPS
) => {
  let totalFrames = 0;

  timeline.forEach((event, index) => {
    const eventFrames = durationForTimelineEvent(event, fps);
    if (!doesTimelineEventAdvanceTimeline(event) || eventFrames <= 0) {
      return;
    }

    totalFrames += eventFrames;
    if (index < timeline.length - 1) {
      totalFrames += ZUNDAMON_JIRON_GAP_FRAMES;
    }
  });

  return totalFrames;
};