diff --git a/.gitignore b/.gitignore index 4807c38..864c703 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,9 @@ voicevox-remotion-template/public/lipsync/raw/ voicevox-remotion-template/src/generated/lipsync/*.mouth.json +# Generated VOICEVOX audio for local scenario work +voicevox-remotion-template/public/audio/pizza-oven-project-01/ + # Build artifacts and caches dist/ build/ diff --git a/voicevox-remotion-template/README.md b/voicevox-remotion-template/README.md index 81c3de0..38ffa27 100644 --- a/voicevox-remotion-template/README.md +++ b/voicevox-remotion-template/README.md @@ -46,6 +46,12 @@ npm run voice:generate:pizza-kiln ``` +PizzaOvenProject01 の音声を生成する場合は、次を実行します。 + +```bash +npm run voice:generate:pizza-oven-project-01 +``` + ### 5. 口パク指示データを生成 Rhubarb Lip Sync CLI を使い、VOICEVOX 音声から口形タイムラインを生成します。 @@ -63,6 +69,18 @@ npm run lipsync:generate -- public/audio/lines/zunda-001.wav ``` +時系列シナリオ単位で再生成する場合は、対応する VOICEVOX manifest を指定します。 + +```bash +npm run lipsync:generate -- --source-manifest src/data/pizza-oven-project-01/voicevox-manifest.json +``` + +PizzaOvenProject01 には専用コマンドもあります。 + +```bash +npm run lipsync:generate:pizza-oven-project-01 +``` + 処理順は `1. npm run voice:generate`、`2. npm run lipsync:generate`、 `3. npm run start` です。音声を作り直したら、口パク指示データも再生成してください。 @@ -190,6 +208,10 @@ `speakingAnimationType: "rhubarbLipSync"` を指定すると、`src/generated/lipsync/manifest.json` の口形タイムラインに合わせて `mouthImageDir` の画像を切り替えます。 +新しい時系列コンポジションでは、`VQChronologicalScenario` の `assetWorkflow` に +VOICEVOX 音声生成と Rhubarb 口パク生成のパスをセットで定義します。口形はコンポジションに +固定値を書かず、`speech.id` をキーに `src/generated/lipsync/manifest.json` から参照します。 + ### 4. 
コンポジション固有の見栄えを調整 通常の全身表示は `src/standee-sets.ts` の `imageLayout` で調整します。 コンポジションごとに特別な配置がある場合だけ、描画側を調整します。 diff --git a/voicevox-remotion-template/package.json b/voicevox-remotion-template/package.json index 0d331e4..5ce86d9 100644 --- a/voicevox-remotion-template/package.json +++ b/voicevox-remotion-template/package.json @@ -8,9 +8,11 @@ "render": "remotion render", "lint": "eslint .", "lipsync:generate": "node scripts/generate-lipsync.js", + "lipsync:generate:pizza-oven-project-01": "node scripts/generate-lipsync.js --source-manifest src/data/pizza-oven-project-01/voicevox-manifest.json", "test:lipsync": "node --test scripts/lipsync-utils.test.js", "voice:generate": "node scripts/voicevox-generate.js", - "voice:generate:pizza-kiln": "node scripts/voicevox-generate.js --script src/data/pizza-kiln/script.ts --output public/audio/pizza-kiln/lines --manifest src/data/pizza-kiln/voicevox-manifest.json" + "voice:generate:pizza-kiln": "node scripts/voicevox-generate.js --script src/data/pizza-kiln/script.ts --output public/audio/pizza-kiln/lines --manifest src/data/pizza-kiln/voicevox-manifest.json", + "voice:generate:pizza-oven-project-01": "node scripts/voicevox-generate.js --script src/data/pizza-oven-project-01/script.ts --output public/audio/pizza-oven-project-01/lines --manifest src/data/pizza-oven-project-01/voicevox-manifest.json" }, "dependencies": { "@remotion/google-fonts": "4.0.460", diff --git a/voicevox-remotion-template/scripts/generate-lipsync.js b/voicevox-remotion-template/scripts/generate-lipsync.js index d44ebb1..61ea044 100644 --- a/voicevox-remotion-template/scripts/generate-lipsync.js +++ b/voicevox-remotion-template/scripts/generate-lipsync.js @@ -14,6 +14,7 @@ const DEFAULT_SOURCE_MANIFESTS = [ "src/data/voicevox-manifest.json", "src/data/pizza-kiln/voicevox-manifest.json", + "src/data/pizza-oven-project-01/voicevox-manifest.json", ]; const resolveProjectPath = (value) => @@ -45,6 +46,7 @@ out: undefined, rawOut: undefined, manifest: 
"src/generated/lipsync/manifest.json", + sourceManifests: [], }; const audioPaths = []; const args = process.argv.slice(2); @@ -57,7 +59,7 @@ } const key = arg.slice(2); - if (!["out", "raw-out", "manifest"].includes(key)) { + if (!["out", "raw-out", "manifest", "source-manifest"].includes(key)) { throw new Error(`Unknown option "${arg}".`); } @@ -66,7 +68,9 @@ throw new Error(`Option "${arg}" needs a value.`); } - if (key === "raw-out") { + if (key === "source-manifest") { + values.sourceManifests.push(value); + } else if (key === "raw-out") { values.rawOut = value; } else { values[key] = value; @@ -77,6 +81,9 @@ if (audioPaths.length > 1) { throw new Error("Only one audio path can be specified."); } + if (audioPaths[0] && values.sourceManifests.length > 0) { + throw new Error("Audio path and --source-manifest cannot be used together."); + } if (!audioPaths[0] && (values.out || values.rawOut)) { throw new Error("--out and --raw-out can only be used with one audio path."); } @@ -86,6 +93,7 @@ outPath: values.out ? resolveProjectPath(values.out) : undefined, rawOutPath: values.rawOut ? resolveProjectPath(values.rawOut) : undefined, manifestPath: resolveProjectPath(values.manifest), + sourceManifestPaths: values.sourceManifests.map(resolveProjectPath), }; }; @@ -240,11 +248,13 @@ ); }; -const defaultTasks = async () => { +const defaultTasks = async (sourceManifestPaths) => { + const manifests = + sourceManifestPaths.length > 0 + ? 
/**
 * Compiles the scenario script at `scriptPath` with the TypeScript compiler
 * API and loads the result.
 *
 * The script (and whatever it imports) is emitted as CommonJS into a fresh
 * temporary directory, mirroring its position relative to `projectRoot`, and
 * is then loaded through a `require` created for this file.
 *
 * @returns A promise for the exports object of the compiled script module.
 * @throws {Error} When the compiler reports any error-category diagnostic;
 *   the message lists every flattened diagnostic text, one per line.
 *
 * NOTE(review): the emitted files live under the OS temp directory, so bare
 * package imports inside the compiled script would presumably be resolved
 * from there rather than from the project's node_modules — confirm the
 * script's import graph only uses project-relative modules.
 */
const loadScriptModule = async () => {
  const outDir = await fs.mkdtemp(path.join(os.tmpdir(), "voicevox-script-"));

  // A single try/finally owns the temporary directory, so it is removed on
  // success, on reported compile errors, and also when the compiler itself
  // throws unexpectedly.
  try {
    const compilerOptions = {
      module: ts.ModuleKind.CommonJS,
      moduleResolution: ts.ModuleResolutionKind.Node10,
      target: ts.ScriptTarget.ES2022,
      jsx: ts.JsxEmit.ReactJSX,
      rootDir: projectRoot,
      outDir,
      esModuleInterop: true,
      resolveJsonModule: true,
      skipLibCheck: true,
    };
    const program = ts.createProgram([scriptPath], compilerOptions);
    const emit = program.emit();

    // Collect pre-emit and emit diagnostics; only errors abort the load.
    const errors = ts
      .getPreEmitDiagnostics(program)
      .concat(emit.diagnostics)
      .filter((diagnostic) => diagnostic.category === ts.DiagnosticCategory.Error);

    if (errors.length) {
      const message = errors
        .map((diagnostic) =>
          ts.flattenDiagnosticMessageText(diagnostic.messageText, "\n")
        )
        .join("\n");
      throw new Error(
        `Failed to compile ${toProjectRelative(scriptPath)}:\n${message}`
      );
    }

    // rootDir is projectRoot, so the emitted file sits at the same relative
    // path inside outDir with a .js extension.
    const compiledPath = path
      .join(outDir, path.relative(projectRoot, scriptPath))
      .replace(/\.[cm]?tsx?$/, ".js");
    const requireCompiled = createRequire(import.meta.url);

    return requireCompiled(compiledPath);
  } finally {
    await fs.rm(outDir, {recursive: true, force: true});
  }
};
import {
  defineVQChronologicalScenario,
  defineVQScenarioAssetWorkflow,
  totalVQChronologicalScenarioDurationInFrames,
} from "../../lib/VQRemotionLib/scenario";
import {timeline, type SpeechEvent, type TimelineEvent} from "./script";
import voicevoxManifest from "./voicevox-manifest.json";

/** Shape of one record in this scenario's voicevox-manifest.json. */
type ManifestEntry = {
  id: string;
  character?: string;
  speakerName?: string;
  styleName?: string;
  speakerId?: number;
  file: string;
  durationSeconds: number;
};

/** Manifest records indexed by speech id for constant-time lookup. */
const manifestById = new Map(
  (voicevoxManifest as ManifestEntry[]).map((record) => [record.id, record])
);

export const PIZZA_OVEN_PROJECT_01_FPS = 30;
export const PIZZA_OVEN_PROJECT_01_GAP_FRAMES = 6;

/** True when the manifest contains a record for the given speech. */
export const hasAudioForSpeech = (speech: SpeechEvent) => {
  return manifestById.has(speech.id);
};

/**
 * Audio file for a speech: the manifest's `file` when a record exists,
 * otherwise a path built from the speech id.
 */
export const audioFileForSpeech = (speech: SpeechEvent) => {
  const record = manifestById.get(speech.id);
  return record?.file ?? `audio/pizza-oven-project-01/lines/${speech.id}.wav`;
};

/**
 * Length of a speech in frames.
 *
 * Preference order: the manifest's duration, then the speech's own
 * `durationSeconds`, then an estimate from the text length
 * (0.11 s per character, at least 1.2 s).
 */
export const durationForSpeech = (
  speech: SpeechEvent,
  fps = PIZZA_OVEN_PROJECT_01_FPS
) => {
  // Explicit durations are rounded up and never shorter than one frame.
  const secondsToFrames = (seconds: number) =>
    Math.max(1, Math.ceil(seconds * fps));

  const record = manifestById.get(speech.id);
  if (record && Number.isFinite(record.durationSeconds)) {
    return secondsToFrames(record.durationSeconds);
  }

  const declaredSeconds = speech.durationSeconds;
  if (declaredSeconds && Number.isFinite(declaredSeconds)) {
    return secondsToFrames(declaredSeconds);
  }

  const estimatedSeconds = Math.max(1.2, speech.text.length * 0.11);
  return Math.ceil(estimatedSeconds * fps);
};

/** Duration callback for the scenario; every timeline event is a speech. */
export const durationForTimelineEvent = (
  event: TimelineEvent,
  fps = PIZZA_OVEN_PROJECT_01_FPS
) => {
  return durationForSpeech(event, fps);
};

// The VOICEVOX manifest is written by the voice step and read by the
// Rhubarb step, so the path is named once and used for both.
const VOICEVOX_MANIFEST_PATH =
  "src/data/pizza-oven-project-01/voicevox-manifest.json";

/** Paths for the VOICEVOX and Rhubarb generation steps of this scenario. */
export const pizzaOvenProject01AssetWorkflow = defineVQScenarioAssetWorkflow({
  voicevox: {
    scriptPath: "src/data/pizza-oven-project-01/script.ts",
    outputDir: "public/audio/pizza-oven-project-01/lines",
    manifestPath: VOICEVOX_MANIFEST_PATH,
  },
  rhubarb: {
    sourceManifestPath: VOICEVOX_MANIFEST_PATH,
    manifestPath: "src/generated/lipsync/manifest.json",
    outputDir: "src/generated/lipsync",
    rawOutputDir: "public/lipsync/raw",
  },
});

/** Scenario definition: timeline, gap between events, and asset workflow. */
export const pizzaOvenProject01Scenario = defineVQChronologicalScenario({
  timeline,
  gapFrames: PIZZA_OVEN_PROJECT_01_GAP_FRAMES,
  durationForEvent: durationForTimelineEvent,
  assetWorkflow: pizzaOvenProject01AssetWorkflow,
});

/** Total length of the scenario in frames at the given frame rate. */
export const totalPizzaOvenProject01DurationInFrames = (
  fps = PIZZA_OVEN_PROJECT_01_FPS
) => {
  return totalVQChronologicalScenarioDurationInFrames(
    pizzaOvenProject01Scenario,
    fps
  );
};
{ + "start": 0.08, + "end": 0.13, + "mouth": "i", + "source": "B" + }, + { + "start": 0.13, + "end": 0.18, + "mouth": "e", + "source": "C" + }, + { + "start": 0.18, + "end": 0.25, + "mouth": "a", + "source": "D" + }, + { + "start": 0.25, + "end": 0.6, + "mouth": "i", + "source": "B" + }, + { + "start": 0.6, + "end": 0.67, + "mouth": "u", + "source": "F" + }, + { + "start": 0.67, + "end": 0.74, + "mouth": "e", + "source": "C" + }, + { + "start": 0.74, + "end": 0.88, + "mouth": "i", + "source": "B" + }, + { + "start": 0.88, + "end": 1.15, + "mouth": "rest", + "source": "X" + }, + { + "start": 1.15, + "end": 1.23, + "mouth": "i", + "source": "B" + }, + { + "start": 1.23, + "end": 1.32, + "mouth": "closed", + "source": "A" + }, + { + "start": 1.32, + "end": 1.36, + "mouth": "u", + "source": "F" + }, + { + "start": 1.36, + "end": 1.89, + "mouth": "i", + "source": "B" + }, + { + "start": 1.89, + "end": 2.03, + "mouth": "rest", + "source": "X" + } + ] + }, + "pizza-oven-project-01-sayo-002": { + "version": 1, + "source": { + "audio": "audio/pizza-oven-project-01/lines/pizza-oven-project-01-sayo-002.wav", + "engine": "rhubarb-lip-sync", + "recognizer": "phonetic" + }, + "duration": 2.11, + "cues": [ + { + "start": 0, + "end": 0.1, + "mouth": "rest", + "source": "X" + }, + { + "start": 0.1, + "end": 0.19, + "mouth": "i", + "source": "B" + }, + { + "start": 0.19, + "end": 0.27, + "mouth": "closed", + "source": "A" + }, + { + "start": 0.27, + "end": 0.37, + "mouth": "o", + "source": "E" + }, + { + "start": 0.37, + "end": 0.71, + "mouth": "closed", + "source": "A" + }, + { + "start": 0.71, + "end": 0.81, + "mouth": "i", + "source": "B" + }, + { + "start": 0.81, + "end": 0.9, + "mouth": "closed", + "source": "A" + }, + { + "start": 0.9, + "end": 0.99, + "mouth": "i", + "source": "B" + }, + { + "start": 0.99, + "end": 1.07, + "mouth": "closed", + "source": "A" + }, + { + "start": 1.07, + "end": 1.29, + "mouth": "i", + "source": "B" + }, + { + "start": 1.29, + "end": 1.37, + 
"mouth": "closed", + "source": "A" + }, + { + "start": 1.37, + "end": 1.59, + "mouth": "i", + "source": "B" + }, + { + "start": 1.59, + "end": 1.69, + "mouth": "closed", + "source": "A" + }, + { + "start": 1.69, + "end": 1.94, + "mouth": "e", + "source": "C" + }, + { + "start": 1.94, + "end": 2.01, + "mouth": "i", + "source": "B" + }, + { + "start": 2.01, + "end": 2.11, + "mouth": "rest", + "source": "X" + } + ] + }, + "pizza-oven-project-01-sayo-003": { + "version": 1, + "source": { + "audio": "audio/pizza-oven-project-01/lines/pizza-oven-project-01-sayo-003.wav", + "engine": "rhubarb-lip-sync", + "recognizer": "phonetic" + }, + "duration": 3.02, + "cues": [ + { + "start": 0, + "end": 0.05, + "mouth": "rest", + "source": "X" + }, + { + "start": 0.05, + "end": 0.13, + "mouth": "i", + "source": "B" + }, + { + "start": 0.13, + "end": 0.21, + "mouth": "closed", + "source": "A" + }, + { + "start": 0.21, + "end": 0.49, + "mouth": "u", + "source": "F" + }, + { + "start": 0.49, + "end": 0.57, + "mouth": "closed", + "source": "A" + }, + { + "start": 0.57, + "end": 0.69, + "mouth": "i", + "source": "B" + }, + { + "start": 0.69, + "end": 1.09, + "mouth": "rest", + "source": "X" + }, + { + "start": 1.09, + "end": 1.17, + "mouth": "i", + "source": "B" + }, + { + "start": 1.17, + "end": 1.29, + "mouth": "closed", + "source": "A" + }, + { + "start": 1.29, + "end": 1.49, + "mouth": "u", + "source": "F" + }, + { + "start": 1.49, + "end": 1.56, + "mouth": "i", + "source": "B" + }, + { + "start": 1.56, + "end": 1.64, + "mouth": "closed", + "source": "A" + }, + { + "start": 1.64, + "end": 2.06, + "mouth": "i", + "source": "B" + }, + { + "start": 2.06, + "end": 2.27, + "mouth": "e", + "source": "C" + }, + { + "start": 2.27, + "end": 2.34, + "mouth": "i", + "source": "B" + }, + { + "start": 2.34, + "end": 2.45, + "mouth": "closed", + "source": "A" + }, + { + "start": 2.45, + "end": 2.62, + "mouth": "i", + "source": "B" + }, + { + "start": 2.62, + "end": 2.7, + "mouth": "closed", + 
/** Paths used by the VOICEVOX audio generation step of a scenario. */
export type VQScenarioVoicevoxWorkflow = Readonly<{
  scriptPath: string;
  outputDir: string;
  manifestPath: string;
}>;

/** Paths used by the Rhubarb lip-sync generation step of a scenario. */
export type VQScenarioRhubarbWorkflow = Readonly<{
  sourceManifestPath: string;
  manifestPath: string;
  outputDir: string;
  rawOutputDir: string;
}>;

/** The two asset generation steps of a scenario, declared together. */
export type VQScenarioAssetWorkflow = Readonly<{
  voicevox: VQScenarioVoicevoxWorkflow;
  rhubarb: VQScenarioRhubarbWorkflow;
}>;

/**
 * A scenario whose events play one after another.
 *
 * `TEvent` ties the items of `timeline` to the argument of
 * `durationForEvent`. It defaults to `unknown` so that references without a
 * type argument still compile.
 */
export type VQChronologicalScenario<TEvent = unknown> = Readonly<{
  timeline: readonly TEvent[];
  /** Frames inserted between two consecutive events. */
  gapFrames: number;
  /** Length of one event in frames at the given frame rate. */
  durationForEvent: (event: TEvent, fps: number) => number;
  assetWorkflow?: VQScenarioAssetWorkflow;
}>;

/** One timeline event with its computed position on the frame axis. */
export type VQScheduledScenarioSegment<TEvent = unknown> = Readonly<{
  event: TEvent;
  /** Position of the event in the scenario's timeline. */
  index: number;
  /** First frame of the segment. */
  from: number;
  durationInFrames: number;
}>;

/** Identity helper that type-checks a scenario literal. */
export const defineVQChronologicalScenario = <TEvent>(
  scenario: VQChronologicalScenario<TEvent>
): VQChronologicalScenario<TEvent> => scenario;

/** Identity helper that type-checks an asset workflow literal. */
export const defineVQScenarioAssetWorkflow = (
  workflow: VQScenarioAssetWorkflow
): VQScenarioAssetWorkflow => workflow;

/**
 * Lays the scenario's events out back to back.
 *
 * The first segment starts at frame 0; each following segment starts after
 * the previous segment's duration plus `gapFrames`.
 *
 * @param scenario Scenario to schedule.
 * @param fps Frame rate passed through to `durationForEvent`.
 * @returns One segment per timeline event, in timeline order.
 */
export const scheduleVQChronologicalScenario = <TEvent>(
  scenario: VQChronologicalScenario<TEvent>,
  fps: number
): VQScheduledScenarioSegment<TEvent>[] => {
  const lastIndex = scenario.timeline.length - 1;
  let cursor = 0;

  return scenario.timeline.map((event, index) => {
    const durationInFrames = scenario.durationForEvent(event, fps);
    const segment: VQScheduledScenarioSegment<TEvent> = {
      event,
      index,
      from: cursor,
      durationInFrames,
    };

    // No gap is appended after the final event.
    cursor += durationInFrames + (index < lastIndex ? scenario.gapFrames : 0);

    return segment;
  });
};

/**
 * Finds the segment that owns `frame`.
 *
 * This is the last segment whose `from` is not after `frame`, so a frame
 * inside the gap after a segment still resolves to that segment. A frame
 * before the first segment resolves to the first segment.
 *
 * @param segments Segments ordered by ascending `from`.
 * @param frame Frame to look up.
 * @returns The owning segment, or `undefined` when `segments` is empty.
 */
export const activeVQChronologicalScenarioSegmentForFrame = <TEvent>(
  segments: readonly VQScheduledScenarioSegment<TEvent>[],
  frame: number
): VQScheduledScenarioSegment<TEvent> | undefined => {
  let activeSegment: VQScheduledScenarioSegment<TEvent> | undefined =
    segments[0];

  for (const segment of segments) {
    if (frame < segment.from) {
      // Segments are ordered, so every later one starts after `frame` too.
      break;
    }
    activeSegment = segment;
  }

  return activeSegment;
};

/**
 * Total length of the scenario in frames: every event's duration plus one
 * gap between each pair of consecutive events.
 *
 * @returns The end frame of the last scheduled segment, or 0 for an empty
 *   timeline.
 */
export const totalVQChronologicalScenarioDurationInFrames = <TEvent>(
  scenario: VQChronologicalScenario<TEvent>,
  fps: number
): number => {
  const segments = scheduleVQChronologicalScenario(scenario, fps);
  const lastSegment = segments[segments.length - 1];

  // The schedule already accounts for the gaps, so the end of the last
  // segment is the total; the gap rule is not repeated here.
  return lastSegment ? lastSegment.from + lastSegment.durationInFrames : 0;
};
getMouthForSpeechFrame(activeSpeech.id, speakingLocalFrame, fps) + : "rest"; + + return React.createElement( + "div", + { + style: { + position: "absolute", + right: 330, + bottom: -82, + width: 520, + height: 720, + display: "flex", + justifyContent: "center", + alignItems: "flex-end", + transform: `translateY(${translateY}px)`, + zIndex: 2, + } satisfies React.CSSProperties, + }, + React.createElement(VQLipSyncedStandeeImage, { + imagePath: sayoAvatar.imagePath, + mouthImageDir: sayoAvatar.mouthImageDir, + mouth, + width: "100%", + height: "100%", + maxHeight: "100%", + filter: "drop-shadow(0 18px 40px rgba(31, 42, 68, 0.22))", + }) + ); +}; + +const TimelineOverlay: React.FC> = ({ + event, +}) => { + const character = characters[event.character]; + + return React.createElement(PizzaOvenSpeechOverlay, { + speech: event, + speakerName: character.displayName, + accentColor: character.avatar.accentColor, + hasAudio: hasAudioForSpeech, + getAudioPath: audioFileForSpeech, + subtitleOptions: { + fontFamily: roundedFontFamily, + fontSize: 40, + lineHeight: 1.35, + backgroundColor: "rgba(255, 255, 255, 0.9)", + }, + containerStyle: {zIndex: 3}, + }); +}; + +export const PizzaOvenProject01: React.FC = () => { + const frame = useCurrentFrame(); + const {fps} = useVideoConfig(); + const scheduledEvents = scheduleVQChronologicalScenario( + pizzaOvenProject01Scenario, + fps + ); + const activeSegment = activeVQChronologicalScenarioSegmentForFrame( + scheduledEvents, + frame + ); + const isInsideActiveSegment = activeSegment + ? frame < activeSegment.from + activeSegment.durationInFrames + : false; + const activeSpeech = + activeSegment && isInsideActiveSegment ? activeSegment.event : undefined; + const speakingLocalFrame = + activeSegment && activeSpeech ? 
frame - activeSegment.from : 0; + const sequences = scheduledEvents.map((scheduledEvent) => + React.createElement( + Sequence, + { + key: scheduledEvent.event.id, + from: scheduledEvent.from, + durationInFrames: scheduledEvent.durationInFrames, + premountFor: Math.min(fps, scheduledEvent.from), + }, + React.createElement(TimelineOverlay, {event: scheduledEvent.event}) + ) + ); + + return React.createElement( + AbsoluteFill, + { + style: { + display: "flex", + flexDirection: "column", + alignItems: "center", + }, + }, + React.createElement(VQWarmGradientBackground, null), + React.createElement(SayoStandee, { + frame, + fps, + activeSpeech, + speakingLocalFrame, + }), + sequences + ); +}; diff --git a/voicevox-remotion-template/src/root.tsx b/voicevox-remotion-template/src/root.tsx index 24c3f58..a96a91a 100644 --- a/voicevox-remotion-template/src/root.tsx +++ b/voicevox-remotion-template/src/root.tsx @@ -7,6 +7,11 @@ PIZZA_KILN_FPS, totalPizzaKilnDurationInFrames, } from "./data/pizza-kiln/timing"; +import {PizzaOvenProject01} from "./pizzaOvenProject01"; +import { + PIZZA_OVEN_PROJECT_01_FPS, + totalPizzaOvenProject01DurationInFrames, +} from "./data/pizza-oven-project-01/timing"; export const Root: React.FC = () => { return ( @@ -27,6 +32,16 @@ width={1280} height={720} /> + ); };