diff --git a/voicevox-remotion-template/package.json b/voicevox-remotion-template/package.json index 06058a9..f41b620 100644 --- a/voicevox-remotion-template/package.json +++ b/voicevox-remotion-template/package.json @@ -7,7 +7,8 @@ "start": "remotion preview", "render": "remotion render", "lint": "eslint .", - "voice:generate": "node scripts/voicevox-generate.js" + "voice:generate": "node scripts/voicevox-generate.js", + "voice:generate:pizza-kiln": "node scripts/voicevox-generate.js --script src/data/pizza-kiln/script.ts --output public/audio/pizza-kiln/lines --manifest src/data/pizza-kiln/voicevox-manifest.json" }, "dependencies": { "@remotion/google-fonts": "4.0.460", diff --git a/voicevox-remotion-template/public/audio/pizza-kiln/lines/pizza-kiln-sayo-001.wav b/voicevox-remotion-template/public/audio/pizza-kiln/lines/pizza-kiln-sayo-001.wav new file mode 100644 index 0000000..2341160 --- /dev/null +++ b/voicevox-remotion-template/public/audio/pizza-kiln/lines/pizza-kiln-sayo-001.wav Binary files differ diff --git a/voicevox-remotion-template/public/audio/pizza-kiln/lines/pizza-kiln-sayo-002.wav b/voicevox-remotion-template/public/audio/pizza-kiln/lines/pizza-kiln-sayo-002.wav new file mode 100644 index 0000000..691c837 --- /dev/null +++ b/voicevox-remotion-template/public/audio/pizza-kiln/lines/pizza-kiln-sayo-002.wav Binary files differ diff --git a/voicevox-remotion-template/public/video/PXL_20260501_081335887.TS.mp4 b/voicevox-remotion-template/public/video/PXL_20260501_081335887.TS.mp4 new file mode 100644 index 0000000..93a9207 --- /dev/null +++ b/voicevox-remotion-template/public/video/PXL_20260501_081335887.TS.mp4 Binary files differ diff --git a/voicevox-remotion-template/public/video/pizza-kiln-background.mp4 b/voicevox-remotion-template/public/video/pizza-kiln-background.mp4 new file mode 100644 index 0000000..2b5c33c --- /dev/null +++ b/voicevox-remotion-template/public/video/pizza-kiln-background.mp4 Binary files differ diff --git a/voicevox-remotion-template/scripts/voicevox-generate.js b/voicevox-remotion-template/scripts/voicevox-generate.js index 77d33ce..aa0f350 100644 --- a/voicevox-remotion-template/scripts/voicevox-generate.js +++ b/voicevox-remotion-template/scripts/voicevox-generate.js @@ -1,15 +1,71 @@ import fs from "node:fs/promises"; +import path from "node:path"; +import {fileURLToPath} from "node:url"; import ts from "typescript"; const VOICEVOX_URL = process.env.VOICEVOX_URL ?? "http://host.docker.internal:50021"; -const scriptPath = new URL("../src/data/script.ts", import.meta.url); -const outputDir = new URL("../public/audio/lines/", import.meta.url); -const manifestPath = new URL( - "../src/data/voicevox-manifest.json", - import.meta.url +const projectRoot = path.resolve( + fileURLToPath(new URL("..", import.meta.url)) ); +const publicDir = path.join(projectRoot, "public"); + +const resolveProjectPath = (value) => + path.isAbsolute(value) ? value : path.resolve(projectRoot, value); + +const parseArgs = () => { + const values = { + script: "src/data/script.ts", + output: "public/audio/lines", + manifest: "src/data/voicevox-manifest.json", + }; + const args = process.argv.slice(2); + + for (let index = 0; index < args.length; index += 1) { + const arg = args[index]; + if (!arg.startsWith("--")) { + throw new Error(`Unknown argument "${arg}".`); + } + + const key = arg.slice(2); + if (!(key in values)) { + throw new Error(`Unknown option "${arg}".`); + } + + const value = args[index + 1]; + if (!value || value.startsWith("--")) { + throw new Error(`Option "${arg}" needs a value.`); + } + + values[key] = value; + index += 1; + } + + return { + scriptPath: resolveProjectPath(values.script), + outputDir: resolveProjectPath(values.output), + manifestPath: resolveProjectPath(values.manifest), + }; +}; + +const {scriptPath, outputDir, manifestPath} = parseArgs(); + +const toProjectRelative = (targetPath) => + path.relative(projectRoot, targetPath).split(path.sep).join("/"); + +const toPublicRelative = (targetPath) => { + const relativePath = path.relative(publicDir, targetPath); + if (relativePath.startsWith("..") || path.isAbsolute(relativePath)) { + throw new Error( + `Output directory must be inside public/: ${toProjectRelative(targetPath)}` + ); + } + + return relativePath.split(path.sep).join("/"); +}; + +const publicRelativeOutputDir = toPublicRelative(outputDir); const getWavDurationSeconds = (buffer) => { if (buffer.toString("ascii", 0, 4) !== "RIFF") { @@ -50,7 +106,7 @@ module: ts.ModuleKind.ES2022, target: ts.ScriptTarget.ES2022, }, - fileName: scriptPath.pathname, + fileName: scriptPath, }); const errors = transpiled.diagnostics?.filter( (diagnostic) => diagnostic.category === ts.DiagnosticCategory.Error @@ -62,7 +118,9 @@ ts.flattenDiagnosticMessageText(diagnostic.messageText, "\n") ) .join("\n"); - throw new Error(`Failed to transpile src/data/script.ts:\n${message}`); + throw new Error( + `Failed to transpile ${toProjectRelative(scriptPath)}:\n${message}` + ); } const moduleUrl = `data:text/javascript;base64,${Buffer.from( @@ -115,12 +173,14 @@ const {characters, timeline} = await loadScriptModule(); if (!characters || !timeline) { - throw new Error("src/data/script.ts must export characters and timeline."); + throw new Error( + `${toProjectRelative(scriptPath)} must export characters and timeline.` + ); } const speechEvents = timeline.filter((event) => event?.type === "say"); if (speechEvents.length === 0) { - throw new Error("src/data/script.ts has no say(...) events."); + throw new Error(`${toProjectRelative(scriptPath)} has no say(...) events.`); } const speakers = await fetchSpeakers(); @@ -161,7 +221,7 @@ } const audioBuffer = Buffer.from(await synthResponse.arrayBuffer()); - const outputPath = new URL(`./${speech.id}.wav`, outputDir); + const outputPath = path.join(outputDir, `${speech.id}.wav`); await fs.writeFile(outputPath, audioBuffer); const durationSeconds = getWavDurationSeconds(audioBuffer); manifest.push({ @@ -170,13 +230,14 @@ speakerName: voice.speakerName, styleName: voice.styleName, speakerId, - file: `audio/lines/${speech.id}.wav`, + file: `${publicRelativeOutputDir}/${speech.id}.wav`, durationSeconds, }); console.log( - `Wrote ${outputPath.pathname} (${voice.speakerName} / ${voice.styleName}, ${durationSeconds.toFixed(2)}s)` + `Wrote ${outputPath} (${voice.speakerName} / ${voice.styleName}, ${durationSeconds.toFixed(2)}s)` ); } +await fs.mkdir(path.dirname(manifestPath), {recursive: true}); await fs.writeFile(manifestPath, JSON.stringify(manifest, null, 2) + "\n"); -console.log(`Updated ${manifestPath.pathname}`); +console.log(`Updated ${manifestPath}`); diff --git a/voicevox-remotion-template/src/data/pizza-kiln/script.ts b/voicevox-remotion-template/src/data/pizza-kiln/script.ts new file mode 100644 index 0000000..158a5e3 --- /dev/null +++ b/voicevox-remotion-template/src/data/pizza-kiln/script.ts @@ -0,0 +1,71 @@ +export type VoicevoxVoice = Readonly<{ + speakerName: string; + styleName: string; +}>; + +export type CharacterDefinition = Readonly<{ + displayName: string; + voicevox: VoicevoxVoice; + avatar: Readonly<{ + accentColor: string; + imagePath: string; + }>; +}>; + +export const characters = { + sayo: { + displayName: "小夜", + voicevox: { + speakerName: "小夜/SAYO", + styleName: "ノーマル", + }, + avatar: { + accentColor: "#6b5f83", + imagePath: "image/sayo-standee-base.png", + }, + }, +} as const satisfies Record; + +export type CharacterId = keyof typeof characters; + +export type SpeechOptions = Readonly<{ + subtitle?: string; + voicevox?: Partial; +}>; + +export type SpeechEvent = Readonly<{ + type: "say"; + id: string; + character: CharacterId; + text: string; + subtitle?: string; + voicevox?: Partial; +}>; + +export const say = ( + id: string, + character: CharacterId, + text: string, + options: SpeechOptions = {} +): SpeechEvent => ({ + type: "say", + id, + character, + text, + ...options, +}); + +export const timeline = [ + say( + "pizza-kiln-sayo-001", + "sayo", + "小夜です。今日は、お手製の耐熱煉瓦ピザ窯を、全体ショットでご紹介します。" + ), + say( + "pizza-kiln-sayo-002", + "sayo", + "以上、小夜がお届けしました。ピザ窯の雰囲気、伝わっていたらうれしいです。" + ), +] satisfies SpeechEvent[]; + +export const script = timeline; diff --git a/voicevox-remotion-template/src/data/pizza-kiln/timing.ts b/voicevox-remotion-template/src/data/pizza-kiln/timing.ts new file mode 100644 index 0000000..9ccd362 --- /dev/null +++ b/voicevox-remotion-template/src/data/pizza-kiln/timing.ts @@ -0,0 +1,48 @@ +import {timeline, type SpeechEvent} from "./script"; +import voicevoxManifest from "./voicevox-manifest.json"; + +type ManifestEntry = { + id: string; + character?: string; + speakerName?: string; + styleName?: string; + speakerId?: number; + file: string; + durationSeconds: number; +}; + +const manifestEntries = voicevoxManifest as ManifestEntry[]; +const manifestById = new Map( + manifestEntries.map((entry) => [entry.id, entry]) +); + +export const PIZZA_KILN_FPS = 30; +export const PIZZA_KILN_GAP_FRAMES = 6; +export const PIZZA_KILN_VIDEO_FRAMES = 154; + +export const hasAudioForSpeech = (speech: SpeechEvent) => + manifestById.has(speech.id); + +export const audioFileForSpeech = (speech: SpeechEvent) => + manifestById.get(speech.id)?.file ?? + `audio/pizza-kiln/lines/${speech.id}.wav`; + +export const durationForSpeech = ( + speech: SpeechEvent, + fps = PIZZA_KILN_FPS +) => { + const entry = manifestById.get(speech.id); + if (entry && Number.isFinite(entry.durationSeconds)) { + return Math.max(1, Math.ceil(entry.durationSeconds * fps)); + } + + const estimatedSeconds = Math.max(1.2, speech.text.length * 0.11); + return Math.ceil(estimatedSeconds * fps); +}; + +export const totalPizzaKilnDurationInFrames = (fps = PIZZA_KILN_FPS) => + durationForSpeech(timeline[0], fps) + + PIZZA_KILN_GAP_FRAMES + + PIZZA_KILN_VIDEO_FRAMES + + PIZZA_KILN_GAP_FRAMES + + durationForSpeech(timeline[1], fps); diff --git a/voicevox-remotion-template/src/data/pizza-kiln/voicevox-manifest.json b/voicevox-remotion-template/src/data/pizza-kiln/voicevox-manifest.json new file mode 100644 index 0000000..e104d12 --- /dev/null +++ b/voicevox-remotion-template/src/data/pizza-kiln/voicevox-manifest.json @@ -0,0 +1,20 @@ +[ + { + "id": "pizza-kiln-sayo-001", + "character": "sayo", + "speakerName": "小夜/SAYO", + "styleName": "ノーマル", + "speakerId": 46, + "file": "audio/pizza-kiln/lines/pizza-kiln-sayo-001.wav", + "durationSeconds": 7.456 + }, + { + "id": "pizza-kiln-sayo-002", + "character": "sayo", + "speakerName": "小夜/SAYO", + "styleName": "ノーマル", + "speakerId": 46, + "file": "audio/pizza-kiln/lines/pizza-kiln-sayo-002.wav", + "durationSeconds": 6.784 + } +] diff --git a/voicevox-remotion-template/src/pizza-kiln-composition.tsx b/voicevox-remotion-template/src/pizza-kiln-composition.tsx new file mode 100644 index 0000000..5c1f28c --- /dev/null +++ b/voicevox-remotion-template/src/pizza-kiln-composition.tsx @@ -0,0 +1,227 @@ +import React from "react"; +import {Audio, Video} from "@remotion/media"; +import { + AbsoluteFill, + Img, + interpolate, + Sequence, + spring, + staticFile, + useCurrentFrame, + useVideoConfig, +} from "remotion"; +import { + audioFileForSpeech, + durationForSpeech, + hasAudioForSpeech, + PIZZA_KILN_GAP_FRAMES, + PIZZA_KILN_VIDEO_FRAMES, +} from "./data/pizza-kiln/timing"; +import {characters, timeline, type SpeechEvent} from "./data/pizza-kiln/script"; +import {roundedFontFamily} from "./fonts"; + +const BACKGROUND_VIDEO_PATH = "video/pizza-kiln-background.mp4"; + +const clampInterpolation = { + extrapolateLeft: "clamp", + extrapolateRight: "clamp", +} as const; + +const UsualBackground: React.FC = () => ( + <> + + + +); + +const Subtitle: React.FC< + Readonly<{ + text: string; + progress: number; + speakerName: string; + accentColor: string; + }> +> = ({text, progress, speakerName, accentColor}) => { + const opacity = interpolate(progress, [0, 1], [0, 1], clampInterpolation); + const translateY = interpolate(progress, [0, 1], [16, 0], clampInterpolation); + + return ( +
+
+ {speakerName} +
+
{text}
+
+ ); +}; + +const SayoStandee: React.FC< + Readonly<{ + mode: "stage" | "corner"; + speaking: boolean; + localFrame: number; + fps: number; + }> +> = ({mode, speaking, localFrame, fps}) => { + const cycleFrames = Math.max(1, Math.round(fps * 0.25)); + const progress = (localFrame % cycleFrames) / cycleFrames; + const speakingHop = -Math.sin(progress * Math.PI) * 7; + const translateY = speaking ? speakingHop : 0; + const isCorner = mode === "corner"; + + return ( +
+ +
+ ); +}; + +const SpeechOverlay: React.FC> = ({speech}) => { + const frame = useCurrentFrame(); + const {fps} = useVideoConfig(); + const character = characters[speech.character]; + const subtitleProgress = spring({ + frame, + fps, + config: {damping: 20, mass: 0.7}, + }); + + return ( + <> +
+ +
+ {hasAudioForSpeech(speech) ? ( +