diff --git a/voicevox-remotion-template/README.md b/voicevox-remotion-template/README.md new file mode 100644 index 0000000..8c6fb47 --- /dev/null +++ b/voicevox-remotion-template/README.md @@ -0,0 +1,48 @@ +# Remotion x VOICEVOX ゆっくり解説テンプレート + +Remotion と VOICEVOX を組み合わせて、ずんだもんが解説する動画テンプレートです。 +サンプルテーマは「ネコミミはなぜかわいいのか?」です。 + +## 使い方 + +### 1. 依存関係をインストール +```bash +npm install +``` + +### 2. VOICEVOX エンジンを起動 +VOICEVOX のエンジンを起動してください。既定では `http://host.docker.internal:50021` を参照します。 +詳細は公式リポジトリを参照してください。 + +https://github.com/VOICEVOX/voicevox_engine + +### 3. 音声を生成 +```bash +npm run voice:generate +``` + +`src/data/script.json` の各センテンスから `public/audio/lines/*.wav` を生成し、 +`src/data/voicevox-manifest.json` に長さ情報を記録します。 + +以前の `public/audio/zundamon.txt` は互換用に残していますが、現在は参照しません。 + +### 4. プレビュー +```bash +npm run start +``` + +### 5. レンダリング +```bash +npm run render YukkuriZundamon out/video.mp4 +``` + +## 編集ポイント +- ナレーション文: `src/data/script.json` +- 音声タイミング: `src/data/voicevox-manifest.json` (自動生成) +- 映像の構成: `src/yukkuri-composition.tsx` + +## VOICEVOX設定 +環境変数で変更できます。 + +- `VOICEVOX_URL` (既定: `http://host.docker.internal:50021`) +- `VOICEVOX_SPEAKER_ID` (既定: `3` / ずんだもん) diff --git a/voicevox-remotion-template/public/audio/lines/zunda-001.wav b/voicevox-remotion-template/public/audio/lines/zunda-001.wav new file mode 100644 index 0000000..9f7a71e --- /dev/null +++ b/voicevox-remotion-template/public/audio/lines/zunda-001.wav Binary files differ diff --git a/voicevox-remotion-template/public/audio/lines/zunda-002.wav b/voicevox-remotion-template/public/audio/lines/zunda-002.wav new file mode 100644 index 0000000..c32b5dd --- /dev/null +++ b/voicevox-remotion-template/public/audio/lines/zunda-002.wav Binary files differ diff --git a/voicevox-remotion-template/public/audio/lines/zunda-003.wav b/voicevox-remotion-template/public/audio/lines/zunda-003.wav new file mode 100644 index 0000000..7a38b8a --- /dev/null +++ b/voicevox-remotion-template/public/audio/lines/zunda-003.wav 
Binary files differ diff --git a/voicevox-remotion-template/public/audio/lines/zunda-004.wav b/voicevox-remotion-template/public/audio/lines/zunda-004.wav new file mode 100644 index 0000000..04f069e --- /dev/null +++ b/voicevox-remotion-template/public/audio/lines/zunda-004.wav Binary files differ diff --git a/voicevox-remotion-template/public/audio/lines/zunda-005.wav b/voicevox-remotion-template/public/audio/lines/zunda-005.wav new file mode 100644 index 0000000..66e9195 --- /dev/null +++ b/voicevox-remotion-template/public/audio/lines/zunda-005.wav Binary files differ diff --git a/voicevox-remotion-template/public/audio/zundamon.txt b/voicevox-remotion-template/public/audio/zundamon.txt new file mode 100644 index 0000000..cc2db9e --- /dev/null +++ b/voicevox-remotion-template/public/audio/zundamon.txt @@ -0,0 +1,5 @@ +みなさんこんにちは、ずんだもんなのだ! +今日のテーマは「ネコミミはなぜかわいいのか?」なのだ。 +まず大きな理由は、丸みのあるシルエットと動きなのだ。 +そして感情が伝わりやすくて、親近感が増すのだ! +それじゃあ、また次回なのだ! diff --git a/voicevox-remotion-template/public/audio/zundamon.wav b/voicevox-remotion-template/public/audio/zundamon.wav new file mode 100644 index 0000000..60bfe4f --- /dev/null +++ b/voicevox-remotion-template/public/audio/zundamon.wav Binary files differ diff --git a/voicevox-remotion-template/scripts/voicevox-generate.js b/voicevox-remotion-template/scripts/voicevox-generate.js new file mode 100644 index 0000000..c434762 --- /dev/null +++ b/voicevox-remotion-template/scripts/voicevox-generate.js @@ -0,0 +1,100 @@ +import fs from "node:fs/promises"; + +const VOICEVOX_URL = + process.env.VOICEVOX_URL ?? "http://host.docker.internal:50021"; +const SPEAKER_ID = Number(process.env.VOICEVOX_SPEAKER_ID ?? 
"3"); + +const inputPath = new URL("../src/data/script.json", import.meta.url); +const outputDir = new URL("../public/audio/lines/", import.meta.url); +const manifestPath = new URL( + "../src/data/voicevox-manifest.json", + import.meta.url +); + +const getWavDurationSeconds = (buffer) => { + if (buffer.toString("ascii", 0, 4) !== "RIFF") { + throw new Error("Invalid WAV header: RIFF not found."); + } + if (buffer.toString("ascii", 8, 12) !== "WAVE") { + throw new Error("Invalid WAV header: WAVE not found."); + } + + let offset = 12; + let byteRate = 0; + let dataSize = 0; + + while (offset + 8 <= buffer.length) { + const chunkId = buffer.toString("ascii", offset, offset + 4); + const chunkSize = buffer.readUInt32LE(offset + 4); + if (chunkId === "fmt ") { + byteRate = buffer.readUInt32LE(offset + 16); + } + if (chunkId === "data") { + dataSize = chunkSize; + break; + } + offset += 8 + chunkSize + (chunkSize % 2); + } + + if (!byteRate || !dataSize) { + throw new Error("Failed to read WAV duration."); + } + + return dataSize / byteRate; +}; + +const raw = await fs.readFile(inputPath, "utf8"); +const script = JSON.parse(raw); +if (!Array.isArray(script) || script.length === 0) { + throw new Error("src/data/script.json is empty."); +} + +await fs.mkdir(outputDir, {recursive: true}); +const manifest = []; + +for (const sentence of script) { + if (!sentence?.id || !sentence?.text) { + throw new Error("Each entry needs id and text in script.json."); + } + + const queryResponse = await fetch( + `${VOICEVOX_URL}/audio_query?text=${encodeURIComponent(sentence.text)}&speaker=${SPEAKER_ID}`, + {method: "POST"} + ); + if (!queryResponse.ok) { + throw new Error(`audio_query failed: ${queryResponse.status}`); + } + + const query = await queryResponse.json(); + query.speedScale = 1.02; + query.pitchScale = 0.0; + query.intonationScale = 1.1; + + const synthResponse = await fetch( + `${VOICEVOX_URL}/synthesis?speaker=${SPEAKER_ID}`, + { + method: "POST", + headers: 
{"Content-Type": "application/json"}, + body: JSON.stringify(query), + } + ); + if (!synthResponse.ok) { + throw new Error(`synthesis failed: ${synthResponse.status}`); + } + + const audioBuffer = Buffer.from(await synthResponse.arrayBuffer()); + const outputPath = new URL(`./${sentence.id}.wav`, outputDir); + await fs.writeFile(outputPath, audioBuffer); + const durationSeconds = getWavDurationSeconds(audioBuffer); + manifest.push({ + id: sentence.id, + file: `audio/lines/${sentence.id}.wav`, + durationSeconds, + }); + console.log( + `Wrote ${outputPath.pathname} (${durationSeconds.toFixed(2)}s)` + ); +} + +await fs.writeFile(manifestPath, JSON.stringify(manifest, null, 2) + "\n"); +console.log(`Updated ${manifestPath.pathname}`); diff --git a/voicevox-remotion-template/src/data/script.json b/voicevox-remotion-template/src/data/script.json new file mode 100644 index 0000000..2fd33f9 --- /dev/null +++ b/voicevox-remotion-template/src/data/script.json @@ -0,0 +1,22 @@ +[ + { + "id": "zunda-001", + "text": "みなさんこんにちは、ずんだもんなのだ!" + }, + { + "id": "zunda-002", + "text": "今日のテーマは「ネコミミはなぜかわいいのか?」なのだ。" + }, + { + "id": "zunda-003", + "text": "まず大きな理由は、丸みのあるシルエットと動きなのだ。" + }, + { + "id": "zunda-004", + "text": "そして感情が伝わりやすくて、親近感が増すのだ!" + }, + { + "id": "zunda-005", + "text": "それじゃあ、また次回なのだ!" 
+ } +] diff --git a/voicevox-remotion-template/src/data/script.ts b/voicevox-remotion-template/src/data/script.ts new file mode 100644 index 0000000..17a2840 --- /dev/null +++ b/voicevox-remotion-template/src/data/script.ts @@ -0,0 +1,10 @@ +import rawScript from "./script.json"; +/** One narration entry from script.json: an id and the text for that line. */ +export type Sentence = { + id: string; + text: string; +}; +/** Narration entries loaded from ./script.json, typed as Sentence[]. */ +export const script: Sentence[] = rawScript; +/** Builds the audio path string "audio/lines/<id>.wav" for a sentence id. */ +export const audioFileFor = (id: string) => `audio/lines/${id}.wav`; diff --git a/voicevox-remotion-template/src/data/timing.ts b/voicevox-remotion-template/src/data/timing.ts new file mode 100644 index 0000000..ef376e5 --- /dev/null +++ b/voicevox-remotion-template/src/data/timing.ts @@ -0,0 +1,35 @@ +import {script, Sentence} from "./script"; +import voicevoxManifest from "./voicevox-manifest.json"; +/** Shape of one record in voicevox-manifest.json. */ +type ManifestEntry = { + id: string; + file: string; + durationSeconds: number; +}; +/* Manifest records, indexed by sentence id for lookup. */ +const manifestEntries = voicevoxManifest as ManifestEntry[]; +const manifestById = new Map( + manifestEntries.map((entry) => [entry.id, entry]) +); + +export const FPS = 30; /* default frames per second used by the helpers below */ +export const GAP_FRAMES = 6; /* frames added after every sentence except the last */ +/** True when the manifest holds a record for this sentence's id. */ +export const hasAudioForSentence = (sentence: Sentence) => + manifestById.has(sentence.id); +/** Length of a sentence in frames: the manifest duration rounded up (at least 1 frame), otherwise an estimate of 0.11 s per character with a 1.2 s floor. */ +export const durationForSentence = (sentence: Sentence, fps = FPS) => { + const entry = manifestById.get(sentence.id); + if (entry && Number.isFinite(entry.durationSeconds)) { + return Math.max(1, Math.ceil(entry.durationSeconds * fps)); + } + /* No usable manifest duration: estimate from the text length instead. */ + const estimatedSeconds = Math.max(1.2, sentence.text.length * 0.11); + return Math.ceil(estimatedSeconds * fps); +}; +/** Sum of every sentence's frame length plus GAP_FRAMES after each sentence except the last; 0 for an empty script. */ +export const totalDurationInFrames = (fps = FPS) => + script.reduce((sum, sentence, index) => { + const gap = index < script.length - 1 ? 
GAP_FRAMES : 0; + return sum + durationForSentence(sentence, fps) + gap; + }, 0); diff --git a/voicevox-remotion-template/src/data/voicevox-manifest.json b/voicevox-remotion-template/src/data/voicevox-manifest.json new file mode 100644 index 0000000..59ab5a8 --- /dev/null +++ b/voicevox-remotion-template/src/data/voicevox-manifest.json @@ -0,0 +1,27 @@ +[ + { + "id": "zunda-001", + "file": "audio/lines/zunda-001.wav", + "durationSeconds": 3.0613333333333332 + }, + { + "id": "zunda-002", + "file": "audio/lines/zunda-002.wav", + "durationSeconds": 4.48 + }, + { + "id": "zunda-003", + "file": "audio/lines/zunda-003.wav", + "durationSeconds": 4.394666666666667 + }, + { + "id": "zunda-004", + "file": "audio/lines/zunda-004.wav", + "durationSeconds": 4.32 + }, + { + "id": "zunda-005", + "file": "audio/lines/zunda-005.wav", + "durationSeconds": 2.474666666666667 + } +] diff --git a/voicevox-remotion-template/src/index.ts b/voicevox-remotion-template/src/index.ts new file mode 100644 index 0000000..6500c00 --- /dev/null +++ b/voicevox-remotion-template/src/index.ts @@ -0,0 +1,4 @@ +import {registerRoot} from "remotion"; +import {Root} from "./root"; +/* Entry point: hands the Root component to Remotion's registerRoot. */ +registerRoot(Root); diff --git a/voicevox-remotion-template/src/root.tsx b/voicevox-remotion-template/src/root.tsx new file mode 100644 index 0000000..d499ed4 --- /dev/null +++ b/voicevox-remotion-template/src/root.tsx @@ -0,0 +1,19 @@ +import React from "react"; +import {Composition} from "remotion"; +import {YukkuriComposition} from "./yukkuri-composition"; +import {FPS, totalDurationInFrames} from "./data/timing"; + +export const Root: React.FC = () => { + return ( + <> + + + ); +}; diff --git a/voicevox-remotion-template/src/yukkuri-composition.tsx b/voicevox-remotion-template/src/yukkuri-composition.tsx new file mode 100644 index 0000000..cded9eb --- /dev/null +++ b/voicevox-remotion-template/src/yukkuri-composition.tsx @@ -0,0 +1,252 @@ +import React from "react"; +import { + AbsoluteFill, + Audio, + interpolate, 
Sequence, + spring, + staticFile, + useCurrentFrame, + useVideoConfig, +} from "remotion"; +import {audioFileFor, script, Sentence} from "./data/script"; +import { + GAP_FRAMES, + durationForSentence, + hasAudioForSentence, +} from "./data/timing"; + +const Title: React.FC> = ({progress}) => { + const opacity = interpolate(progress, [0, 1], [0, 1]); + const translateY = interpolate(progress, [0, 1], [-30, 0]); + + return ( +
+ ネコミミはなぜかわいい? +
+ ); +}; + +const Subtitle: React.FC> = ({ + text, + progress, +}) => { + const opacity = interpolate(progress, [0, 1], [0, 1]); + const translateY = interpolate(progress, [0, 1], [16, 0]); + + return ( +
+ {text} +
+ ); +}; + +const Zundamon: React.FC> = ({bounce}) => { + return ( +
+
+
+
+
+
+
+
+
+ ); +}; + +const SentenceSegment: React.FC> = ({sentence}) => { + const frame = useCurrentFrame(); + const {fps} = useVideoConfig(); + + const subtitleProgress = spring({ + frame, + fps, + config: {damping: 20, mass: 0.7}, + }); + + return ( + <> +
+ +
+ {hasAudioForSentence(sentence) ? ( +