diff --git a/voicevox-remotion-template/src/data/script.ts b/voicevox-remotion-template/src/data/script.ts index 17a2840..7639453 100644 --- a/voicevox-remotion-template/src/data/script.ts +++ b/voicevox-remotion-template/src/data/script.ts @@ -1,10 +1,129 @@ -import rawScript from "./script.json"; +export type VoicevoxVoice = Readonly<{ + speakerName: string; + styleName: string; +}>; -export type Sentence = { +export type AvatarDefinition = Readonly<{ + kind: "zundamon" | "sayo"; + accentColor: string; + imagePath?: string; +}>; + +export type CharacterDefinition = Readonly<{ + displayName: string; + voicevox: VoicevoxVoice; + avatar: AvatarDefinition; +}>; + +export const characters = { + zundamon: { + displayName: "ずんだもん", + voicevox: { + speakerName: "ずんだもん", + styleName: "ノーマル", + }, + avatar: { + kind: "zundamon", + accentColor: "#79d36f", + }, + }, + sayo: { + displayName: "小夜", + voicevox: { + speakerName: "小夜/SAYO", + styleName: "ノーマル", + }, + avatar: { + kind: "sayo", + accentColor: "#6b5f83", + }, + }, +} as const satisfies Record; + +export type CharacterId = keyof typeof characters; + +export type SpeechOptions = Readonly<{ + subtitle?: string; + voicevox?: Partial; +}>; + +export type ShowOptions = Readonly<{ + caption?: string; + durationSeconds?: number; +}>; + +export type SpeechEvent = Readonly<{ + type: "say"; id: string; + character: CharacterId; text: string; -}; + subtitle?: string; + voicevox?: Partial; +}>; -export const script: Sentence[] = rawScript; +export type ShowEvent = Readonly<{ + type: "show"; + character: CharacterId; + caption?: string; + durationSeconds?: number; +}>; -export const audioFileFor = (id: string) => `audio/lines/${id}.wav`; +export type TimelineEvent = SpeechEvent | ShowEvent; + +export const say = ( + id: string, + character: CharacterId, + text: string, + options: SpeechOptions = {} +): SpeechEvent => ({ + type: "say", + id, + character, + text, + ...options, +}); + +export const show = ( + character: CharacterId, + options: ShowOptions = {} +): ShowEvent => ({ + type: "show", + character, + ...options, +}); + +export const initialVisibleCharacters: CharacterId[] = ["zundamon"]; + +export const timeline: TimelineEvent[] = [ + say("zunda-001", "zundamon", "みなさんこんにちは、ずんだもんなのだ!"), + say( + "zunda-002", + "zundamon", + "今日のテーマは「ネコミミはなぜかわいいのか?」なのだ。" + ), + say( + "zunda-003", + "zundamon", + "まず大きな理由は、丸みのあるシルエットと動きなのだ。" + ), + say( + "zunda-004", + "zundamon", + "そして感情が伝わりやすくて、親近感が増すのだ!" + ), + show("sayo", { + caption: "ネコミミ代表として、小夜が登場!", + }), + say( + "sayo-001", + "sayo", + "小夜です。ネコミミ代表として、猫耳のかわいさを証明しに来ました。" + ), + say("zunda-005", "zundamon", "それじゃあ、また次回なのだ!"), +]; + +export const isSpeechEvent = ( + event: TimelineEvent +): event is SpeechEvent => event.type === "say"; + +export const script = timeline.filter(isSpeechEvent); diff --git a/voicevox-remotion-template/src/data/timing.ts b/voicevox-remotion-template/src/data/timing.ts index ef376e5..6780ef3 100644 --- a/voicevox-remotion-template/src/data/timing.ts +++ b/voicevox-remotion-template/src/data/timing.ts @@ -1,8 +1,12 @@ -import {script, Sentence} from "./script"; +import {SpeechEvent, timeline, TimelineEvent} from "./script"; import voicevoxManifest from "./voicevox-manifest.json"; type ManifestEntry = { id: string; + character?: string; + speakerName?: string; + styleName?: string; + speakerId?: number; file: string; durationSeconds: number; }; @@ -14,22 +18,38 @@ export const FPS = 30; export const GAP_FRAMES = 6; +export const DEFAULT_SHOW_SECONDS = 1.5; -export const hasAudioForSentence = (sentence: Sentence) => - manifestById.has(sentence.id); +export const hasAudioForSpeech = (speech: SpeechEvent) => + manifestById.has(speech.id); -export const durationForSentence = (sentence: Sentence, fps = FPS) => { - const entry = manifestById.get(sentence.id); +export const audioFileForSpeech = (speech: SpeechEvent) => + manifestById.get(speech.id)?.file ?? `audio/lines/${speech.id}.wav`; + +export const durationForSpeech = (speech: SpeechEvent, fps = FPS) => { + const entry = manifestById.get(speech.id); if (entry && Number.isFinite(entry.durationSeconds)) { return Math.max(1, Math.ceil(entry.durationSeconds * fps)); } - const estimatedSeconds = Math.max(1.2, sentence.text.length * 0.11); + const estimatedSeconds = Math.max(1.2, speech.text.length * 0.11); return Math.ceil(estimatedSeconds * fps); }; +export const durationForTimelineEvent = ( + event: TimelineEvent, + fps = FPS +) => { + if (event.type === "say") { + return durationForSpeech(event, fps); + } + + const durationSeconds = event.durationSeconds ?? DEFAULT_SHOW_SECONDS; + return Math.max(1, Math.ceil(durationSeconds * fps)); +}; + export const totalDurationInFrames = (fps = FPS) => - script.reduce((sum, sentence, index) => { - const gap = index < script.length - 1 ? GAP_FRAMES : 0; - return sum + durationForSentence(sentence, fps) + gap; + timeline.reduce((sum, event, index) => { + const gap = index < timeline.length - 1 ? GAP_FRAMES : 0; + return sum + durationForTimelineEvent(event, fps) + gap; }, 0); diff --git a/voicevox-remotion-template/src/yukkuri-composition.tsx b/voicevox-remotion-template/src/yukkuri-composition.tsx index cded9eb..3c7a0e6 100644 --- a/voicevox-remotion-template/src/yukkuri-composition.tsx +++ b/voicevox-remotion-template/src/yukkuri-composition.tsx @@ -2,6 +2,7 @@ import { AbsoluteFill, Audio, + Img, interpolate, Sequence, spring, @@ -9,13 +10,71 @@ useCurrentFrame, useVideoConfig, } from "remotion"; -import {audioFileFor, script, Sentence} from "./data/script"; import { + characters, + initialVisibleCharacters, + timeline, + type AvatarDefinition, + type CharacterId, + type TimelineEvent, +} from "./data/script"; +import { + audioFileForSpeech, GAP_FRAMES, - durationForSentence, - hasAudioForSentence, + durationForTimelineEvent, + hasAudioForSpeech, } from "./data/timing"; +type ScheduledTimelineEvent = Readonly<{ + event: TimelineEvent; + from: number; + durationInFrames: number; + visibleCharacters: CharacterId[]; + focusedCharacter: CharacterId; +}>; + +const scheduleTimeline = (fps: number): ScheduledTimelineEvent[] => { + let cursor = 0; + const visibleCharacters = new Set(initialVisibleCharacters); + + return timeline.map((event, index) => { + visibleCharacters.add(event.character); + + const durationInFrames = durationForTimelineEvent(event, fps); + const scheduledEvent = { + event, + from: cursor, + durationInFrames, + visibleCharacters: Array.from(visibleCharacters), + focusedCharacter: event.character, + }; + + cursor += durationInFrames; + if (index < timeline.length - 1) { + cursor += GAP_FRAMES; + } + + return scheduledEvent; + }); +}; + +const activeSegmentForFrame = ( + scheduledEvents: ScheduledTimelineEvent[], + frame: number +) => { + let activeSegment = scheduledEvents[0]; + + for (const scheduledEvent of scheduledEvents) { + if (frame >= scheduledEvent.from) { + activeSegment = scheduledEvent; + } else { + break; + } + } + + return activeSegment; +}; + const Title: React.FC> = ({progress}) => { const opacity = interpolate(progress, [0, 1], [0, 1]); const translateY = interpolate(progress, [0, 1], [-30, 0]); @@ -41,10 +100,14 @@ ); }; -const Subtitle: React.FC> = ({ - text, - progress, -}) => { +const Subtitle: React.FC< + Readonly<{ + text: string; + progress: number; + speakerName?: string; + accentColor?: string; + }> +> = ({text, progress, speakerName, accentColor = "#1f2a44"}) => { const opacity = interpolate(progress, [0, 1], [0, 1]); const translateY = interpolate(progress, [0, 1], [16, 0]); @@ -58,20 +121,36 @@ color: "#1a1a1a", lineHeight: 1.4, padding: "18px 28px", - backgroundColor: "rgba(255, 255, 255, 0.85)", + backgroundColor: "rgba(255, 255, 255, 0.88)", borderRadius: 18, - border: "2px solid rgba(31, 42, 68, 0.15)", + border: `2px solid ${accentColor}33`, boxShadow: "0 10px 30px rgba(31, 42, 68, 0.15)", + maxWidth: 980, opacity, transform: `translateY(${translateY}px)`, }} > - {text} + {speakerName ? ( +
+ {speakerName} +
+ ) : null} +
{text}
); }; -const Zundamon: React.FC> = ({bounce}) => { +const ZundamonAvatar: React.FC = () => { return (
> = ({sentence}) => { +const SayoAvatar: React.FC> = ({ + accentColor, +}) => { + return ( +
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ); +}; + +const CharacterAvatar: React.FC< + Readonly<{ + characterId: CharacterId; + focused: boolean; + hasMultipleCharacters: boolean; + bounce: number; + }> +> = ({characterId, focused, hasMultipleCharacters, bounce}) => { + const character = characters[characterId]; + const {avatar}: {avatar: AvatarDefinition} = character; + const scale = focused ? 1.05 : hasMultipleCharacters ? 0.88 : 1; + const opacity = focused || !hasMultipleCharacters ? 1 : 0.72; + const translateY = focused ? bounce : bounce * 0.35; + + return ( +
+ {avatar.imagePath ? ( + + ) : avatar.kind === "sayo" ? ( + + ) : ( + + )} +
+ {character.displayName} +
+
+ ); +}; + +const Stage: React.FC< + Readonly<{ + visibleCharacters: CharacterId[]; + focusedCharacter?: CharacterId; + bounce: number; + }> +> = ({visibleCharacters, focusedCharacter, bounce}) => { + const hasMultipleCharacters = visibleCharacters.length > 1; + + return ( +
+ {visibleCharacters.map((characterId) => ( + + ))} +
+ ); +}; + +const TimelineOverlay: React.FC> = ({event}) => { const frame = useCurrentFrame(); const {fps} = useVideoConfig(); @@ -162,6 +475,16 @@ config: {damping: 20, mass: 0.7}, }); + const text = event.type === "say" ? event.subtitle ?? event.text : event.caption; + if (!text) { + return event.type === "say" && hasAudioForSpeech(event) ? ( +