diff --git a/voicevox-remotion-template/public/image/still/blender-oven-still01.png b/voicevox-remotion-template/public/image/still/blender-oven-still01.png new file mode 100644 index 0000000..8c3c856 --- /dev/null +++ b/voicevox-remotion-template/public/image/still/blender-oven-still01.png Binary files differ diff --git a/voicevox-remotion-template/src/data/pizza-oven-project-01/script.ts b/voicevox-remotion-template/src/data/pizza-oven-project-01/script.ts index 388e6de..ac1b665 100644 --- a/voicevox-remotion-template/src/data/pizza-oven-project-01/script.ts +++ b/voicevox-remotion-template/src/data/pizza-oven-project-01/script.ts @@ -1,4 +1,5 @@ import {getStandeeSet, type AvatarDefinition} from "../../standee-sets"; +import type {VQStillEvent} from "../../lib/VQRemotionLib"; export type VoicevoxVoice = Readonly<{ speakerName: string; @@ -45,7 +46,9 @@ durationSeconds?: number; }>; -export type TimelineEvent = SpeechEvent; +export type StillEvent = VQStillEvent; + +export type TimelineEvent = SpeechEvent | StillEvent; export const say = ( id: string, @@ -60,11 +63,37 @@ ...options, }); +export const still = ( + id: string, + imagePath: string, + options: Omit = {} +): StillEvent => ({ + type: "still", + id, + imagePath, + ...options, +}); + export const timeline = [ say("pizza-oven-project-01-sayo-001", "sayo", "こんにちは。小夜です。"), say("pizza-oven-project-01-sayo-002", "sayo", "ピザって美味しいじゃないですか。"), say("pizza-oven-project-01-sayo-003", "sayo", "だから、作る事にしたんですよね。"), - say("pizza-oven-project-01-sayo-004", "sayo", "ピザ窯を。"), + still( + "pizza-oven-project-01-oven-still-001", + "image/still/blender-oven-still01.png", + { + durationSeconds: 1.5, + fit: "cover", + } + ), + say("pizza-oven-project-01-sayo-004", "sayo", "ピザ窯を。", { + durationSeconds: 3, + }), + say("pizza-oven-project-01-sayo-005", "sayo", "まずはblender上で、耐熱レンガの寸法を元に積み方を設計することにしました。"), ] satisfies TimelineEvent[]; -export const script = timeline; +export const isSpeechEvent = ( + event: TimelineEvent +): event is SpeechEvent => event.type === "say"; + +export const script = timeline.filter(isSpeechEvent); diff --git a/voicevox-remotion-template/src/data/pizza-oven-project-01/timing.ts b/voicevox-remotion-template/src/data/pizza-oven-project-01/timing.ts index 702c51c..910e675 100644 --- a/voicevox-remotion-template/src/data/pizza-oven-project-01/timing.ts +++ b/voicevox-remotion-template/src/data/pizza-oven-project-01/timing.ts @@ -23,6 +23,7 @@ export const PIZZA_OVEN_PROJECT_01_FPS = 30; export const PIZZA_OVEN_PROJECT_01_GAP_FRAMES = 6; +export const PIZZA_OVEN_PROJECT_01_DEFAULT_STILL_SECONDS = 1.5; export const hasAudioForSpeech = (speech: SpeechEvent) => manifestById.has(speech.id); @@ -51,7 +52,15 @@ export const durationForTimelineEvent = ( event: TimelineEvent, fps = PIZZA_OVEN_PROJECT_01_FPS -) => durationForSpeech(event, fps); +) => { + if (event.type === "say") { + return durationForSpeech(event, fps); + } + + const durationSeconds = + event.durationSeconds ?? PIZZA_OVEN_PROJECT_01_DEFAULT_STILL_SECONDS; + return Math.max(1, Math.ceil(durationSeconds * fps)); +}; export const pizzaOvenProject01AssetWorkflow = defineVQScenarioAssetWorkflow({ diff --git a/voicevox-remotion-template/src/data/pizza-oven-project-01/voicevox-manifest.json b/voicevox-remotion-template/src/data/pizza-oven-project-01/voicevox-manifest.json index 9c95c1b..42182ce 100644 --- a/voicevox-remotion-template/src/data/pizza-oven-project-01/voicevox-manifest.json +++ b/voicevox-remotion-template/src/data/pizza-oven-project-01/voicevox-manifest.json @@ -25,5 +25,23 @@ "speakerId": 46, "file": "audio/pizza-oven-project-01/lines/pizza-oven-project-01-sayo-003.wav", "durationSeconds": 3.029333333333333 + }, + { + "id": "pizza-oven-project-01-sayo-004", + "character": "sayo", + "speakerName": "小夜/SAYO", + "styleName": "ノーマル", + "speakerId": 46, + "file": "audio/pizza-oven-project-01/lines/pizza-oven-project-01-sayo-004.wav", + "durationSeconds": 1.0133333333333334 + }, + { + "id": "pizza-oven-project-01-sayo-005", + "character": "sayo", + "speakerName": "小夜/SAYO", + "styleName": "ノーマル", + "speakerId": 46, + "file": "audio/pizza-oven-project-01/lines/pizza-oven-project-01-sayo-005.wav", + "durationSeconds": 6.581333333333333 } ] diff --git a/voicevox-remotion-template/src/generated/lipsync/manifest.json b/voicevox-remotion-template/src/generated/lipsync/manifest.json index 282bb6e..1af6d3e 100644 --- a/voicevox-remotion-template/src/generated/lipsync/manifest.json +++ b/voicevox-remotion-template/src/generated/lipsync/manifest.json @@ -2136,6 +2136,299 @@ "source": "X" } ] + }, + "pizza-oven-project-01-sayo-005": { + "version": 1, + "source": { + "audio": "audio/pizza-oven-project-01/lines/pizza-oven-project-01-sayo-005.wav", + "engine": "rhubarb-lip-sync", + "recognizer": "phonetic" + }, + "duration": 6.58, + "cues": [ + { + "start": 0, + "end": 0.09, + "mouth": "rest", + "source": "X" + }, + { + "start": 0.09, + "end": 0.23, + "mouth": "i", + "source": "B" + }, + { + "start": 0.23, + "end": 0.31, + "mouth": "closed", + "source": "A" + }, + { + "start": 0.31, + "end": 0.44, + "mouth": "e", + "source": "C" + }, + { + "start": 0.44, + "end": 0.52, + "mouth": "closed", + "source": "A" + }, + { + "start": 0.52, + "end": 0.72, + "mouth": "e", + "source": "C" + }, + { + "start": 0.72, + "end": 0.79, + "mouth": "i", + "source": "B" + }, + { + "start": 0.79, + "end": 0.87, + "mouth": "closed", + "source": "A" + }, + { + "start": 0.87, + "end": 1.04, + "mouth": "e", + "source": "C" + }, + { + "start": 1.04, + "end": 1.11, + "mouth": "i", + "source": "B" + }, + { + "start": 1.11, + "end": 1.32, + "mouth": "u", + "source": "F" + }, + { + "start": 1.32, + "end": 1.6, + "mouth": "i", + "source": "B" + }, + { + "start": 1.6, + "end": 2.02, + "mouth": "rest", + "source": "X" + }, + { + "start": 2.02, + "end": 2.07, + "mouth": "i", + "source": "B" + }, + { + "start": 2.07, + "end": 2.19, + "mouth": "e", + "source": "C" + }, + { + "start": 2.19, + "end": 2.96, + "mouth": "i", + "source": "B" + }, + { + "start": 2.96, + "end": 3.08, + "mouth": "closed", + "source": "A" + }, + { + "start": 3.08, + "end": 3.16, + "mouth": "u", + "source": "F" + }, + { + "start": 3.16, + "end": 3.24, + "mouth": "closed", + "source": "A" + }, + { + "start": 3.24, + "end": 3.54, + "mouth": "o", + "source": "E" + }, + { + "start": 3.54, + "end": 3.65, + "mouth": "closed", + "source": "A" + }, + { + "start": 3.65, + "end": 3.71, + "mouth": "a", + "source": "D" + }, + { + "start": 3.71, + "end": 3.98, + "mouth": "i", + "source": "B" + }, + { + "start": 3.98, + "end": 4.05, + "mouth": "closed", + "source": "A" + }, + { + "start": 4.05, + "end": 4.12, + "mouth": "i", + "source": "B" + }, + { + "start": 4.12, + "end": 4.18, + "mouth": "closed", + "source": "A" + }, + { + "start": 4.18, + "end": 4.32, + "mouth": "i", + "source": "B" + }, + { + "start": 4.32, + "end": 4.47, + "mouth": "closed", + "source": "A" + }, + { + "start": 4.47, + "end": 4.65, + "mouth": "i", + "source": "B" + }, + { + "start": 4.65, + "end": 4.75, + "mouth": "closed", + "source": "A" + }, + { + "start": 4.75, + "end": 4.98, + "mouth": "i", + "source": "B" + }, + { + "start": 4.98, + "end": 5.12, + "mouth": "e", + "source": "C" + }, + { + "start": 5.12, + "end": 5.26, + "mouth": "i", + "source": "B" + }, + { + "start": 5.26, + "end": 5.33, + "mouth": "e", + "source": "C" + }, + { + "start": 5.33, + "end": 5.4, + "mouth": "i", + "source": "B" + }, + { + "start": 5.4, + "end": 5.47, + "mouth": "u", + "source": "F" + }, + { + "start": 5.47, + "end": 5.55, + "mouth": "closed", + "source": "A" + }, + { + "start": 5.55, + "end": 5.84, + "mouth": "i", + "source": "B" + }, + { + "start": 5.84, + "end": 5.88, + "mouth": "closed", + "source": "A" + }, + { + "start": 5.88, + "end": 5.95, + "mouth": "i", + "source": "B" + }, + { + "start": 5.95, + "end": 6.02, + "mouth": "closed", + "source": "A" + }, + { + "start": 6.02, + "end": 6.06, + "mouth": "e", + "source": "C" + }, + { + "start": 6.06, + "end": 6.1, + "mouth": "i", + "source": "B" + }, + { + "start": 6.1, + "end": 6.21, + "mouth": "closed", + "source": "A" + }, + { + "start": 6.21, + "end": 6.41, + "mouth": "e", + "source": "C" + }, + { + "start": 6.41, + "end": 6.48, + "mouth": "i", + "source": "B" + }, + { + "start": 6.48, + "end": 6.58, + "mouth": "rest", + "source": "X" + } + ] } } } diff --git a/voicevox-remotion-template/src/pizza-kiln-composition.tsx b/voicevox-remotion-template/src/pizza-kiln-composition.tsx index 19d7ab4..a82da59 100644 --- a/voicevox-remotion-template/src/pizza-kiln-composition.tsx +++ b/voicevox-remotion-template/src/pizza-kiln-composition.tsx @@ -21,23 +21,14 @@ } from "./data/pizza-kiln/script"; import {roundedFontFamily} from "./fonts"; import { - VQLipSyncedStandeeImage, VQSpeechOverlay, + VQStageCornerStandee, VQWarmGradientBackground, vqSpeakingAvatarAnimations, } from "./lib/VQRemotionLib"; import {getMouthForSpeechFrame} from "./lipsync/manifest"; const BACKGROUND_VIDEO_PATH = "video/pizza-kiln-background.mp4"; -const STAGE_STANDEE_WIDTH = 610; -const STAGE_STANDEE_HEIGHT = 760; -const STAGE_STANDEE_RIGHT = 315; -const STAGE_STANDEE_BOTTOM = -118; -const CORNER_STANDEE_WIDTH = 420; -const CORNER_STANDEE_HEIGHT = 360; -const CORNER_IMAGE_WIDTH = 470; -const CORNER_IMAGE_HEIGHT = 705; - const pizzaSubtitleOptions = { fontFamily: roundedFontFamily, fontSize: 34, @@ -68,40 +59,15 @@ speaking && speakingAnimationType === "rhubarbLipSync" ? getMouthForSpeechFrame(speechId, localFrame, fps) : "rest"; - const isCorner = mode === "corner"; - const frameWidth = isCorner ? CORNER_STANDEE_WIDTH : STAGE_STANDEE_WIDTH; - const frameHeight = isCorner ? CORNER_STANDEE_HEIGHT : STAGE_STANDEE_HEIGHT; - return ( -
- -
+ ); }; diff --git a/voicevox-remotion-template/src/pizzaOvenProject01.ts b/voicevox-remotion-template/src/pizzaOvenProject01.ts index 93f6645..fe5b375 100644 --- a/voicevox-remotion-template/src/pizzaOvenProject01.ts +++ b/voicevox-remotion-template/src/pizzaOvenProject01.ts @@ -10,6 +10,7 @@ import { characters, type SpeechEvent, + type StillEvent, type TimelineEvent, } from "./data/pizza-oven-project-01/script"; import { @@ -21,14 +22,27 @@ import { activeVQChronologicalScenarioSegmentForFrame, scheduleVQChronologicalScenario, - VQLipSyncedStandeeImage, VQSpeechOverlay, + VQStageCornerStandee, + VQStillBackground, VQWarmGradientBackground, + type VQStageCornerStandeeLayouts, + vqDefaultStageCornerStandeeLayouts, } from "./lib/VQRemotionLib"; import {getMouthForSpeechFrame} from "./lipsync/manifest"; const sayoAvatar = characters.sayo.avatar; const PizzaOvenSpeechOverlay = VQSpeechOverlay; +const pizzaOvenStandeeLayouts = { + ...vqDefaultStageCornerStandeeLayouts, + stage: { + ...vqDefaultStageCornerStandeeLayouts.stage, + frameWidth: 520, + frameHeight: 720, + right: 330, + bottom: -82, + }, +} as const satisfies VQStageCornerStandeeLayouts; const clampInterpolation = { extrapolateLeft: "clamp", @@ -37,12 +51,13 @@ const SayoStandee: React.FC< Readonly<{ + mode: "stage" | "corner"; frame: number; fps: number; activeSpeech?: SpeechEvent; speakingLocalFrame: number; }> -> = ({frame, fps, activeSpeech, speakingLocalFrame}) => { +> = ({mode, frame, fps, activeSpeech, speakingLocalFrame}) => { const entrance = spring({ frame, fps, @@ -53,37 +68,24 @@ ? getMouthForSpeechFrame(activeSpeech.id, speakingLocalFrame, fps) : "rest"; - return React.createElement( - "div", - { - style: { - position: "absolute", - right: 330, - bottom: -82, - width: 520, - height: 720, - display: "flex", - justifyContent: "center", - alignItems: "flex-end", - transform: `translateY(${translateY}px)`, - zIndex: 2, - } satisfies React.CSSProperties, - }, - React.createElement(VQLipSyncedStandeeImage, { - imagePath: sayoAvatar.imagePath, - mouthImageDir: sayoAvatar.mouthImageDir, - mouth, - width: "100%", - height: "100%", - maxHeight: "100%", - filter: "drop-shadow(0 18px 40px rgba(31, 42, 68, 0.22))", - }) - ); + return React.createElement(VQStageCornerStandee, { + mode, + imagePath: sayoAvatar.imagePath, + mouthImageDir: sayoAvatar.mouthImageDir, + mouth, + translateY, + layouts: pizzaOvenStandeeLayouts, + zIndex: 2, + }); }; const TimelineOverlay: React.FC> = ({ event, }) => { + if (event.type === "still") { + return null; + } + const character = characters[event.character]; return React.createElement(PizzaOvenSpeechOverlay, { @@ -117,7 +119,16 @@ ? frame < activeSegment.from + activeSegment.durationInFrames : false; const activeSpeech = - activeSegment && isInsideActiveSegment ? activeSegment.event : undefined; + activeSegment && isInsideActiveSegment && activeSegment.event.type === "say" + ? activeSegment.event + : undefined; + const activeStill = scheduledEvents.reduce( + (currentStill, scheduledEvent) => + scheduledEvent.from <= frame && scheduledEvent.event.type === "still" + ? scheduledEvent.event + : currentStill, + undefined + ); const speakingLocalFrame = activeSegment && activeSpeech ? frame - activeSegment.from : 0; const sequences = scheduledEvents.map((scheduledEvent) => @@ -143,7 +154,9 @@ }, }, React.createElement(VQWarmGradientBackground, null), + React.createElement(VQStillBackground, {still: activeStill}), React.createElement(SayoStandee, { + mode: activeStill ? "corner" : "stage", frame, fps, activeSpeech,