Newer
Older
remotion_docker_devcontainer / voicevox-remotion-template / src / pizza-kiln-composition.tsx
import React from "react";
import {Audio, Video} from "@remotion/media";
import {
  AbsoluteFill,
  interpolate,
  Sequence,
  spring,
  staticFile,
  useCurrentFrame,
  useVideoConfig,
} from "remotion";
import {
  audioFileForSpeech,
  durationForSpeech,
  hasAudioForSpeech,
  PIZZA_KILN_GAP_FRAMES,
  PIZZA_KILN_VIDEO_FRAMES,
} from "./data/pizza-kiln/timing";
import {
  characters,
  timeline,
  type CharacterDefinition,
  type SpeechEvent,
} from "./data/pizza-kiln/script";
import {roundedFontFamily} from "./fonts";
import {speakingAvatarAnimations} from "./avatar-animations";
import {getMouthForSpeechFrame} from "./lipsync/manifest";
import {
  defaultMouthImageDir,
  LipSyncedStandeeImage,
} from "./lipsync/LipSyncedStandeeImage";

// Path of the background clip; it is passed through staticFile() before being
// handed to <Video>.
const BACKGROUND_VIDEO_PATH = "video/pizza-kiln-background.mp4";
// "stage" layout of the standee: size of its frame and its offsets from the
// right and bottom edges. The negative bottom offset places the frame's lower
// edge below the bottom edge of its positioning container.
const STAGE_STANDEE_WIDTH = 610;
const STAGE_STANDEE_HEIGHT = 760;
const STAGE_STANDEE_RIGHT = 315;
const STAGE_STANDEE_BOTTOM = -118;
// "corner" layout of the standee: size of the frame, which hides overflow in
// this mode.
const CORNER_STANDEE_WIDTH = 420;
const CORNER_STANDEE_HEIGHT = 360;
// "corner" layout: size requested for the standee image itself. It is larger
// than the corner frame, so only part of the image is expected to stay visible
// (assumes LipSyncedStandeeImage honours these dimensions — TODO confirm).
const CORNER_IMAGE_WIDTH = 470;
const CORNER_IMAGE_HEIGHT = 705;

// Shared options object for interpolate(): clamp the output on both sides of
// the input range instead of extrapolating. `as const` keeps the "clamp"
// values as string literal types.
const clampInterpolation = {
  extrapolateLeft: "clamp",
  extrapolateRight: "clamp",
} as const;

/**
 * Static decorative backdrop made of two full-frame layers: a warm radial
 * gradient, and on top of it a semi-transparent layer of three soft white
 * highlight spots.
 */
const UsualBackground: React.FC = () => {
  // Base layer: warm gradient radiating from the top of the frame.
  const warmGradient =
    "radial-gradient(circle at top, #ffe8c7 0%, #ffd3b4 45%, #ffb6b6 100%)";
  // Overlay layer: three white spots that fade to fully transparent.
  const highlightSpots =
    "radial-gradient(circle at 20% 20%, rgba(255,255,255,0.6) 0, rgba(255,255,255,0) 40%), radial-gradient(circle at 80% 30%, rgba(255,255,255,0.5) 0, rgba(255,255,255,0) 45%), radial-gradient(circle at 30% 80%, rgba(255,255,255,0.4) 0, rgba(255,255,255,0) 50%)";

  return (
    <React.Fragment>
      <AbsoluteFill style={{background: warmGradient}} />
      <AbsoluteFill style={{backgroundImage: highlightSpots, opacity: 0.8}} />
    </React.Fragment>
  );
};

/**
 * Subtitle bubble with a coloured speaker-name tag above the spoken text.
 *
 * `progress` drives the entrance animation: it is mapped (clamped to 0..1)
 * onto the bubble's opacity and onto a 16px -> 0px upward slide.
 * `accentColor` colours the name tag and, with an "33" suffix appended,
 * the bubble border.
 */
const Subtitle: React.FC<
  Readonly<{
    text: string;
    progress: number;
    speakerName: string;
    accentColor: string;
  }>
> = (props) => {
  const {text, progress, speakerName, accentColor} = props;

  // Entrance animation values derived from the same progress input.
  const fadeIn = interpolate(progress, [0, 1], [0, 1], clampInterpolation);
  const slideOffset = interpolate(progress, [0, 1], [16, 0], clampInterpolation);

  const bubbleStyle: React.CSSProperties = {
    fontFamily: roundedFontFamily,
    fontSize: 34,
    fontWeight: 700,
    color: "#1a1a1a",
    lineHeight: 1.45,
    padding: "18px 28px",
    backgroundColor: "rgba(255, 255, 255, 0.9)",
    borderRadius: 18,
    border: `2px solid ${accentColor}33`,
    boxShadow: "0 10px 30px rgba(31, 42, 68, 0.15)",
    maxWidth: 980,
    opacity: fadeIn,
    transform: `translateY(${slideOffset}px)`,
  };

  const nameTagStyle: React.CSSProperties = {
    display: "inline-block",
    fontSize: 20,
    color: "#ffffff",
    backgroundColor: accentColor,
    borderRadius: 999,
    padding: "4px 14px",
    marginBottom: 8,
  };

  return (
    <div style={bubbleStyle}>
      <div style={nameTagStyle}>{speakerName}</div>
      <div>{text}</div>
    </div>
  );
};

/**
 * Renders the `characters.sayo` standee in one of two layouts.
 *
 * - `mode`: "stage" uses the STAGE_* frame and lets the image fill it;
 *   "corner" uses the smaller CORNER_* frame, hides overflow and requests the
 *   fixed CORNER_IMAGE_* image size.
 * - `speaking`: when true, the avatar's speaking animation supplies a vertical
 *   offset for the frame; when false the offset is 0.
 * - `localFrame` / `fps`: forwarded to the speaking animation and to the
 *   lip-sync lookup.
 * - `speechId`: forwarded to the lip-sync lookup; only consulted while
 *   speaking with the "rhubarbLipSync" animation type, otherwise the mouth
 *   is "rest".
 */
const SayoStandee: React.FC<
  Readonly<{
    mode: "stage" | "corner";
    speaking: boolean;
    localFrame: number;
    fps: number;
    speechId?: string;
  }>
> = (props) => {
  const {mode, speaking, localFrame, fps, speechId} = props;
  const avatar: CharacterDefinition["avatar"] = characters.sayo.avatar;
  const animationKind = avatar.speakingAnimationType ?? "none";

  // Vertical offset produced by the avatar's speaking animation.
  const verticalOffset = speaking
    ? speakingAvatarAnimations[animationKind]({
        frame: localFrame,
        fps,
        focused: true,
        hasMultipleCharacters: false,
      })
    : 0;

  // Mouth shape comes from the lip-sync data only for the lip-sync animation.
  const lipSyncActive = speaking && animationKind === "rhubarbLipSync";
  const mouthShape = lipSyncActive
    ? getMouthForSpeechFrame(speechId, localFrame, fps)
    : "rest";

  // Every mode-dependent value gathered in one place.
  const layout =
    mode === "corner"
      ? ({
          right: 18,
          bottom: 0,
          frameWidth: CORNER_STANDEE_WIDTH,
          frameHeight: CORNER_STANDEE_HEIGHT,
          alignItems: "flex-start",
          overflow: "hidden",
          imageWidth: CORNER_IMAGE_WIDTH,
          imageHeight: CORNER_IMAGE_HEIGHT,
          shadow: "drop-shadow(0 12px 24px rgba(0, 0, 0, 0.32))",
        } as const)
      : ({
          right: STAGE_STANDEE_RIGHT,
          bottom: STAGE_STANDEE_BOTTOM,
          frameWidth: STAGE_STANDEE_WIDTH,
          frameHeight: STAGE_STANDEE_HEIGHT,
          alignItems: "flex-end",
          overflow: "visible",
          imageWidth: "100%",
          imageHeight: "100%",
          shadow: "drop-shadow(0 18px 40px rgba(31, 42, 68, 0.22))",
        } as const);

  return (
    <div
      style={{
        position: "absolute",
        right: layout.right,
        bottom: layout.bottom,
        width: layout.frameWidth,
        height: layout.frameHeight,
        display: "flex",
        alignItems: layout.alignItems,
        justifyContent: "center",
        overflow: layout.overflow,
        transform: `translateY(${verticalOffset}px)`,
        zIndex: 3,
      }}
    >
      <LipSyncedStandeeImage
        imagePath={avatar.imagePath}
        mouthImageDir={
          avatar.mouthImageDir ?? defaultMouthImageDir(avatar.kind)
        }
        mouth={mouthShape}
        width={layout.imageWidth}
        height={layout.imageHeight}
        maxHeight={layout.imageHeight}
        filter={layout.shadow}
      />
    </div>
  );
};

/**
 * Overlay for a single speech event: a bottom-centred subtitle bubble plus,
 * when `hasAudioForSpeech` reports audio for the event, its audio track.
 *
 * The subtitle's entrance progress is a spring driven by the current frame
 * returned by `useCurrentFrame()`. The subtitle shows `speech.subtitle` when
 * present and falls back to `speech.text`.
 */
const SpeechOverlay: React.FC<Readonly<{speech: SpeechEvent}>> = ({speech}) => {
  const frame = useCurrentFrame();
  const {fps} = useVideoConfig();

  const speaker = characters[speech.character];
  const entrance = spring({
    frame,
    fps,
    config: {damping: 20, mass: 0.7},
  });
  const caption = speech.subtitle ?? speech.text;

  // Mount the audio element only when the speech has audio.
  let voiceTrack: React.ReactNode = null;
  if (hasAudioForSpeech(speech)) {
    voiceTrack = <Audio src={staticFile(audioFileForSpeech(speech))} />;
  }

  const subtitleRowStyle: React.CSSProperties = {
    position: "absolute",
    bottom: 40,
    left: 0,
    right: 0,
    display: "flex",
    justifyContent: "center",
    zIndex: 4,
  };

  return (
    <React.Fragment>
      <div style={subtitleRowStyle}>
        <Subtitle
          text={caption}
          progress={entrance}
          speakerName={speaker.displayName}
          accentColor={speaker.avatar.accentColor}
        />
      </div>
      {voiceTrack}
    </React.Fragment>
  );
};

/**
 * Root composition for the pizza-kiln video.
 *
 * Timeline, in frames:
 *   1. intro speech (`timeline[0]`), from frame 0 for `introFrames`;
 *   2. a gap of `PIZZA_KILN_GAP_FRAMES`;
 *   3. the muted background video for `PIZZA_KILN_VIDEO_FRAMES`;
 *   4. another gap of `PIZZA_KILN_GAP_FRAMES`;
 *   5. outro speech (`timeline[1]`) for `outroFrames`.
 *
 * The standee is shown in "corner" mode while the video is visible and in
 * "stage" mode otherwise. It is marked as speaking only while the intro or
 * outro speech window is active.
 */
export const PizzaKilnSayoComposition: React.FC = () => {
  const frame = useCurrentFrame();
  const {fps} = useVideoConfig();
  const introSpeech = timeline[0];
  const outroSpeech = timeline[1];
  const introFrames = durationForSpeech(introSpeech, fps);
  const outroFrames = durationForSpeech(outroSpeech, fps);
  const videoFrom = introFrames + PIZZA_KILN_GAP_FRAMES;
  const outroFrom = videoFrom + PIZZA_KILN_VIDEO_FRAMES + PIZZA_KILN_GAP_FRAMES;
  const isVideoVisible =
    frame >= videoFrom && frame < videoFrom + PIZZA_KILN_VIDEO_FRAMES;
  // The outro window is bounded on both sides, matching the outro <Sequence>
  // below. Without the upper bound the standee would keep its speaking
  // animation on any frame after the outro overlay has unmounted.
  const isOutroSpeaking =
    frame >= outroFrom && frame < outroFrom + outroFrames;
  const activeSpeech =
    frame < introFrames
      ? introSpeech
      : isOutroSpeaking
        ? outroSpeech
        : undefined;
  // Frame index relative to the start of the active speech; the intro starts
  // at frame 0, so the composition frame is already local for it.
  const speechLocalFrame =
    activeSpeech === outroSpeech ? frame - outroFrom : frame;

  return (
    <AbsoluteFill style={{backgroundColor: "#1a1a1a"}}>
      <UsualBackground />
      {/* Premount for at most `fps` frames, and never more than the number of
          frames that exist before the sequence starts. */}
      <Sequence
        from={videoFrom}
        durationInFrames={PIZZA_KILN_VIDEO_FRAMES}
        premountFor={Math.min(fps, videoFrom)}
      >
        <Video
          muted
          objectFit="cover"
          src={staticFile(BACKGROUND_VIDEO_PATH)}
          style={{
            width: "100%",
            height: "100%",
          }}
        />
      </Sequence>
      <SayoStandee
        mode={isVideoVisible ? "corner" : "stage"}
        speaking={Boolean(activeSpeech)}
        localFrame={speechLocalFrame}
        fps={fps}
        speechId={activeSpeech?.id}
      />
      <Sequence durationInFrames={introFrames} premountFor={0}>
        <SpeechOverlay speech={introSpeech} />
      </Sequence>
      <Sequence
        from={outroFrom}
        durationInFrames={outroFrames}
        premountFor={Math.min(fps, outroFrom)}
      >
        <SpeechOverlay speech={outroSpeech} />
      </Sequence>
    </AbsoluteFill>
  );
};