Newer
Older
remotion_docker_devcontainer / voicevox-remotion-template / scripts / templates / dialogue-standee / data / timing.ts.template
import {
  defineVQScenarioAssetWorkflow,
  type VQScenarioAssetWorkflow,
} from "../../lib/VQRemotionLib/scenario";
import {timeline, type SpeechEvent, type TimelineEvent} from "./script";
import voicevoxManifest from "./voicevox-manifest.json";

/**
 * Shape this module assumes for each element of `voicevox-manifest.json`.
 *
 * The imported JSON is cast to an array of this type; nothing here validates
 * the data at runtime.
 */
type ManifestEntry = {
  /** Lookup key; compared against a speech event's `id`. */
  id: string;
  // The four optional fields below are not read by the helpers in this module.
  // NOTE(review): presumably speaker metadata written by the audio generator —
  // confirm against whatever produces the manifest.
  character?: string;
  speakerName?: string;
  styleName?: string;
  speakerId?: number;
  /** Audio file path handed back by `audioFileForSpeech` when an entry exists. */
  file: string;
  /** Clip length in seconds; converted to frames by `durationForSpeech`. */
  durationSeconds: number;
};

// The JSON import is treated as a list of manifest entries; the cast is not
// checked at runtime.
const manifestEntries = voicevoxManifest as ManifestEntry[];

// Index of manifest entries keyed by `id`. Entries are inserted in file order,
// so when an id appears more than once the later entry replaces the earlier.
const manifestById = new Map<string, ManifestEntry>();
for (const manifestEntry of manifestEntries) {
  manifestById.set(manifestEntry.id, manifestEntry);
}

/** Frame rate the duration helpers fall back to when the caller passes no `fps`. */
export const __SNAKE_ID___FPS = 30;
/** Frames added after every timeline event except the last when totalling the duration. */
export const __SNAKE_ID___GAP_FRAMES = 6;
/**
 * Length in seconds used by `durationForTimelineEvent` for an event that is
 * neither speech nor zero-length and carries no `durationSeconds` of its own.
 */
export const __SNAKE_ID___DEFAULT_SHOW_SECONDS = 1.5;

/**
 * Reports whether the manifest contains an entry for the given speech line.
 *
 * @param speech - Speech event whose `id` is looked up in the manifest index.
 * @returns `true` when an entry with that id exists, otherwise `false`.
 */
export const hasAudioForSpeech = (speech: SpeechEvent) => {
  const lineId = speech.id;
  return manifestById.has(lineId);
};

/**
 * Resolves the audio file path for a speech line.
 *
 * @param speech - Speech event whose `id` selects the manifest entry.
 * @returns The `file` recorded in the manifest entry, or a conventional
 *   `audio/__SLUG__/lines/<id>.wav` path when no entry (or no `file`) exists.
 */
export const audioFileForSpeech = (speech: SpeechEvent) => {
  const recordedFile = manifestById.get(speech.id)?.file;
  // The fallback path is only built when the manifest gave us nothing.
  return recordedFile ?? `audio/__SLUG__/lines/${speech.id}.wav`;
};

/**
 * Returns how many frames a spoken line occupies.
 *
 * When the manifest holds an entry for the line with a finite
 * `durationSeconds`, that value is used. Otherwise the length is estimated
 * from the text that will be read (`readAs` when present, else `text`) at
 * 0.11 seconds per character, with a floor of 1.2 seconds.
 *
 * Both paths round up to whole frames and never return fewer than one frame,
 * so the estimate path can no longer yield zero or negative frames for a
 * zero or negative `fps`.
 *
 * @param speech - Speech event to measure.
 * @param fps - Frames per second; defaults to the module's frame rate.
 * @returns Frame count, at least 1 for any numeric `fps`.
 */
export const durationForSpeech = (
  speech: SpeechEvent,
  fps = __SNAKE_ID___FPS
) => {
  const entry = manifestById.get(speech.id);
  if (entry && Number.isFinite(entry.durationSeconds)) {
    return Math.max(1, Math.ceil(entry.durationSeconds * fps));
  }

  // No usable manifest duration: estimate from character count instead.
  const textForEstimate = speech.readAs ?? speech.text;
  const estimatedSeconds = Math.max(1.2, textForEstimate.length * 0.11);
  // Same one-frame floor as the manifest branch above.
  return Math.max(1, Math.ceil(estimatedSeconds * fps));
};

/**
 * Converts a timeline event into a frame count.
 *
 * - `"say"` events are measured by `durationForSpeech`.
 * - `"clearStill"`, `"clearVideo"` and `"standeePosition"` take zero frames.
 * - Any other event lasts its own `durationSeconds` when that is defined,
 *   otherwise the module's default show time; the result is rounded up and
 *   is never less than one frame.
 *
 * @param event - Timeline event to measure.
 * @param fps - Frames per second; defaults to the module's frame rate.
 * @returns Duration of the event in frames.
 */
export const durationForTimelineEvent = (
  event: TimelineEvent,
  fps = __SNAKE_ID___FPS
) => {
  switch (event.type) {
    case "say":
      return durationForSpeech(event, fps);

    case "clearStill":
    case "clearVideo":
    case "standeePosition":
      return 0;

    default: {
      let shownSeconds: number = __SNAKE_ID___DEFAULT_SHOW_SECONDS;
      if ("durationSeconds" in event && event.durationSeconds !== undefined) {
        shownSeconds = event.durationSeconds;
      }
      return Math.max(1, Math.ceil(shownSeconds * fps));
    }
  }
};

// The `voicevox` and `rhubarb` sections both name the same manifest file, so
// the path is written once and shared.
const voicevoxManifestPath = "src/data/__SLUG__/voicevox-manifest.json";

/**
 * Asset workflow description for this scenario, built with
 * `defineVQScenarioAssetWorkflow`.
 *
 * It lists the script, audio output and manifest paths under `voicevox`, and
 * the source manifest, lipsync manifest and output directories under
 * `rhubarb`. How these paths are consumed is defined by the library, not here.
 */
export const __PASCAL_ID__AssetWorkflow: VQScenarioAssetWorkflow =
  defineVQScenarioAssetWorkflow({
    voicevox: {
      scriptPath: "src/data/__SLUG__/script.ts",
      outputDir: "public/audio/__SLUG__/lines",
      manifestPath: voicevoxManifestPath,
    },
    rhubarb: {
      sourceManifestPath: voicevoxManifestPath,
      manifestPath: "src/generated/lipsync/manifest.json",
      outputDir: "src/generated/lipsync",
      rawOutputDir: "public/lipsync/raw",
    },
  });

/**
 * Totals the length of the whole timeline in frames.
 *
 * Each event contributes its own duration, and every event except the last
 * is followed by the module's gap frames. An empty timeline totals 0.
 *
 * @param fps - Frames per second; defaults to the module's frame rate.
 * @returns Sum of all event durations plus the gaps between them.
 */
export const total__PASCAL_ID__DurationInFrames = (
  fps = __SNAKE_ID___FPS
) => {
  const finalIndex = timeline.length - 1;
  let totalFrames = 0;

  timeline.forEach((event, index) => {
    totalFrames += durationForTimelineEvent(event, fps);
    // No trailing gap after the final event.
    if (index !== finalIndex) {
      totalFrames += __SNAKE_ID___GAP_FRAMES;
    }
  });

  return totalFrames;
};