Newer
Older
remotion_docker_devcontainer / voicevox-remotion-template / src / data / timing.ts
import {SpeechEvent, timeline, TimelineEvent} from "./script";
import voicevoxManifest from "./voicevox-manifest.json";

/**
 * Shape assumed for each element of the imported `voicevox-manifest.json` array.
 * Within this module only `id`, `file` and `durationSeconds` are read.
 */
type ManifestEntry = {
  /** Lookup key; compared against `SpeechEvent.id`. */
  id: string;
  // Optional metadata, not read in this module (presumably describes the
  // VOICEVOX speaker used for the line — confirm against the manifest generator).
  character?: string;
  speakerName?: string;
  styleName?: string;
  speakerId?: number;
  /** Audio file path handed back by `audioFileForSpeech`. */
  file: string;
  /** Clip length in seconds; multiplied by the frame rate to obtain a frame count. */
  durationSeconds: number;
};

// The JSON import is treated as a list of manifest entries. This is an
// unchecked assertion: nothing validates the file's shape at runtime.
const manifestEntries = voicevoxManifest as ManifestEntry[];

// Index the entries by id for constant-time lookup. If an id appears more than
// once, the later entry replaces the earlier one.
const manifestById = new Map<string, ManifestEntry>();
for (const manifestEntry of manifestEntries) {
  manifestById.set(manifestEntry.id, manifestEntry);
}

/** Frame rate used by the duration helpers when the caller passes no `fps`. */
export const FPS = 30;
/** Frames added after every timeline event except the last when totalling. */
export const GAP_FRAMES = 6;
/** Seconds assumed for a non-"say" timeline event that has no `durationSeconds`. */
export const DEFAULT_SHOW_SECONDS = 1.5;

/**
 * Reports whether the manifest contains an entry keyed by this speech's id.
 *
 * @param speech - Speech event whose `id` is looked up.
 * @returns `true` when a manifest entry with that id exists.
 */
export const hasAudioForSpeech = (speech: SpeechEvent): boolean => {
  const {id} = speech;
  return manifestById.has(id);
};

/**
 * Resolves the audio file path for a speech event.
 *
 * @param speech - Speech event whose `id` selects the manifest entry.
 * @returns The manifest entry's `file` when one is recorded; otherwise the
 *   path `audio/lines/<id>.wav`.
 */
export const audioFileForSpeech = (speech: SpeechEvent) => {
  const recordedFile = manifestById.get(speech.id)?.file;
  // `== null` covers both a missing entry and a missing/null `file` field.
  return recordedFile == null ? `audio/lines/${speech.id}.wav` : recordedFile;
};

/**
 * Length of a speech event in frames.
 *
 * Uses the duration recorded in the manifest when it is a finite number;
 * otherwise estimates it from the text length.
 *
 * @param speech - Speech event to measure.
 * @param fps - Frames per second; defaults to `FPS`.
 * @returns Frame count, rounded up. The manifest-based result is at least 1.
 */
export const durationForSpeech = (speech: SpeechEvent, fps = FPS) => {
  const recordedSeconds = manifestById.get(speech.id)?.durationSeconds;

  if (
    typeof recordedSeconds !== "number" ||
    !Number.isFinite(recordedSeconds)
  ) {
    // No usable manifest duration: estimate 0.11 s per character of text,
    // with a floor of 1.2 s.
    const guessedSeconds = Math.max(1.2, speech.text.length * 0.11);
    return Math.ceil(guessedSeconds * fps);
  }

  // Round up to whole frames, and never report fewer than one frame.
  const frames = Math.ceil(recordedSeconds * fps);
  return Math.max(1, frames);
};

/**
 * Length of any timeline event in frames.
 *
 * "say" events are delegated to `durationForSpeech`. Every other event uses
 * its own `durationSeconds`, or `DEFAULT_SHOW_SECONDS` when that is
 * null/undefined.
 *
 * @param event - Timeline event to measure.
 * @param fps - Frames per second; defaults to `FPS`.
 * @returns Frame count; for non-"say" events, rounded up and at least 1.
 */
export const durationForTimelineEvent = (
  event: TimelineEvent,
  fps = FPS
) => {
  if (event.type !== "say") {
    const seconds = event.durationSeconds ?? DEFAULT_SHOW_SECONDS;
    const frames = Math.ceil(seconds * fps);
    return Math.max(1, frames);
  }

  return durationForSpeech(event, fps);
};

/**
 * Total length of the whole timeline in frames.
 *
 * Adds up every event's duration and inserts `GAP_FRAMES` after each event
 * except the last. An empty timeline yields 0.
 *
 * @param fps - Frames per second; defaults to `FPS`.
 * @returns Sum of event durations plus the gaps between them.
 */
export const totalDurationInFrames = (fps = FPS) => {
  const lastIndex = timeline.length - 1;
  let totalFrames = 0;

  timeline.forEach((event, index) => {
    totalFrames += durationForTimelineEvent(event, fps);
    // The final event is not followed by a gap.
    if (index < lastIndex) {
      totalFrames += GAP_FRAMES;
    }
  });

  return totalFrames;
};