// Newer
// Older
// remotion_docker_devcontainer / voicevox-remotion-template / scripts / voicevox-generate.js
import fs from "node:fs/promises";

/**
 * Reads an environment variable, treating an unset or blank value as absent.
 *
 * `??` alone only covers `undefined`, so a variable exported as an empty
 * string would otherwise override the default.
 *
 * @param {string} name - Environment variable name.
 * @param {string} fallback - Value used when the variable is unset or blank.
 * @returns {string} The trimmed value, or `fallback`.
 */
const readEnv = (name, fallback) => {
  const value = process.env[name]?.trim();
  return value ? value : fallback;
};

/**
 * Converts the configured speaker id to a number, failing fast on bad input.
 *
 * @param {string} value - Raw speaker id text.
 * @returns {number} The speaker id as an integer.
 * @throws {Error} When `value` does not represent an integer.
 */
const parseSpeakerId = (value) => {
  const id = Number(value);
  if (!Number.isInteger(id)) {
    throw new Error(`VOICEVOX_SPEAKER_ID must be an integer, got "${value}".`);
  }
  return id;
};

// Base URL of the VOICEVOX engine. Trailing slashes are dropped because the
// request URLs are formed as `${VOICEVOX_URL}/<endpoint>`.
const VOICEVOX_URL = readEnv(
  "VOICEVOX_URL",
  "http://host.docker.internal:50021"
).replace(/\/+$/, "");

// Speaker id sent as the `speaker` query parameter on every request.
const SPEAKER_ID = parseSpeakerId(readEnv("VOICEVOX_SPEAKER_ID", "3"));

// All locations are resolved relative to this script file, not the working
// directory.
const inputPath = new URL("../src/data/script.json", import.meta.url);
const outputDir = new URL("../public/audio/lines/", import.meta.url);
const manifestPath = new URL(
  "../src/data/voicevox-manifest.json",
  import.meta.url
);

/**
 * Computes the playback length of a RIFF/WAVE buffer from its chunk headers.
 *
 * Walks the chunk list after the 12-byte RIFF header, takes the byte rate
 * from the `fmt ` chunk and the payload size from the `data` chunk, and
 * divides one by the other.
 *
 * @param {Buffer} buffer - Complete contents of a WAV file.
 * @returns {number} Duration in seconds (data bytes / bytes per second).
 * @throws {Error} When the RIFF/WAVE markers are missing, the `fmt ` chunk is
 *   too short to hold a byte rate, or no non-zero byte rate and data size
 *   could be found.
 */
const getWavDurationSeconds = (buffer) => {
  if (buffer.toString("ascii", 0, 4) !== "RIFF") {
    throw new Error("Invalid WAV header: RIFF not found.");
  }
  if (buffer.toString("ascii", 8, 12) !== "WAVE") {
    throw new Error("Invalid WAV header: WAVE not found.");
  }

  // Each chunk starts with a 4-byte id and a 4-byte little-endian size.
  const chunkHeaderSize = 8;
  // The byte rate is the 4-byte field ending 12 bytes into the fmt payload.
  const byteRateEnd = 12;

  let offset = 12;
  let byteRate = 0;
  let dataSize = 0;

  while (offset + chunkHeaderSize <= buffer.length) {
    const chunkId = buffer.toString("ascii", offset, offset + 4);
    const chunkSize = buffer.readUInt32LE(offset + 4);
    const payloadStart = offset + chunkHeaderSize;

    if (chunkId === "fmt ") {
      // Fail with a header error instead of an out-of-range read (or a read
      // that spills into the next chunk) when the fmt chunk is cut short.
      if (chunkSize < byteRateEnd || payloadStart + byteRateEnd > buffer.length) {
        throw new Error("Invalid WAV header: fmt chunk is truncated.");
      }
      byteRate = buffer.readUInt32LE(payloadStart + 8);
    }
    if (chunkId === "data") {
      // Never trust a declared size beyond the bytes actually present:
      // truncated files and streamed WAVs (size 0xFFFFFFFF) would otherwise
      // report an inflated duration.
      dataSize = Math.min(chunkSize, buffer.length - payloadStart);
      break;
    }
    // Chunks are word-aligned, so an odd-sized payload has one pad byte.
    offset = payloadStart + chunkSize + (chunkSize % 2);
  }

  if (!byteRate || !dataSize) {
    throw new Error("Failed to read WAV duration.");
  }

  return dataSize / byteRate;
};

// Load the list of lines to synthesize and refuse to continue unless it is
// a non-empty array.
const script = JSON.parse(await fs.readFile(inputPath, "utf8"));
const hasEntries = Array.isArray(script) && script.length > 0;
if (!hasEntries) {
  throw new Error("src/data/script.json is empty.");
}

// Collected per-line metadata; filled in as each line is synthesized.
const manifest = [];
// Make sure the audio output directory exists (no error if it already does).
await fs.mkdir(outputDir, {recursive: true});

/**
 * Builds an Error describing a non-OK VOICEVOX response.
 *
 * @param {string} endpoint - Endpoint name used as the message prefix.
 * @param {Response} response - The failed fetch response.
 * @param {string} sentenceId - Id of the script entry being processed.
 * @returns {Promise<Error>} Error carrying status, entry id and, when
 *   readable, the response body.
 */
const describeVoicevoxFailure = async (endpoint, response, sentenceId) => {
  let detail = "";
  try {
    detail = (await response.text()).trim();
  } catch {
    // Best effort only: the status code and entry id are still reported.
  }
  const suffix = detail ? ` - ${detail}` : "";
  return new Error(
    `${endpoint} failed: ${response.status} (id: ${sentenceId})${suffix}`
  );
};

// Synthesize every script entry in order: audio_query -> synthesis -> WAV
// file on disk -> manifest record.
for (const sentence of script) {
  if (!sentence?.id || !sentence?.text) {
    throw new Error("Each entry needs id and text in script.json.");
  }

  // The id becomes a file name, so it must stay inside the output directory.
  const sentenceId = String(sentence.id);
  if (/[\\/]/.test(sentenceId)) {
    throw new Error(
      `Entry id "${sentenceId}" must not contain path separators.`
    );
  }

  // searchParams takes care of encoding the text for the query string.
  const queryUrl = new URL(`${VOICEVOX_URL}/audio_query`);
  queryUrl.searchParams.set("text", sentence.text);
  queryUrl.searchParams.set("speaker", String(SPEAKER_ID));

  const queryResponse = await fetch(queryUrl, {method: "POST"});
  if (!queryResponse.ok) {
    throw await describeVoicevoxFailure(
      "audio_query",
      queryResponse,
      sentenceId
    );
  }

  // Override delivery parameters on the returned query before synthesis.
  const query = await queryResponse.json();
  query.speedScale = 1.02;
  query.pitchScale = 0.0;
  query.intonationScale = 1.1;

  const synthUrl = new URL(`${VOICEVOX_URL}/synthesis`);
  synthUrl.searchParams.set("speaker", String(SPEAKER_ID));

  const synthResponse = await fetch(synthUrl, {
    method: "POST",
    headers: {"Content-Type": "application/json"},
    body: JSON.stringify(query),
  });
  if (!synthResponse.ok) {
    throw await describeVoicevoxFailure(
      "synthesis",
      synthResponse,
      sentenceId
    );
  }

  const audioBuffer = Buffer.from(await synthResponse.arrayBuffer());
  // Validate the WAV before touching the disk so a bad response never
  // leaves a broken file behind.
  const durationSeconds = getWavDurationSeconds(audioBuffer);

  // Encode the id so characters such as "#", "?" or "%" end up in the file
  // name instead of being parsed as URL syntax.
  const outputPath = new URL(
    `./${encodeURIComponent(sentenceId)}.wav`,
    outputDir
  );
  await fs.writeFile(outputPath, audioBuffer);

  manifest.push({
    id: sentence.id,
    file: `audio/lines/${sentence.id}.wav`,
    durationSeconds,
  });
  console.log(
    `Wrote ${outputPath.pathname} (${durationSeconds.toFixed(2)}s)`
  );
}

// Persist the manifest as 2-space-indented JSON terminated by a newline.
const manifestJson = `${JSON.stringify(manifest, null, 2)}\n`;
await fs.writeFile(manifestPath, manifestJson);
console.log(`Updated ${manifestPath.pathname}`);