// Newer
// Older
// remotion_docker_devcontainer / voicevox-remotion-template / scripts / generate-lipsync.js
import fs from "node:fs/promises";
import path from "node:path";
import {spawn} from "node:child_process";
import {fileURLToPath} from "node:url";
import {normalizeRhubarbJson} from "./lipsync-utils.js";

// Absolute path of the project root, i.e. the parent of the directory that
// contains this script.
const projectRoot = path.dirname(path.dirname(fileURLToPath(import.meta.url)));

// Directories this script reads from and writes to, all under the project root.
const publicDir = path.join(projectRoot, "public");
const generatedDir = path.join(projectRoot, "src/generated/lipsync");
const rawDir = path.join(publicDir, "lipsync/raw");

// Project-relative VOICEVOX manifests scanned when no audio path is passed on
// the command line.
const DEFAULT_SOURCE_MANIFESTS = [
  "src/data/voicevox-manifest.json",
  "src/data/pizza-kiln/voicevox-manifest.json",
];

/**
 * Turns a user-supplied path into an absolute one.
 *
 * @param {string} value - Absolute path, or a path relative to the project root.
 * @returns {string} `value` unchanged when already absolute, otherwise `value`
 *   resolved against the project root.
 */
const resolveProjectPath = (value) => {
  if (path.isAbsolute(value)) {
    return value;
  }

  return path.resolve(projectRoot, value);
};

/**
 * Expresses a path relative to the project root using forward slashes,
 * regardless of the platform separator.
 *
 * @param {string} targetPath - Path to convert.
 * @returns {string} Project-relative path with "/" separators.
 */
const toProjectRelative = (targetPath) => {
  const relativePath = path.relative(projectRoot, targetPath);
  const segments = relativePath.split(path.sep);

  return segments.join("/");
};

/**
 * Expresses a path relative to the public directory using forward slashes.
 * Paths that lie outside the public directory fall back to a project-relative
 * path instead.
 *
 * @param {string} targetPath - Path to convert.
 * @returns {string} Public-relative (or, as a fallback, project-relative) path
 *   with "/" separators.
 */
const toPublicRelative = (targetPath) => {
  const relativePath = path.relative(publicDir, targetPath);

  // Only a leading ".." path *segment* means the target escapes publicDir.
  // A plain startsWith("..") would also match names such as "..voices/a.wav"
  // that live inside publicDir.
  const isOutsidePublicDir =
    relativePath === ".." ||
    relativePath.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relativePath);
  if (isOutsidePublicDir) {
    return toProjectRelative(targetPath);
  }

  return relativePath.split(path.sep).join("/");
};

/**
 * Reports whether a path can be accessed.
 *
 * @param {string} targetPath - Path to probe.
 * @returns {Promise<boolean>} `true` when `fs.access` succeeds, `false` when it
 *   rejects for any reason.
 */
const pathExists = (targetPath) =>
  fs.access(targetPath).then(
    () => true,
    () => false
  );

/**
 * Parses the command-line arguments in `process.argv`.
 *
 * Accepted form: `[audioPath] [--out <path>] [--raw-out <path>] [--manifest <path>]`.
 * Every option takes exactly one value, given as the following argument.
 *
 * @returns {{audioPath: (string|undefined), outPath: (string|undefined),
 *   rawOutPath: (string|undefined), manifestPath: string}} The positional audio
 *   path as given, and the option paths resolved via `resolveProjectPath`.
 * @throws {Error} On an unknown option, an option without a value, more than
 *   one audio path, or `--out` / `--raw-out` without an audio path.
 */
const parseArgs = () => {
  // Maps each accepted option name to the field that stores its value.
  const optionFields = new Map([
    ["out", "out"],
    ["raw-out", "rawOut"],
    ["manifest", "manifest"],
  ]);
  const values = {
    out: undefined,
    rawOut: undefined,
    manifest: "src/generated/lipsync/manifest.json",
  };
  const audioPaths = [];
  const pending = process.argv.slice(2);

  while (pending.length > 0) {
    const token = pending.shift();
    if (!token.startsWith("--")) {
      audioPaths.push(token);
      continue;
    }

    const field = optionFields.get(token.slice(2));
    if (field === undefined) {
      throw new Error(`Unknown option "${token}".`);
    }

    // The option's value is the next argument; it must exist and must not
    // itself look like an option.
    const value = pending.shift();
    if (!value || value.startsWith("--")) {
      throw new Error(`Option "${token}" needs a value.`);
    }
    values[field] = value;
  }

  const [audioPath, ...extraAudioPaths] = audioPaths;
  if (extraAudioPaths.length > 0) {
    throw new Error("Only one audio path can be specified.");
  }

  const hasOutputOverride = Boolean(values.out || values.rawOut);
  if (!audioPath && hasOutputOverride) {
    throw new Error("--out and --raw-out can only be used with one audio path.");
  }

  return {
    audioPath,
    outPath: values.out ? resolveProjectPath(values.out) : undefined,
    rawOutPath: values.rawOut ? resolveProjectPath(values.rawOut) : undefined,
    manifestPath: resolveProjectPath(values.manifest),
  };
};

/**
 * Lists the file names under which the Rhubarb executable is looked up on the
 * current platform, in lookup order.
 *
 * @returns {string[]} A fresh array of candidate file names.
 */
const executableNames = () => {
  if (process.platform === "win32") {
    return ["rhubarb.exe", "rhubarb.cmd", "rhubarb"];
  }

  return ["rhubarb"];
};

/**
 * Locates the Rhubarb Lip Sync executable.
 *
 * Lookup order:
 *   1. `RHUBARB_BIN`, when set (resolved via `resolveProjectPath`); it must exist.
 *   2. For each executable name in turn: `node_modules/.bin`, `tools/rhubarb`
 *      and `vendor/rhubarb` under the project root.
 *   3. For each `PATH` directory in turn: every executable name.
 *
 * @returns {Promise<string>} Path of the first candidate that exists.
 * @throws {Error} When `RHUBARB_BIN` points at a missing path, or when no
 *   candidate exists.
 */
const findRhubarbBin = async () => {
  const configuredBin = process.env.RHUBARB_BIN;
  if (configuredBin) {
    const envPath = resolveProjectPath(configuredBin);
    if (!(await pathExists(envPath))) {
      throw new Error(`RHUBARB_BIN was set, but not found: ${envPath}`);
    }

    return envPath;
  }

  // Project-local install locations; the executable name is the outer key so
  // every location is tried for one name before moving to the next name.
  const localDirectories = ["node_modules/.bin", "tools/rhubarb", "vendor/rhubarb"];
  const localCandidates = executableNames().flatMap((name) =>
    localDirectories.map((directory) => path.join(projectRoot, directory, name))
  );

  // PATH lookup; here the directory is the outer key.
  const pathDirectories = (process.env.PATH ?? "").split(path.delimiter);
  const pathCandidates = pathDirectories.flatMap((directory) =>
    executableNames().map((name) => path.join(directory, name))
  );

  // Probe sequentially so the first match in lookup order wins.
  for (const candidate of [...localCandidates, ...pathCandidates]) {
    if (await pathExists(candidate)) {
      return candidate;
    }
  }

  const notFoundLines = [
    "Rhubarb Lip Sync CLI was not found.",
    "Set RHUBARB_BIN to the Rhubarb executable path to use any installed CLI.",
    "Executable names differ by OS, for example rhubarb, rhubarb.exe, or rhubarb.cmd.",
    "When using a Dev Container, install the Linux Rhubarb binary and point RHUBARB_BIN to it.",
  ];
  throw new Error(notFoundLines.join("\n"));
};

/**
 * Runs the Rhubarb CLI on one audio file and waits for it to finish.
 *
 * The child is started with the project root as its working directory and is
 * asked to write JSON output to `rawOutputPath`.
 *
 * @param {string} rhubarbBin - Path of the Rhubarb executable.
 * @param {string} inputPath - Audio file to analyse.
 * @param {string} rawOutputPath - File Rhubarb should write its JSON to.
 * @returns {Promise<{stdout: string, stderr: string}>} Captured output when the
 *   process exits with code 0.
 * @throws {Error} (as a rejection) when the process cannot be spawned, exits
 *   with a non-zero code, or is terminated by a signal.
 */
const runRhubarb = (rhubarbBin, inputPath, rawOutputPath) =>
  new Promise((resolve, reject) => {
    const args = [
      "--recognizer",
      "phonetic",
      "--exportFormat",
      "json",
      "--extendedShapes",
      "X",
      "--output",
      rawOutputPath,
      inputPath,
    ];
    const child = spawn(rhubarbBin, args, {cwd: projectRoot});
    let stdout = "";
    let stderr = "";

    // Decode at the stream level: concatenating raw Buffer chunks onto a string
    // corrupts any multi-byte UTF-8 character that straddles a chunk boundary.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");

    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    child.on("error", reject);
    child.on("close", (code, signal) => {
      if (code === 0) {
        resolve({stdout, stderr});
        return;
      }

      // `code` is null when the child was killed by a signal; report the
      // signal instead of a meaningless "code null".
      const outcome =
        code === null
          ? `was terminated by signal ${signal}`
          : `exited with code ${code}`;
      reject(
        new Error(
          `Rhubarb ${outcome} for ${toProjectRelative(inputPath)}.\n${stderr || stdout}`
        )
      );
    });
  });

/**
 * Reads a UTF-8 file and parses its contents as JSON.
 *
 * @param {string} targetPath - File to read.
 * @returns {Promise<*>} The parsed value.
 */
const loadJson = async (targetPath) => {
  const text = await fs.readFile(targetPath, "utf8");

  return JSON.parse(text);
};

/**
 * Loads a previously generated lipsync manifest so new timelines can be merged
 * into it.
 *
 * @param {string} manifestPath - Manifest file to read.
 * @returns {Promise<{version: number, timelines: Object}>} The parsed manifest
 *   when it has `version === 1` and a plain-object `timelines`; otherwise a
 *   fresh empty manifest (also when the file does not exist).
 */
const loadExistingGeneratedManifest = async (manifestPath) => {
  const emptyManifest = () => ({version: 1, timelines: {}});

  if (!(await pathExists(manifestPath))) {
    return emptyManifest();
  }

  const manifest = await loadJson(manifestPath);
  const timelines = manifest?.timelines;

  // `typeof null` and `typeof []` are both "object", so they need explicit
  // rejection: callers assign `timelines[id] = ...`, which throws on null and
  // is silently dropped by JSON.stringify on an array.
  const hasUsableTimelines =
    typeof timelines === "object" &&
    timelines !== null &&
    !Array.isArray(timelines);
  if (manifest?.version !== 1 || !hasUsableTimelines) {
    return emptyManifest();
  }

  return manifest;
};

/**
 * Builds the generation task for a single audio file named on the command line.
 *
 * The task id is the file name without its last extension. Output locations
 * default to the raw and generated directories unless overridden.
 *
 * @param {Object} options
 * @param {string} options.audioPath - Audio path, absolute or project-relative.
 * @param {string} [options.outPath] - Override for the normalized output file.
 * @param {string} [options.rawOutPath] - Override for the raw Rhubarb output file.
 * @returns {Promise<{id: string, inputPath: string, sourceAudio: string,
 *   rawOutputPath: string, outputPath: string}>} The task description.
 * @throws {Error} When the audio file does not exist.
 */
const taskForAudioPath = async ({audioPath, outPath, rawOutPath}) => {
  const inputPath = resolveProjectPath(audioPath);
  const inputFound = await pathExists(inputPath);
  if (!inputFound) {
    throw new Error(`Input audio file was not found: ${audioPath}`);
  }

  const extension = path.extname(inputPath);
  const id = path.basename(inputPath, extension);
  const defaultRawOutputPath = path.join(rawDir, `${id}.rhubarb.json`);
  const defaultOutputPath = path.join(generatedDir, `${id}.mouth.json`);

  return {
    id,
    inputPath,
    sourceAudio: toPublicRelative(inputPath),
    rawOutputPath: rawOutPath ?? defaultRawOutputPath,
    outputPath: outPath ?? defaultOutputPath,
  };
};

/**
 * Builds one generation task per entry of a VOICEVOX manifest.
 *
 * The manifest must be a JSON array whose entries each carry an `id` and a
 * `file`; `file` is resolved against the public directory.
 *
 * @param {string} manifestPath - Manifest file to read.
 * @returns {Promise<Array<{id: string, inputPath: string, sourceAudio: string,
 *   rawOutputPath: string, outputPath: string}>>} The tasks in manifest order,
 *   or an empty array when the manifest file does not exist.
 * @throws {Error} When the manifest is not an array, an entry lacks `id` or
 *   `file`, or an entry's audio file does not exist.
 */
const tasksForVoicevoxManifest = async (manifestPath) => {
  const manifestFound = await pathExists(manifestPath);
  if (!manifestFound) {
    return [];
  }

  const entries = await loadJson(manifestPath);
  if (!Array.isArray(entries)) {
    throw new Error(`${toProjectRelative(manifestPath)} must be a JSON array.`);
  }

  // Converts a single manifest entry into a task, validating it on the way.
  const toTask = async (entry) => {
    const id = entry?.id;
    const file = entry?.file;
    if (!id || !file) {
      throw new Error(
        `${toProjectRelative(manifestPath)} entries need id and file.`
      );
    }

    const inputPath = path.join(publicDir, file);
    const inputFound = await pathExists(inputPath);
    if (!inputFound) {
      throw new Error(`Input audio file was not found: ${file}`);
    }

    return {
      id,
      inputPath,
      sourceAudio: file,
      rawOutputPath: path.join(rawDir, `${id}.rhubarb.json`),
      outputPath: path.join(generatedDir, `${id}.mouth.json`),
    };
  };

  return Promise.all(entries.map((entry) => toTask(entry)));
};

/**
 * Collects the tasks of every manifest listed in `DEFAULT_SOURCE_MANIFESTS`.
 *
 * @returns {Promise<Object[]>} All tasks, grouped in manifest-list order.
 * @throws {Error} When the manifests yield no tasks at all.
 */
const defaultTasks = async () => {
  // Start loading every manifest before awaiting any of them.
  const pendingGroups = DEFAULT_SOURCE_MANIFESTS.map((manifest) => {
    const manifestPath = resolveProjectPath(manifest);
    return tasksForVoicevoxManifest(manifestPath);
  });
  const taskGroups = await Promise.all(pendingGroups);

  const tasks = taskGroups.flatMap((group) => group);
  if (tasks.length === 0) {
    throw new Error("No VOICEVOX manifest entries were found.");
  }

  return tasks;
};

/**
 * Writes a value as 2-space-indented JSON followed by a newline, creating the
 * parent directories first.
 *
 * @param {string} targetPath - File to write.
 * @param {*} value - Value to serialize.
 * @returns {Promise<void>}
 */
const writeJson = async (targetPath, value) => {
  const parentDirectory = path.dirname(targetPath);
  await fs.mkdir(parentDirectory, {recursive: true});

  const serialized = JSON.stringify(value, null, 2);
  await fs.writeFile(targetPath, `${serialized}\n`);
};

/**
 * Produces the mouth timeline for one task.
 *
 * Steps: run Rhubarb into the task's raw output file, read that file back,
 * normalize it with `normalizeRhubarbJson`, print any warnings it reports, and
 * write the resulting timeline to the task's output file.
 *
 * @param {string} rhubarbBin - Path of the Rhubarb executable.
 * @param {{id: string, inputPath: string, sourceAudio: string,
 *   rawOutputPath: string, outputPath: string}} task - Task to process.
 * @returns {Promise<*>} The timeline returned by `normalizeRhubarbJson`.
 */
const generateTask = async (rhubarbBin, task) => {
  const {id, inputPath, sourceAudio, rawOutputPath, outputPath} = task;

  // The raw output directory must exist before Rhubarb is asked to write there.
  const rawOutputDirectory = path.dirname(rawOutputPath);
  await fs.mkdir(rawOutputDirectory, {recursive: true});
  await runRhubarb(rhubarbBin, inputPath, rawOutputPath);

  const rawJson = await loadJson(rawOutputPath);
  const normalized = normalizeRhubarbJson(rawJson, {audio: sourceAudio});
  const {timeline, warnings} = normalized;

  warnings.forEach((warning) => {
    console.warn(`${id}: ${warning}`);
  });
  await writeJson(outputPath, timeline);

  const writtenFile = toProjectRelative(outputPath);
  const sourceFile = toProjectRelative(inputPath);
  console.log(`Wrote ${writtenFile} from ${sourceFile}`);

  return timeline;
};

// Entry point: parse the CLI, locate Rhubarb, generate every timeline, then
// write the combined manifest.
const {audioPath, outPath, rawOutPath, manifestPath} = parseArgs();
const rhubarbBin = await findRhubarbBin();

let tasks;
let generatedManifest;
if (audioPath) {
  // Single-file run: the new timeline is merged into the existing manifest.
  tasks = [await taskForAudioPath({audioPath, outPath, rawOutPath})];
  generatedManifest = await loadExistingGeneratedManifest(manifestPath);
} else {
  // Full run: the manifest is rebuilt from the default source manifests.
  tasks = await defaultTasks();
  generatedManifest = {version: 1, timelines: {}};
}

// Tasks run one at a time; each timeline is stored under its task id.
for (const task of tasks) {
  const timeline = await generateTask(rhubarbBin, task);
  generatedManifest.timelines[task.id] = timeline;
}

await writeJson(manifestPath, generatedManifest);
console.log(`Updated ${toProjectRelative(manifestPath)}`);