import fs from "node:fs/promises";
import ts from "typescript";
// Base URL of the VOICEVOX engine HTTP API. Can be overridden with the
// VOICEVOX_URL environment variable; otherwise the Docker host alias on
// port 50021 is used.
const VOICEVOX_URL =
process.env.VOICEVOX_URL ?? "http://host.docker.internal:50021";
// Location of the TypeScript script module, resolved relative to this file.
const scriptPath = new URL("../src/data/script.ts", import.meta.url);
// Directory URL (note the trailing slash) used as the base for per-line WAV files.
const outputDir = new URL("../public/audio/lines/", import.meta.url);
// Location of the JSON manifest, resolved relative to this file.
const manifestPath = new URL(
"../src/data/voicevox-manifest.json",
import.meta.url
);
const getWavDurationSeconds = (buffer) => {
if (buffer.toString("ascii", 0, 4) !== "RIFF") {
throw new Error("Invalid WAV header: RIFF not found.");
}
if (buffer.toString("ascii", 8, 12) !== "WAVE") {
throw new Error("Invalid WAV header: WAVE not found.");
}
let offset = 12;
let byteRate = 0;
let dataSize = 0;
while (offset + 8 <= buffer.length) {
const chunkId = buffer.toString("ascii", offset, offset + 4);
const chunkSize = buffer.readUInt32LE(offset + 4);
if (chunkId === "fmt ") {
byteRate = buffer.readUInt32LE(offset + 16);
}
if (chunkId === "data") {
dataSize = chunkSize;
break;
}
offset += 8 + chunkSize + (chunkSize % 2);
}
if (!byteRate || !dataSize) {
throw new Error("Failed to read WAV duration.");
}
return dataSize / byteRate;
};
/**
 * Loads the script module by transpiling its TypeScript source in memory.
 *
 * Reads the file at `scriptPath`, transpiles it to ES2022 module code with
 * `ts.transpileModule`, and imports the result through a base64 `data:` URL.
 *
 * @returns {Promise<object>} The module namespace of the transpiled script.
 * @throws {Error} If the transpiler reports any error-category diagnostics.
 */
const loadScriptModule = async () => {
  const tsSource = await fs.readFile(scriptPath, "utf8");

  const {outputText, diagnostics} = ts.transpileModule(tsSource, {
    fileName: scriptPath.pathname,
    compilerOptions: {
      target: ts.ScriptTarget.ES2022,
      module: ts.ModuleKind.ES2022,
    },
  });

  // Warnings and suggestions are ignored; only real errors abort the load.
  const errorMessages = (diagnostics ?? [])
    .filter(({category}) => category === ts.DiagnosticCategory.Error)
    .map(({messageText}) => ts.flattenDiagnosticMessageText(messageText, "\n"));

  if (errorMessages.length > 0) {
    throw new Error(
      `Failed to transpile src/data/script.ts:\n${errorMessages.join("\n")}`
    );
  }

  // Importing from a data: URL avoids writing the transpiled code to disk.
  const encodedModule = Buffer.from(outputText).toString("base64");
  return import(`data:text/javascript;base64,${encodedModule}`);
};
/**
 * Requests the `/speakers` endpoint of the engine at `VOICEVOX_URL`.
 *
 * @returns {Promise<unknown>} The parsed JSON body of the response.
 * @throws {Error} If the response status is not OK.
 */
const fetchSpeakers = async () => {
  const speakersResponse = await fetch(`${VOICEVOX_URL}/speakers`);
  if (speakersResponse.ok) {
    return speakersResponse.json();
  }
  throw new Error(`speakers failed: ${speakersResponse.status}`);
};
/**
 * Determines which speaker and style a speech entry should use.
 *
 * Values on the speech's own `voicevox` settings take precedence; anything
 * missing there falls back to the `voicevox` settings of its character.
 *
 * @param {object} characters Character definitions keyed by character name.
 * @param {object} speech Speech entry; reads `character`, `id` and `voicevox`.
 * @returns {{speakerName: (string|undefined), styleName: (string|undefined)}}
 *   The resolved names; either may be undefined if configured nowhere.
 * @throws {Error} If `speech.character` has no entry in `characters`.
 */
const resolveVoice = (characters, speech) => {
  const {character: characterKey, id: speechId, voicevox: override} = speech;
  const profile = characters[characterKey];

  if (profile) {
    const fallback = profile.voicevox;
    return {
      speakerName: override?.speakerName ?? fallback?.speakerName,
      styleName: override?.styleName ?? fallback?.styleName,
    };
  }

  throw new Error(`Unknown character "${characterKey}" in ${speechId}.`);
};
/**
 * Looks up the numeric style id for a speaker name / style name pair.
 *
 * @param {Array<object>} speakers Speaker list; each entry has a `name` and a
 *   `styles` array whose items carry `name` and `id`.
 * @param {object} voice Names to look up (`speakerName`, `styleName`).
 * @param {string} speechId Id of the speech entry, used only in error messages.
 * @returns {*} The `id` of the matching style.
 * @throws {Error} If no speaker or no style with the requested name exists;
 *   the message lists the names that are available.
 */
const resolveSpeakerId = (speakers, voice, speechId) => {
  const {speakerName, styleName} = voice;

  const matchedSpeaker = speakers.find(
    (candidate) => candidate.name === speakerName
  );
  if (matchedSpeaker === undefined) {
    const available = speakers.map((candidate) => candidate.name).join(", ");
    throw new Error(
      `Speaker "${speakerName}" for ${speechId} was not found. Available speakers: ${available}`
    );
  }

  const matchedStyle = matchedSpeaker.styles.find(
    (candidate) => candidate.name === styleName
  );
  if (matchedStyle === undefined) {
    const available = matchedSpeaker.styles
      .map((candidate) => candidate.name)
      .join(", ");
    throw new Error(
      `Style "${styleName}" for ${speechId} was not found on "${speakerName}". Available styles: ${available}`
    );
  }

  return matchedStyle.id;
};
// ---------------------------------------------------------------------------
// Main flow: load the script, synthesize one WAV per say(...) event, and
// write a manifest describing every generated file.
// ---------------------------------------------------------------------------

const {characters, timeline} = await loadScriptModule();
if (!characters || !timeline) {
  throw new Error("src/data/script.ts must export characters and timeline.");
}

// Only timeline entries tagged as "say" produce audio.
const speechEvents = timeline.filter((event) => event?.type === "say");
if (speechEvents.length === 0) {
  throw new Error("src/data/script.ts has no say(...) events.");
}

const speakers = await fetchSpeakers();
await fs.mkdir(outputDir, {recursive: true});

const manifest = [];
for (const speech of speechEvents) {
  if (!speech?.id || !speech?.text || !speech?.character) {
    throw new Error("Each say(...) entry needs id, character, and text.");
  }

  const voice = resolveVoice(characters, speech);
  const speakerId = resolveSpeakerId(speakers, voice, speech.id);

  // Step 1: ask the engine for a synthesis query for this text and speaker.
  const queryResponse = await fetch(
    `${VOICEVOX_URL}/audio_query?text=${encodeURIComponent(speech.text)}&speaker=${speakerId}`,
    {method: "POST"}
  );
  if (!queryResponse.ok) {
    // Include the speech id so the failing line can be located in the script.
    throw new Error(
      `audio_query failed for ${speech.id}: ${queryResponse.status}`
    );
  }

  // Tuning overrides applied uniformly to every line before synthesis.
  const query = await queryResponse.json();
  query.speedScale = 1.02;
  query.pitchScale = 0.0;
  query.intonationScale = 1.1;

  // Step 2: synthesize audio from the adjusted query.
  const synthResponse = await fetch(
    `${VOICEVOX_URL}/synthesis?speaker=${speakerId}`,
    {
      method: "POST",
      headers: {"Content-Type": "application/json"},
      body: JSON.stringify(query),
    }
  );
  if (!synthResponse.ok) {
    throw new Error(`synthesis failed for ${speech.id}: ${synthResponse.status}`);
  }

  const audioBuffer = Buffer.from(await synthResponse.arrayBuffer());

  // Parse the WAV before touching the disk so a malformed response never
  // leaves an unusable .wav file behind.
  const durationSeconds = getWavDurationSeconds(audioBuffer);

  const outputPath = new URL(`./${speech.id}.wav`, outputDir);
  await fs.writeFile(outputPath, audioBuffer);

  manifest.push({
    id: speech.id,
    character: speech.character,
    speakerName: voice.speakerName,
    styleName: voice.styleName,
    speakerId,
    file: `audio/lines/${speech.id}.wav`,
    durationSeconds,
  });
  console.log(
    `Wrote ${outputPath.pathname} (${voice.speakerName} / ${voice.styleName}, ${durationSeconds.toFixed(2)}s)`
  );
}

// The manifest is written only after every line has been synthesized.
await fs.writeFile(manifestPath, JSON.stringify(manifest, null, 2) + "\n");
console.log(`Updated ${manifestPath.pathname}`);