diff --git a/.gitignore b/.gitignore
index 2d6a637..4807c38 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,10 @@
 # Remotion render output
 out/
 
+# Rhubarb lip sync intermediate outputs
+voicevox-remotion-template/public/lipsync/raw/
+voicevox-remotion-template/src/generated/lipsync/*.mouth.json
+
 # Build artifacts and caches
 dist/
 build/
diff --git a/voicevox-remotion-template/.eslintrc.cjs b/voicevox-remotion-template/.eslintrc.cjs
index 0217ed1..8d47931 100644
--- a/voicevox-remotion-template/.eslintrc.cjs
+++ b/voicevox-remotion-template/.eslintrc.cjs
@@ -1,6 +1,11 @@
 module.exports = {
   root: true,
   extends: ["@remotion"],
+  ignorePatterns: [
+    "public/image/*-rhubarb-mouths/rhubarb-map.js",
+    "public/lipsync/",
+    "src/generated/lipsync/*.json",
+  ],
   env: {
     node: true,
   },
diff --git a/voicevox-remotion-template/package.json b/voicevox-remotion-template/package.json
index f41b620..0d331e4 100644
--- a/voicevox-remotion-template/package.json
+++ b/voicevox-remotion-template/package.json
@@ -7,6 +7,8 @@
     "start": "remotion preview",
     "render": "remotion render",
     "lint": "eslint .",
+    "lipsync:generate": "node scripts/generate-lipsync.js",
+    "test:lipsync": "node --test scripts/lipsync-utils.test.js",
     "voice:generate": "node scripts/voicevox-generate.js",
     "voice:generate:pizza-kiln": "node scripts/voicevox-generate.js --script src/data/pizza-kiln/script.ts --output public/audio/pizza-kiln/lines --manifest src/data/pizza-kiln/voicevox-manifest.json"
   },
diff --git a/voicevox-remotion-template/scripts/generate-lipsync.js b/voicevox-remotion-template/scripts/generate-lipsync.js
new file mode 100644
index 0000000..d44ebb1
--- /dev/null
+++ b/voicevox-remotion-template/scripts/generate-lipsync.js
@@ -0,0 +1,299 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+import {spawn} from "node:child_process";
+import {fileURLToPath} from "node:url";
+import {normalizeRhubarbJson} from "./lipsync-utils.js";
+
+const projectRoot = path.resolve(
+  fileURLToPath(new URL("..", import.meta.url))
+);
+const publicDir = path.join(projectRoot, "public");
+const generatedDir = path.join(projectRoot, "src/generated/lipsync");
+const rawDir = path.join(publicDir, "lipsync/raw");
+
+const DEFAULT_SOURCE_MANIFESTS = [
+  "src/data/voicevox-manifest.json",
+  "src/data/pizza-kiln/voicevox-manifest.json",
+];
+
+const resolveProjectPath = (value) =>
+  path.isAbsolute(value) ? value : path.resolve(projectRoot, value);
+
+const toProjectRelative = (targetPath) =>
+  path.relative(projectRoot, targetPath).split(path.sep).join("/");
+
+// Returns the path relative to public/ with "/" separators; paths outside
+// public/ are returned relative to the project root instead.
+const toPublicRelative = (targetPath) => {
+  const segments = path.relative(publicDir, targetPath).split(path.sep);
+  // Compare the first whole segment so a name like "..cache" stays inside public/.
+  const isOutside = segments[0] === ".." || path.isAbsolute(segments.join(path.sep));
+  return isOutside ? toProjectRelative(targetPath) : segments.join("/");
+};
+
+const pathExists = async (targetPath) => {
+  try {
+    await fs.access(targetPath);
+    return true;
+  } catch {
+    return false;
+  }
+};
+
+const parseArgs = () => {
+  const values = {
+    out: undefined,
+    rawOut: undefined,
+    manifest: "src/generated/lipsync/manifest.json",
+  };
+  const audioPaths = [];
+  const args = process.argv.slice(2);
+
+  for (let index = 0; index < args.length; index += 1) {
+    const arg = args[index];
+    if (!arg.startsWith("--")) {
+      audioPaths.push(arg);
+      continue;
+    }
+
+    const key = arg.slice(2);
+    if (!["out", "raw-out", "manifest"].includes(key)) {
+      throw new Error(`Unknown option "${arg}".`);
+    }
+
+    const value = args[index + 1];
+    if (!value || value.startsWith("--")) {
+      throw new Error(`Option "${arg}" needs a value.`);
+    }
+
+    if (key === "raw-out") {
+      values.rawOut = value;
+    } else {
+      values[key] = value;
+    }
+    index += 1;
+  }
+
+  if (audioPaths.length > 1) {
+    throw new Error("Only one audio path can be specified.");
+  }
+  if (!audioPaths[0] && (values.out || values.rawOut)) {
+    throw new Error("--out and --raw-out can only be used with one audio path.");
+  }
+
+  return {
+    audioPath: audioPaths[0],
+    outPath: values.out ? resolveProjectPath(values.out) : undefined,
+    rawOutPath: values.rawOut ? resolveProjectPath(values.rawOut) : undefined,
+    manifestPath: resolveProjectPath(values.manifest),
+  };
+};
+
+const executableNames = () =>
+  process.platform === "win32"
+    ? ["rhubarb.exe", "rhubarb.cmd", "rhubarb"]
+    : ["rhubarb"];
+
+const findRhubarbBin = async () => {
+  if (process.env.RHUBARB_BIN) {
+    const envPath = resolveProjectPath(process.env.RHUBARB_BIN);
+    if (await pathExists(envPath)) {
+      return envPath;
+    }
+    throw new Error(`RHUBARB_BIN was set, but not found: ${envPath}`);
+  }
+
+  const candidates = [];
+  for (const name of executableNames()) {
+    candidates.push(path.join(projectRoot, "node_modules/.bin", name));
+    candidates.push(path.join(projectRoot, "tools/rhubarb", name));
+    candidates.push(path.join(projectRoot, "vendor/rhubarb", name));
+  }
+
+  for (const candidate of candidates) {
+    if (await pathExists(candidate)) {
+      return candidate;
+    }
+  }
+
+  for (const directory of (process.env.PATH ?? "").split(path.delimiter)) {
+    for (const name of executableNames()) {
+      const candidate = path.join(directory, name);
+      if (await pathExists(candidate)) {
+        return candidate;
+      }
+    }
+  }
+
+  throw new Error(
+    [
+      "Rhubarb Lip Sync CLI was not found.",
+      "Set RHUBARB_BIN to the Rhubarb executable path to use any installed CLI.",
+      "Executable names differ by OS, for example rhubarb, rhubarb.exe, or rhubarb.cmd.",
+      "When using a Dev Container, install the Linux Rhubarb binary and point RHUBARB_BIN to it.",
+    ].join("\n")
+  );
+};
+
+// Runs the Rhubarb CLI on one audio file and resolves with its captured output.
+// Rejects when the process cannot be spawned or does not exit with code 0.
+const runRhubarb = (rhubarbBin, inputPath, rawOutputPath) =>
+  new Promise((resolve, reject) => {
+    const args = [
+      "--recognizer",
+      "phonetic",
+      "--exportFormat",
+      "json",
+      "--extendedShapes",
+      "X",
+      "--output",
+      rawOutputPath,
+      inputPath,
+    ];
+    const child = spawn(rhubarbBin, args, {cwd: projectRoot});
+    let stdout = "";
+    let stderr = "";
+    // Decode as UTF-8 streams so multi-byte characters split across chunks survive.
+    child.stdout.setEncoding("utf8").on("data", (chunk) => {
+      stdout += chunk;
+    });
+    child.stderr.setEncoding("utf8").on("data", (chunk) => {
+      stderr += chunk;
+    });
+    child.on("error", reject);
+    child.on("close", (code, signal) => {
+      if (code === 0) {
+        resolve({stdout, stderr});
+        return;
+      }
+      // `code` is null when a signal killed the process; report the signal instead.
+      const outcome =
+        code === null ? `was terminated by ${signal}` : `exited with code ${code}`;
+      reject(new Error(`Rhubarb ${outcome} for ${toProjectRelative(inputPath)}.\n${stderr || stdout}`));
+    });
+  });
+
+const loadJson = async (targetPath) =>
+  JSON.parse(await fs.readFile(targetPath, "utf8"));
+
+// Loads the manifest to merge into, or a fresh one when it is missing or unusable.
+const loadExistingGeneratedManifest = async (manifestPath) => {
+  if (!(await pathExists(manifestPath))) {
+    return {version: 1, timelines: {}};
+  }
+  const manifest = await loadJson(manifestPath);
+  const {version, timelines} = manifest ?? {};
+  // typeof null is "object", and an array would drop string-keyed entries when serialized.
+  const isTimelineMap =
+    timelines !== null && typeof timelines === "object" && !Array.isArray(timelines);
+  return version === 1 && isTimelineMap ? manifest : {version: 1, timelines: {}};
+};
+
+const taskForAudioPath = async ({audioPath, outPath, rawOutPath}) => {
+  const inputPath = resolveProjectPath(audioPath);
+  if (!(await pathExists(inputPath))) {
+    throw new Error(`Input audio file was not found: ${audioPath}`);
+  }
+
+  const id = path.basename(inputPath, path.extname(inputPath));
+
+  return {
+    id,
+    inputPath,
+    sourceAudio: toPublicRelative(inputPath),
+    rawOutputPath: rawOutPath ?? path.join(rawDir, `${id}.rhubarb.json`),
+    outputPath: outPath ?? path.join(generatedDir, `${id}.mouth.json`),
+  };
+};
+
+const tasksForVoicevoxManifest = async (manifestPath) => {
+  if (!(await pathExists(manifestPath))) {
+    return [];
+  }
+
+  const entries = await loadJson(manifestPath);
+  if (!Array.isArray(entries)) {
+    throw new Error(`${toProjectRelative(manifestPath)} must be a JSON array.`);
+  }
+
+  return Promise.all(
+    entries.map(async (entry) => {
+      if (!entry?.id || !entry?.file) {
+        throw new Error(
+          `${toProjectRelative(manifestPath)} entries need id and file.`
+        );
+      }
+
+      const inputPath = path.join(publicDir, entry.file);
+      if (!(await pathExists(inputPath))) {
+        throw new Error(`Input audio file was not found: ${entry.file}`);
+      }
+
+      return {
+        id: entry.id,
+        inputPath,
+        sourceAudio: entry.file,
+        rawOutputPath: path.join(rawDir, `${entry.id}.rhubarb.json`),
+        outputPath: path.join(generatedDir, `${entry.id}.mouth.json`),
+      };
+    })
+  );
+};
+
+const defaultTasks = async () => {
+  const taskGroups = await Promise.all(
+    DEFAULT_SOURCE_MANIFESTS.map((manifest) =>
+      tasksForVoicevoxManifest(resolveProjectPath(manifest))
+    )
+  );
+  const tasks = taskGroups.flat();
+  if (tasks.length === 0) {
+    throw new Error("No VOICEVOX manifest entries were found.");
+  }
+
+  return tasks;
+};
+
+const writeJson = async (targetPath, value) => {
+  await fs.mkdir(path.dirname(targetPath), {recursive: true});
+  await fs.writeFile(targetPath, `${JSON.stringify(value, null, 2)}\n`);
+};
+
+const generateTask = async (rhubarbBin, task) => {
+  await fs.mkdir(path.dirname(task.rawOutputPath), {recursive: true});
+  await runRhubarb(rhubarbBin, task.inputPath, task.rawOutputPath);
+
+  const rawJson = await loadJson(task.rawOutputPath);
+  const {timeline, warnings} = normalizeRhubarbJson(rawJson, {
+    audio: task.sourceAudio,
+  });
+
+  warnings.forEach((warning) => {
+    console.warn(`${task.id}: ${warning}`);
+  });
+  await writeJson(task.outputPath, timeline);
+
+  console.log(
+    `Wrote ${toProjectRelative(task.outputPath)} from ${toProjectRelative(
+      task.inputPath
+    )}`
+  );
+
+  return timeline;
+};
+
+const {audioPath, outPath, rawOutPath, manifestPath} = parseArgs();
+const rhubarbBin = await findRhubarbBin();
+const tasks = audioPath
+  ? [await taskForAudioPath({audioPath, outPath, rawOutPath})]
+  : await defaultTasks();
+const generatedManifest = audioPath
+  ? await loadExistingGeneratedManifest(manifestPath)
+  : {version: 1, timelines: {}};
+
+for (const task of tasks) {
+  generatedManifest.timelines[task.id] = await generateTask(rhubarbBin, task);
+}
+
+await writeJson(manifestPath, generatedManifest);
+console.log(`Updated ${toProjectRelative(manifestPath)}`);
diff --git a/voicevox-remotion-template/scripts/lipsync-utils.js b/voicevox-remotion-template/scripts/lipsync-utils.js
new file mode 100644
index 0000000..af61e4f
--- /dev/null
+++ b/voicevox-remotion-template/scripts/lipsync-utils.js
@@ -0,0 +1,82 @@
+export const RHUBARB_TO_JA_MOUTH = Object.freeze({
+  X: "rest",
+  A: "closed",
+  B: "i",
+  C: "e",
+  D: "a",
+  E: "o",
+  F: "u",
+  G: "i",
+  H: "e",
+});
+
+export const JAPANESE_MOUTH_SHAPES = Object.freeze([
+  "a",
+  "i",
+  "u",
+  "e",
+  "o",
+  "closed",
+  "rest",
+]);
+
+// Maps a Rhubarb shape letter to a mouth shape; unknown letters warn and yield "rest".
+export const mapRhubarbMouthToJapanese = (source, warnings = []) => {
+  // Own-property lookup: inherited keys such as "constructor" must not match.
+  if (Object.hasOwn(RHUBARB_TO_JA_MOUTH, source)) {
+    return RHUBARB_TO_JA_MOUTH[source];
+  }
+  warnings.push(`Unknown Rhubarb mouth shape "${source}", using "rest".`);
+  return "rest";
+};
+
+const assertFiniteNumber = (value, fieldName, index) => {
+  if (!Number.isFinite(value)) {
+    throw new Error(`mouthCues[${index}].${fieldName} must be a number.`);
+  }
+};
+
+// Converts parsed Rhubarb JSON into {timeline, warnings}; throws when mouthCues
+// is not an array or a cue's start/end cannot be read as a finite number.
+export const normalizeRhubarbJson = (
+  rhubarbJson,
+  {audio, recognizer = "phonetic"} = {}
+) => {
+  if (!rhubarbJson || !Array.isArray(rhubarbJson.mouthCues)) {
+    throw new Error("Rhubarb JSON must contain a mouthCues array.");
+  }
+  // Number(null), Number("") and Number([]) are 0, so treat them as missing.
+  const toNumber = (value) =>
+    typeof value === "number" || (typeof value === "string" && value.trim() !== "")
+      ? Number(value)
+      : Number.NaN;
+  const warnings = [];
+  const cues = rhubarbJson.mouthCues.map((cue, index) => {
+    const start = toNumber(cue?.start);
+    const end = toNumber(cue?.end);
+    assertFiniteNumber(start, "start", index);
+    assertFiniteNumber(end, "end", index);
+    const source = typeof cue?.value === "string" ? cue.value : String(cue?.value);
+    return {start, end, mouth: mapRhubarbMouthToJapanese(source, warnings), source};
+  });
+
+  // Prefer the duration Rhubarb reported; otherwise use the end of the last cue.
+  const metadataDuration = toNumber(rhubarbJson.metadata?.duration);
+  const duration = Number.isFinite(metadataDuration)
+    ? metadataDuration
+    : cues.at(-1)?.end ?? 0;
+
+  return {
+    timeline: {
+      version: 1,
+      source: {
+        audio: audio ?? rhubarbJson.metadata?.soundFile ?? "",
+        engine: "rhubarb-lip-sync",
+        recognizer,
+      },
+      duration,
+      cues,
+    },
+    warnings,
+  };
+};
diff --git a/voicevox-remotion-template/scripts/lipsync-utils.test.js b/voicevox-remotion-template/scripts/lipsync-utils.test.js
new file mode 100644
index 0000000..133bb5d
--- /dev/null
+++ b/voicevox-remotion-template/scripts/lipsync-utils.test.js
@@ -0,0 +1,54 @@
+import assert from "node:assert/strict";
+import {test} from "node:test";
+import {normalizeRhubarbJson} from "./lipsync-utils.js";
+
+test("maps Rhubarb mouth shapes to Japanese mouth shapes", () => {
+  const {timeline} = normalizeRhubarbJson(
+    {
+      metadata: {duration: 1.2},
+      mouthCues: [
+        {start: 0, end: 0.1, value: "X"},
+        {start: 0.1, end: 0.2, value: "D"},
+        {start: 0.2, end: 0.3, value: "F"},
+      ],
+    },
+    {audio: "audio/example.wav"}
+  );
+
+  assert.deepEqual(
+    timeline.cues.map((cue) => cue.mouth),
+    ["rest", "a", "u"]
+  );
+});
+
+test("uses rest for unknown shapes and reports a warning", () => {
+  const {timeline, warnings} = normalizeRhubarbJson(
+    {
+      mouthCues: [{start: 0, end: 0.1, value: "Z"}],
+    },
+    {audio: "audio/example.wav"}
+  );
+
+  assert.equal(timeline.cues[0].mouth, "rest");
+  assert.match(warnings[0], /Unknown Rhubarb mouth shape "Z"/);
+});
+
+test("uses metadata duration when available", () => {
+  const {timeline} = normalizeRhubarbJson({
+    metadata: {duration: 2.5},
+    mouthCues: [{start: 0, end: 0.1, value: "X"}],
+  });
+
+  assert.equal(timeline.duration, 2.5);
+});
+
+test("falls back to the last cue end for duration", () => {
+  const {timeline} = normalizeRhubarbJson({
+    mouthCues: [
+      {start: 0, end: 0.4, value: "X"},
+      {start: 0.4, end: 0.8, value: "D"},
+    ],
+  });
+
+  assert.equal(timeline.duration, 0.8);
+});
diff --git a/voicevox-remotion-template/src/avatar-animations.ts b/voicevox-remotion-template/src/avatar-animations.ts
index 3fcadf5..22a42f1 100644
--- a/voicevox-remotion-template/src/avatar-animations.ts
+++ b/voicevox-remotion-template/src/avatar-animations.ts
@@ -19,8 +19,16 @@
 
 export type IdleAvatarAnimationType = keyof typeof idleAvatarAnimations;
 
-export const speakingAvatarAnimations = {
+type SpeakingAvatarAnimationMap = Readonly<{
+  none: AvatarAnimation;
+  rhubarbLipSync: AvatarAnimation;
+  gentleBob: AvatarAnimation;
+  quickHop: AvatarAnimation;
+}>;
+
+export const speakingAvatarAnimations: SpeakingAvatarAnimationMap = {
   none: () => 0,
+  rhubarbLipSync: () => 0,
   gentleBob: idleAvatarAnimations.gentleBob,
   quickHop: ({frame, fps}) => {
     const cycleFrames = Math.max(1, Math.round(fps * 0.25));
@@ -28,6 +36,6 @@
 
     return -Math.sin(progress * Math.PI) * 7;
   },
-} satisfies Record<string, AvatarAnimation>;
+};
 
 export type SpeakingAvatarAnimationType = keyof typeof speakingAvatarAnimations;
diff --git a/voicevox-remotion-template/src/data/pizza-kiln/script.ts b/voicevox-remotion-template/src/data/pizza-kiln/script.ts
index 158a5e3..5670dc7 100644
--- a/voicevox-remotion-template/src/data/pizza-kiln/script.ts
+++ b/voicevox-remotion-template/src/data/pizza-kiln/script.ts
@@ -1,3 +1,5 @@
+import type {SpeakingAvatarAnimationType} from "../../avatar-animations";
+
 export type VoicevoxVoice = Readonly<{
   speakerName: string;
   styleName: string;
@@ -7,8 +9,11 @@
   displayName: string;
   voicevox: VoicevoxVoice;
   avatar: Readonly<{
+    kind: "sayo";
     accentColor: string;
     imagePath: string;
+    mouthImageDir?: string;
+    speakingAnimationType?: SpeakingAvatarAnimationType;
   }>;
 }>;
 
@@ -20,8 +25,10 @@
       styleName: "ノーマル",
     },
     avatar: {
+      kind: "sayo",
       accentColor: "#6b5f83",
       imagePath: "image/sayo-standee-base.png",
+      speakingAnimationType: "rhubarbLipSync",
     },
   },
 } as const satisfies Record<string, CharacterDefinition>;
diff --git a/voicevox-remotion-template/src/data/script.ts b/voicevox-remotion-template/src/data/script.ts
index fdc83fd..a745d5f 100644
--- a/voicevox-remotion-template/src/data/script.ts
+++ b/voicevox-remotion-template/src/data/script.ts
@@ -12,6 +12,7 @@
   kind: "zundamon" | "sayo";
   accentColor: string;
   imagePath?: string;
+  mouthImageDir?: string;
   imageLayout?: Readonly<{
     width?: number;
     maxHeight?: number;
@@ -48,7 +49,7 @@
       },
       nameplatePosition: "none",
       idleAnimationType: "none",
-      speakingAnimationType: "quickHop",
+      speakingAnimationType: "rhubarbLipSync",
     },
   },
   sayo: {
@@ -68,7 +69,7 @@
       },
       nameplatePosition: "none",
       idleAnimationType: "none",
-      speakingAnimationType: "quickHop",
+      speakingAnimationType: "rhubarbLipSync",
     },
   },
 } as const satisfies Record<string, CharacterDefinition>;
diff --git a/voicevox-remotion-template/src/lipsync/LipSyncedStandeeImage.tsx b/voicevox-remotion-template/src/lipsync/LipSyncedStandeeImage.tsx
new file mode 100644
index 0000000..81f2271
--- /dev/null
+++ b/voicevox-remotion-template/src/lipsync/LipSyncedStandeeImage.tsx
@@ -0,0 +1,64 @@
+import React from "react";
+import {Img, staticFile} from "remotion";
+import type {JapaneseMouthShape} from "./types";
+
+type LipSyncedStandeeImageProps = Readonly<{
+  imagePath: string;
+  mouthImageDir: string;
+  mouth: JapaneseMouthShape;
+  width: number | string;
+  maxHeight: number | string;
+  height?: number | string;
+  transform?: string;
+  filter?: string;
+}>;
+
+export const defaultMouthImageDir = (avatarKind: string) =>
+  `image/${avatarKind}-rhubarb-mouths`;
+
+export const LipSyncedStandeeImage: React.FC<LipSyncedStandeeImageProps> = ({
+  imagePath,
+  mouthImageDir,
+  mouth,
+  width,
+  maxHeight,
+  height,
+  transform,
+  filter,
+}) => {
+  return (
+    <div
+      style={{
+        position: "relative",
+        zIndex: 1,
+        width,
+        height,
+        maxHeight,
+        lineHeight: 0,
+        transform,
+      }}
+    >
+      <Img
+        src={staticFile(imagePath)}
+        style={{
+          width: "100%",
+          height: height ? "100%" : undefined,
+          maxHeight,
+          objectFit: "contain",
+          filter,
+        }}
+      />
+      <Img
+        src={staticFile(`${mouthImageDir}/${mouth}.png`)}
+        style={{
+          position: "absolute",
+          inset: 0,
+          width: "100%",
+          height: "100%",
+          objectFit: "contain",
+          pointerEvents: "none",
+        }}
+      />
+    </div>
+  );
+};
diff --git a/voicevox-remotion-template/src/lipsync/getMouthAtTime.ts b/voicevox-remotion-template/src/lipsync/getMouthAtTime.ts
new file mode 100644
index 0000000..a818ac5
--- /dev/null
+++ b/voicevox-remotion-template/src/lipsync/getMouthAtTime.ts
@@ -0,0 +1,8 @@
+import type {JapaneseMouthShape, MouthTimeline} from "./types";
+
+export const getMouthAtTime = (
+  timeline: MouthTimeline | undefined,
+  seconds: number
+): JapaneseMouthShape =>
+  timeline?.cues.find((cue) => seconds >= cue.start && seconds < cue.end)
+    ?.mouth ?? "rest";
diff --git a/voicevox-remotion-template/src/lipsync/manifest.ts b/voicevox-remotion-template/src/lipsync/manifest.ts
new file mode 100644
index 0000000..f0a4a7b
--- /dev/null
+++ b/voicevox-remotion-template/src/lipsync/manifest.ts
@@ -0,0 +1,21 @@
+import generatedManifest from "../generated/lipsync/manifest.json";
+import {getMouthAtTime} from "./getMouthAtTime";
+import type {
+  JapaneseMouthShape,
+  MouthTimeline,
+  MouthTimelineManifest,
+} from "./types";
+
+const lipsyncManifest = generatedManifest as MouthTimelineManifest;
+
+export const mouthTimelineForSpeech = (
+  speechId: string | undefined
+): MouthTimeline | undefined =>
+  speechId ? lipsyncManifest.timelines[speechId] : undefined;
+
+export const getMouthForSpeechFrame = (
+  speechId: string | undefined,
+  frame: number,
+  fps: number
+): JapaneseMouthShape =>
+  getMouthAtTime(mouthTimelineForSpeech(speechId), frame / fps);
diff --git a/voicevox-remotion-template/src/lipsync/rhubarb-map.ts b/voicevox-remotion-template/src/lipsync/rhubarb-map.ts
new file mode 100644
index 0000000..dbce856
--- /dev/null
+++ b/voicevox-remotion-template/src/lipsync/rhubarb-map.ts
@@ -0,0 +1,22 @@
+import type {JapaneseMouthShape} from "./types";
+
+export const RHUBARB_TO_JA_MOUTH = {
+  X: "rest",
+  A: "closed",
+  B: "i",
+  C: "e",
+  D: "a",
+  E: "o",
+  F: "u",
+  G: "i",
+  H: "e",
+} as const satisfies Record<string, JapaneseMouthShape>;
+
+// Maps a Rhubarb shape letter to a Japanese mouth shape, defaulting to "rest".
+export const rhubarbMouthToJapanese = (source: string): JapaneseMouthShape => {
+  const mouthMap: Readonly<Record<string, JapaneseMouthShape>> =
+    RHUBARB_TO_JA_MOUTH;
+  // Own keys only: inherited names such as "constructor" must not be returned.
+  const isKnown = Object.prototype.hasOwnProperty.call(mouthMap, source);
+  return (isKnown ? mouthMap[source] : undefined) ?? "rest";
+};
diff --git a/voicevox-remotion-template/src/lipsync/types.ts b/voicevox-remotion-template/src/lipsync/types.ts
new file mode 100644
index 0000000..980ef73
--- /dev/null
+++ b/voicevox-remotion-template/src/lipsync/types.ts
@@ -0,0 +1,31 @@
+export type JapaneseMouthShape =
+  | "a"
+  | "i"
+  | "u"
+  | "e"
+  | "o"
+  | "closed"
+  | "rest";
+
+export type MouthCue = Readonly<{
+  start: number;
+  end: number;
+  mouth: JapaneseMouthShape;
+  source: string;
+}>;
+
+export type MouthTimeline = Readonly<{
+  version: 1;
+  source: Readonly<{
+    audio: string;
+    engine: "rhubarb-lip-sync";
+    recognizer: "phonetic";
+  }>;
+  duration: number;
+  cues: MouthCue[];
+}>;
+
+export type MouthTimelineManifest = Readonly<{
+  version: 1;
+  timelines: Record<string, MouthTimeline>;
+}>;
diff --git a/voicevox-remotion-template/src/pizza-kiln-composition.tsx b/voicevox-remotion-template/src/pizza-kiln-composition.tsx
index 5c1f28c..722e762 100644
--- a/voicevox-remotion-template/src/pizza-kiln-composition.tsx
+++ b/voicevox-remotion-template/src/pizza-kiln-composition.tsx
@@ -2,7 +2,6 @@
 import {Audio, Video} from "@remotion/media";
 import {
   AbsoluteFill,
-  Img,
   interpolate,
   Sequence,
   spring,
@@ -17,8 +16,19 @@
   PIZZA_KILN_GAP_FRAMES,
   PIZZA_KILN_VIDEO_FRAMES,
 } from "./data/pizza-kiln/timing";
-import {characters, timeline, type SpeechEvent} from "./data/pizza-kiln/script";
+import {
+  characters,
+  timeline,
+  type CharacterDefinition,
+  type SpeechEvent,
+} from "./data/pizza-kiln/script";
 import {roundedFontFamily} from "./fonts";
+import {speakingAvatarAnimations} from "./avatar-animations";
+import {getMouthForSpeechFrame} from "./lipsync/manifest";
+import {
+  defaultMouthImageDir,
+  LipSyncedStandeeImage,
+} from "./lipsync/LipSyncedStandeeImage";
 
 const BACKGROUND_VIDEO_PATH = "video/pizza-kiln-background.mp4";
 
@@ -98,12 +108,23 @@
     speaking: boolean;
     localFrame: number;
     fps: number;
+    speechId?: string;
   }>
-> = ({mode, speaking, localFrame, fps}) => {
-  const cycleFrames = Math.max(1, Math.round(fps * 0.25));
-  const progress = (localFrame % cycleFrames) / cycleFrames;
-  const speakingHop = -Math.sin(progress * Math.PI) * 7;
-  const translateY = speaking ? speakingHop : 0;
+> = ({mode, speaking, localFrame, fps, speechId}) => {
+  const {avatar}: {avatar: CharacterDefinition["avatar"]} = characters.sayo;
+  const speakingAnimationType = avatar.speakingAnimationType ?? "none";
+  const translateY = speaking
+    ? speakingAvatarAnimations[speakingAnimationType]({
+        frame: localFrame,
+        fps,
+        focused: true,
+        hasMultipleCharacters: false,
+      })
+    : 0;
+  const mouth =
+    speaking && speakingAnimationType === "rhubarbLipSync"
+      ? getMouthForSpeechFrame(speechId, localFrame, fps)
+      : "rest";
   const isCorner = mode === "corner";
 
   return (
@@ -121,16 +142,20 @@
         zIndex: 3,
       }}
     >
-      <Img
-        src={staticFile(characters.sayo.avatar.imagePath)}
-        style={{
-          width: "100%",
-          maxHeight: "100%",
-          objectFit: "contain",
-          filter: isCorner
+      <LipSyncedStandeeImage
+        imagePath={avatar.imagePath}
+        mouthImageDir={
+          avatar.mouthImageDir ?? defaultMouthImageDir(avatar.kind)
+        }
+        mouth={mouth}
+        width="100%"
+        height="100%"
+        maxHeight="100%"
+        filter={
+          isCorner
             ? "drop-shadow(0 12px 24px rgba(0, 0, 0, 0.32))"
-            : "drop-shadow(0 18px 40px rgba(31, 42, 68, 0.22))",
-        }}
+            : "drop-shadow(0 18px 40px rgba(31, 42, 68, 0.22))"
+        }
       />
     </div>
   );
@@ -185,8 +210,9 @@
   const isVideoVisible =
     frame >= videoFrom && frame < videoFrom + PIZZA_KILN_VIDEO_FRAMES;
   const isOutro = frame >= outroFrom;
-  const isSpeaking = frame < introFrames || isOutro;
-  const speechLocalFrame = isOutro ? frame - outroFrom : frame;
+  const activeSpeech =
+    frame < introFrames ? introSpeech : isOutro ? outroSpeech : undefined;
+  const speechLocalFrame = activeSpeech === outroSpeech ? frame - outroFrom : frame;
 
   return (
     <AbsoluteFill style={{backgroundColor: "#1a1a1a"}}>
@@ -208,9 +234,10 @@
       </Sequence>
       <SayoStandee
         mode={isVideoVisible ? "corner" : "stage"}
-        speaking={isSpeaking}
+        speaking={Boolean(activeSpeech)}
         localFrame={speechLocalFrame}
         fps={fps}
+        speechId={activeSpeech?.id}
       />
       <Sequence durationInFrames={introFrames} premountFor={0}>
         <SpeechOverlay speech={introSpeech} />
diff --git a/voicevox-remotion-template/src/yukkuri-composition.tsx b/voicevox-remotion-template/src/yukkuri-composition.tsx
index 47b2ea7..8a1b467 100644
--- a/voicevox-remotion-template/src/yukkuri-composition.tsx
+++ b/voicevox-remotion-template/src/yukkuri-composition.tsx
@@ -2,7 +2,6 @@
 import {Audio} from "@remotion/media";
 import {
   AbsoluteFill,
-  Img,
   interpolate,
   Sequence,
   spring,
@@ -29,6 +28,11 @@
   hasAudioForSpeech,
 } from "./data/timing";
 import {roundedFontFamily} from "./fonts";
+import {getMouthForSpeechFrame} from "./lipsync/manifest";
+import {
+  defaultMouthImageDir,
+  LipSyncedStandeeImage,
+} from "./lipsync/LipSyncedStandeeImage";
 
 type ScheduledTimelineEvent = Readonly<{
   event: TimelineEvent;
@@ -85,6 +89,23 @@
   extrapolateRight: "clamp",
 } as const;
 
+const mouthForSpeechFrame = ({
+  isSpeaking,
+  speakingAnimationType,
+  speechId,
+  speakingLocalFrame,
+  fps,
+}: Readonly<{
+  isSpeaking: boolean;
+  speakingAnimationType: string;
+  speechId?: string;
+  speakingLocalFrame: number;
+  fps: number;
+}>) =>
+  isSpeaking && speakingAnimationType === "rhubarbLipSync"
+    ? getMouthForSpeechFrame(speechId, speakingLocalFrame, fps)
+    : "rest";
+
 const Title: React.FC<Readonly<{progress: number}>> = ({progress}) => {
   const opacity = interpolate(progress, [0, 1], [0, 1], clampInterpolation);
   const translateY = interpolate(progress, [0, 1], [-30, 0], clampInterpolation);
@@ -388,8 +409,19 @@
     frame: number;
     fps: number;
     isSpeaking: boolean;
+    speakingSpeechId?: string;
+    speakingLocalFrame: number;
   }>
-> = ({characterId, focused, hasMultipleCharacters, frame, fps, isSpeaking}) => {
+> = ({
+  characterId,
+  focused,
+  hasMultipleCharacters,
+  frame,
+  fps,
+  isSpeaking,
+  speakingSpeechId,
+  speakingLocalFrame,
+}) => {
   const character = characters[characterId];
   const {avatar}: {avatar: AvatarDefinition} = character;
   const scale = hasMultipleCharacters ? 0.88 : focused ? 1.05 : 1;
@@ -414,6 +446,13 @@
   const imageTransform = `translateY(${imageTranslateY}px) scaleX(${imageScaleX})`;
   const nameplatePosition = avatar.nameplatePosition ?? "bottom";
   const showNameplate = nameplatePosition !== "none";
+  const mouth = mouthForSpeechFrame({
+    isSpeaking,
+    speakingAnimationType,
+    speechId: speakingSpeechId,
+    speakingLocalFrame,
+    fps,
+  });
 
   const nameplate = (
     <div
@@ -448,17 +487,14 @@
     >
       {showNameplate && nameplatePosition === "top" ? nameplate : null}
       {avatar.imagePath ? (
-        <Img
-          src={staticFile(avatar.imagePath)}
-          style={{
-            width: avatar.imageLayout?.width ?? 320,
-            maxHeight: avatar.imageLayout?.maxHeight ?? 360,
-            objectFit: "contain",
-            filter: "drop-shadow(0 18px 40px rgba(31, 42, 68, 0.22))",
-            position: "relative",
-            zIndex: 1,
-            transform: imageTransform,
-          }}
+        <LipSyncedStandeeImage
+          imagePath={avatar.imagePath}
+          mouthImageDir={avatar.mouthImageDir ?? defaultMouthImageDir(avatar.kind)}
+          mouth={mouth}
+          width={avatar.imageLayout?.width ?? 320}
+          maxHeight={avatar.imageLayout?.maxHeight ?? 360}
+          filter="drop-shadow(0 18px 40px rgba(31, 42, 68, 0.22))"
+          transform={imageTransform}
         />
       ) : avatar.kind === "sayo" ? (
         <div
@@ -483,10 +519,20 @@
     visibleCharacters: CharacterId[];
     focusedCharacter?: CharacterId;
     speakingCharacter?: CharacterId;
+    speakingSpeechId?: string;
+    speakingLocalFrame: number;
     frame: number;
     fps: number;
   }>
-> = ({visibleCharacters, focusedCharacter, speakingCharacter, frame, fps}) => {
+> = ({
+  visibleCharacters,
+  focusedCharacter,
+  speakingCharacter,
+  speakingSpeechId,
+  speakingLocalFrame,
+  frame,
+  fps,
+}) => {
   const hasMultipleCharacters = visibleCharacters.length > 1;
 
   return (
@@ -510,6 +556,8 @@
           frame={frame}
           fps={fps}
           isSpeaking={speakingCharacter === characterId}
+          speakingSpeechId={speakingSpeechId}
+          speakingLocalFrame={speakingLocalFrame}
         />
       ))}
     </div>
@@ -583,10 +631,12 @@
     fps,
     config: {damping: 18, mass: 0.6},
   });
-  const speakingCharacter =
+  const activeSpeech =
     isInsideActiveSegment && activeSegment.event.type === "say"
-      ? activeSegment.event.character
+      ? activeSegment.event
       : undefined;
+  const speakingCharacter = activeSpeech?.character;
+  const speakingLocalFrame = activeSpeech ? frame - activeSegment.from : 0;
 
   const sequences = scheduledEvents.map((scheduledEvent, index) => (
     <Sequence
@@ -625,6 +675,8 @@
           isInsideActiveSegment ? activeSegment.focusedCharacter : undefined
         }
         speakingCharacter={speakingCharacter}
+        speakingSpeechId={activeSpeech?.id}
+        speakingLocalFrame={speakingLocalFrame}
         frame={frame}
         fps={fps}
       />