commit · quire · e0866782

Switch CI run store from filesystem to SQLite

Replaces the directory-per-run state management (pending/active/complete/failed
directories with YAML sidecars) with a single SQLite database at
<quire-root>/quire.db. State transitions are now single UPDATE statements
inside transactions, orphan reconciliation is a single SQL pass, and container
lifecycle timestamps are tracked in the runs table.

db::open is the sole connection factory (WAL mode, foreign keys). db::migrate
runs pending migrations — called once at server startup, not on every open.
Runs and Run each open their own connection via db::open. Ci::runs is removed;
all callers use Repo::runs, which derives the runs base dir from the repo name
and quire root (single source of truth).

Run directories on disk persist only for workspace materialization and
per-job log files.

Schema: runs table with CHECK constraints enforcing the state machine,
jobs table for per-job state. Migrations via rusqlite_migration with
SQL files under migrations/.

Assisted-by: GLM-5.1 via pi

change owmontxuymnsqzznloupzqopsnvysmvw

commit e08667824bcc3a03e208e55026bc78cb07be2b0d

author Alpha Chen <alpha@kejadlen.dev>

date 1mo ago

parent ssukkzww

diff --git a/Cargo.lock b/Cargo.lock
index b19270d..242efe0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -34,7 +34,7 @@ dependencies = [
  "bytestring",
  "derive_more",
  "encoding_rs",
- "foldhash",
+ "foldhash 0.1.5",
  "futures-core",
  "http 0.2.12",
  "httparse",
@@ -129,7 +129,7 @@ dependencies = [
  "cfg-if",
  "derive_more",
  "encoding_rs",
- "foldhash",
+ "foldhash 0.1.5",
  "futures-core",
  "futures-util",
  "impl-more",
@@ -699,6 +699,18 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
+[[package]]
+name = "fallible-iterator"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
+
+[[package]]
+name = "fallible-streaming-iterator"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
+
 [[package]]
 name = "fastrand"
 version = "2.4.1"
@@ -750,6 +762,12 @@ version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
 
+[[package]]
+name = "foldhash"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
+
 [[package]]
 name = "form_urlencoded"
 version = "1.2.2"
@@ -905,7 +923,16 @@ version = "0.15.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
 dependencies = [
- "foldhash",
+ "foldhash 0.1.5",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+dependencies = [
+ "foldhash 0.2.0",
 ]
 
 [[package]]
@@ -914,6 +941,15 @@ version = "0.17.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51"
 
+[[package]]
+name = "hashlink"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea0b22561a9c04a7cb1a302c013e0259cd3b4bb619f145b32f72b8b4bcbed230"
+dependencies = [
+ "hashbrown 0.16.1",
+]
+
 [[package]]
 name = "heck"
 version = "0.5.0"
@@ -1376,6 +1412,17 @@ version = "0.2.186"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
 
+[[package]]
+name = "libsqlite3-sys"
+version = "0.37.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1f111c8c41e7c61a49cd34e44c7619462967221a6443b0ec299e0ac30cfb9b1"
+dependencies = [
+ "cc",
+ "pkg-config",
+ "vcpkg",
+]
+
 [[package]]
 name = "linux-raw-sys"
 version = "0.12.1"
@@ -2023,6 +2070,8 @@ dependencies = [
  "petgraph",
  "predicates",
  "regex",
+ "rusqlite",
+ "rusqlite_migration",
  "sentry",
  "sentry-tracing",
  "serde",
@@ -2186,6 +2235,41 @@ dependencies = [
  "windows-sys 0.52.0",
 ]
 
+[[package]]
+name = "rsqlite-vfs"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8a1f2315036ef6b1fbacd1972e8ee7688030b0a2121edfc2a6550febd41574d"
+dependencies = [
+ "hashbrown 0.16.1",
+ "thiserror",
+]
+
+[[package]]
+name = "rusqlite"
+version = "0.39.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0d2b0146dd9661bf67bb107c0bb2a55064d556eeb3fc314151b957f313bcd4e"
+dependencies = [
+ "bitflags",
+ "fallible-iterator",
+ "fallible-streaming-iterator",
+ "hashlink",
+ "libsqlite3-sys",
+ "smallvec",
+ "sqlite-wasm-rs",
+]
+
+[[package]]
+name = "rusqlite_migration"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "410e4d2d97ff816796ed012b789c7381ae42c09a809822a75d29a01022181184"
+dependencies = [
+ "log",
+ "rusqlite",
+]
+
 [[package]]
 name = "rustc-demangle"
 version = "0.1.27"
@@ -2652,6 +2736,18 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
+[[package]]
+name = "sqlite-wasm-rs"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b2c760607300407ddeaee518acf28c795661b7108c75421303dbefb237d3a36"
+dependencies = [
+ "cc",
+ "js-sys",
+ "rsqlite-vfs",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "stable_deref_trait"
 version = "1.2.1"
@@ -3164,6 +3260,12 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
 
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
 [[package]]
 name = "wait-timeout"
 version = "0.2.1"
diff --git a/Cargo.toml b/Cargo.toml
index 4df289c..cd784ed 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,6 +17,8 @@ miette = { version = "*", features = ["fancy"] }
 mlua = { version = "*", features = ["lua54", "serde", "vendored", "error-send"] }
 regex = "*"
 petgraph = "*"
+rusqlite = { version = "*", features = ["bundled"] }
+rusqlite_migration = "*"
 sentry = { version = "*", features = ["backtrace", "contexts", "debug-images", "panic", "release-health", "reqwest", "rustls", "tokio"], default-features = false }
 sentry-tracing = "*"
 serde = { version = "*", features = ["derive"] }
diff --git a/docs/CI.md b/docs/CI.md
index 0a74a2d..48e676d 100644
--- a/docs/CI.md
+++ b/docs/CI.md
@@ -26,31 +26,33 @@ The runner doesn't get its own process because **it doesn't execute user code in
 
 Within the host process, `(sh ...)` is the only sanctioned host-effect primitive in the Lua VM. See "Sandbox the in-process VM" below — the compile-then-execute split removes `io`/`os`/`debug` from the execute VM so a buggy or hostile ci.fnl can't bypass the chokepoint.
 
-## Communication: filesystem as state of record, channels as optimization
+## Communication: SQLite as state of record, channels as optimization
 
-Run records on disk are the **durable truth** once written. The hook is a thin transport: it sends a push event over a Unix socket to `quire serve`, which is the sole writer of run records on disk.
+Run records in SQLite are the **durable truth** once written. The hook is a thin transport: it sends a push event over a Unix socket to `quire serve`, which is the sole writer of run records.
 
-| Component | Reads from disk | Writes to disk | In-memory comms |
+| Component | Reads from | Writes to | In-memory comms |
 | --- | --- | --- | --- |
 | Hook (`post-receive`) | — | — | push event → `quire serve` socket listener |
-| Runner (in-process with `quire serve`) | run records on startup | `meta.json`, `state.json`, `jobs/*/`, logs | wakeup from listener (mpsc); broadcast logs → web |
-| Web (`quire serve`) | run records on demand | — | subscribe to log broadcasts |
+| Runner (in-process with `quire serve`) | SQLite on startup | SQLite, `jobs/*/`, logs | wakeup from listener (Notify); broadcast logs → web |
+| Web (`quire serve`) | SQLite on demand | — | subscribe to log broadcasts |
 
-The listener task (also inside `quire serve`) bridges the hook process boundary to the in-process runner. It binds `/var/quire/server.sock` on startup, parses incoming events, writes the initial run record, and signals the runner via mpsc. The wakeup signal carries no payload — the runner re-derives state by scanning `pending/`, so missed or duplicated wakes are idempotent.
+The listener task (also inside `quire serve`) bridges the hook process boundary to the in-process runner. It binds `/var/quire/server.sock` on startup, parses incoming events, inserts the initial run row, and signals the runner. The wakeup signal carries no payload — the runner queries SQLite for the next pending run, so missed or duplicated wakes are idempotent.
 
-On startup, the runner walks `runs/pending/` and `runs/active/`, reconstructs the queue, and reconciles orphans (any `active/` entry whose container is no longer running gets marked failed). Crash resilience covers `quire serve` restart: any run record committed before the crash gets picked up.
+On startup, the runner reconciles orphans: any `active` row whose container is no longer running gets marked `failed`. Crash resilience covers `quire serve` restart: any run row committed before the crash gets picked up.
 
-**v1 limitation: zero-loss-on-server-down is not provided.** If `quire serve` is down at push time, the hook's socket connect fails, the pusher sees a stderr warning, and no run is created. The push itself remains accepted by git (post-receive runs after acceptance). The v1 mitigation is "run `quire serve` under a supervisor that restarts it"; a hook fallback that writes `meta.json` directly when the socket is unreachable is a deferred follow-up if this ever bites in practice.
+**v1 limitation: zero-loss-on-server-down is not provided.** If `quire serve` is down at push time, the hook's socket connect fails, the pusher sees a stderr warning, and no run is created. The push itself remains accepted by git (post-receive runs after acceptance). The v1 mitigation is "run `quire serve` under a supervisor that restarts it"; a hook fallback that inserts directly into SQLite when the socket is unreachable is a deferred follow-up if this ever bites in practice.
 
-The "we could one day extract the runner into its own process" door stays open: the on-disk schema doesn't change, the listener-to-runner mpsc becomes a Unix socket. Not building it now.
+The "we could one day extract the runner into its own process" door stays open: the SQLite schema doesn't change, the listener-to-runner wakeup becomes a Unix socket. Not building it now.
 
-## Storage: no database
+## Storage: SQLite
 
-No SQLite in v1. Run records, job state, logs, and the queue all live as files under `/var/quire/runs/`. None of the queries the v1 web UI wants are slow at this scale; the in-memory queue handles low-latency enqueue/dequeue.
+Run state, job state, and the run queue live in a single SQLite database at `<quire-data-root>/quire.db`. The database is the primary store for all run lifecycle data. The filesystem holds per-run workspaces and per-job log files only.
 
-**The commitment, written down so it doesn't drift:** if SQLite ever earns its keep — most likely trigger is FTS5 over logs — it enters as a **secondary index over the filesystem**, never as a primary store. Files remain canonical. The database is rebuildable: `quire reindex` walks `runs/` and repopulates. If the database is corrupted or lost, recovery is mechanical. The rule: `rm /var/quire/quire.db && quire reindex` returns the system to a working state. If that ever stops being true, the database has crossed a line it shouldn't have.
+The database is project-scoped, not CI-scoped, even though the only tables today are CI tables. Future tables (config snapshots, hook event audit, etc.) live in the same file.
 
-This is the principle that prevents drift. The temptation to migrate state into SQLite ("just this one thing, it's so much easier") is constant once it exists; without the rule written down, the second time you reach for SQLite you'll have forgotten why you originally said no.
+Migrations are SQL files under `migrations/` at the project root, embedded into the binary via `include_str!`. `rusqlite_migration` tracks `PRAGMA user_version` and applies missing migrations transactionally. Each future schema change adds a new file (`0002_*.sql`, `0003_*.sql`, …) and a corresponding `M::up` entry. Files are append-only — never edit a migration that has already shipped.
+
+SQLite is the queue: `quire serve` finds the next pending run with a `SELECT`, not by scanning directories. The wakeup signal stays an in-process `tokio::sync::Notify` — used only to nudge the runner, not to carry data.
 
 ## Concurrency: max one run at a time
 
@@ -146,28 +148,30 @@ The reason this is the chosen path rather than "subprocess + rlimit, no bwrap" 
 
 ## Run lifecycle
 
-1. **`post-receive` hook** sends a push event (one JSON line: `{type, repo, pushed_at, refs: [{ref, old_sha, new_sha}, ...]}`) over `/var/quire/server.sock` and exits. The listener task in `quire serve` parses the event, allocates a run-id per ref, writes `runs/<repo>/<run-id>/{meta.json, state.json}`, and signals the runner via mpsc. No CI work runs in the hook itself.
-2. **Runner picks up** the entry from the queue. Atomic rename `pending/<id>` → `active/<id>` for state-machine clarity.
+1. **`post-receive` hook** sends a push event (one JSON line: `{type, repo, pushed_at, refs: [{ref, old_sha, new_sha}, ...]}`) over `/var/quire/server.sock` and exits. The listener task in `quire serve` parses the event, allocates a run-id per ref, inserts a row into `runs` in `pending` state, and signals the runner. No CI work runs in the hook itself.
+2. **Runner picks up** the entry from the queue. Single `UPDATE runs SET state = 'active'` in SQLite.
 3. **Materialize workspace.** `git --git-dir=repos/foo.git archive <sha> | tar -x -C workspace/`. No worktree, no checkout state on the bare repo. Workspace is throwaway; deleted at end of run.
 4. **Evaluate `.quire/ci.fnl`** in the host process (see above). Pipeline image is read from the `(ci.image ...)` registration; jobs are registered via `(ci.job ...)`; the run-fns are not yet invoked.
-5. **Start the run container.** `docker run -d --rm --mount type=bind,src=<run-dir>,dst=/work -w /work <image> sleep infinity`. Container ID stowed on the runtime. The run's container hosts every `(sh ...)` call from every job in the run.
-6. **Per ready job:** invoke its run-fn in topological order. Each `(sh ...)` call inside the run-fn issues `docker exec` (no TTY) into the run container, streams stdout/stderr into `jobs/<job-id>/log.jsonl` as JSONL events (one per `sh-start`, `stdout`/`stderr`, `sh-exit`), and returns `{exit, stdout, stderr, cmd}` to Lua. Container-level events (`container-start`, `container-died`, `container-end`) go into the run's own `<run-dir>/log.jsonl`.
-7. **Tear down the run container.** `docker stop` + `docker rm`. Even on error paths — no orphaned containers if a run-fn errors.
-8. **Aggregate.** Write final status to the run directory. Move `active/<id>` → `complete/<id>` (or `failed/<id>`).
+5. **Start the run container.** `docker run -d --rm --mount type=bind,src=<run-dir>,dst=/work -w /work <image> sleep infinity`. Container ID written to the `runs` row. The run's container hosts every `(sh ...)` call from every job in the run.
+6. **Per ready job:** invoke its run-fn in topological order. Each `(sh ...)` call inside the run-fn issues `docker exec` (no TTY) into the run container, captures stdout/stderr and exit code, and returns `{exit, stdout, stderr, cmd}` to Lua.
+7. **Tear down the run container.** `docker stop` + `docker rm`. Even on error paths — no orphaned containers if a run-fn errors. `container_stopped_at_ms` written to the `runs` row.
+8. **Aggregate.** Write final status via `UPDATE runs SET state = 'complete'` (or `'failed'`). Per-job logs are written to `jobs/<job-id>/log.yml` on disk before the final transition.
 
 ## Run record schema
 
 ```
+quire.db
+  runs table: id, repo, ref_name, sha, pushed_at_ms, state, failure_kind,
+              queued_at_ms, started_at_ms, finished_at_ms, container_id,
+              image_tag, build_started_at_ms, build_finished_at_ms,
+              container_started_at_ms, container_stopped_at_ms, workspace_path
+  jobs table:  run_id, job_id, state, exit_code, started_at_ms, finished_at_ms
+
 runs/<repo>/<run-id>/
-  meta.json        # immutable: sha, ref, pusher, pushed_at
-  state.json       # mutable: status, started_at, finished_at, runner_pid, container_id
-  log.jsonl        # per-run events: container-start, container-died, container-end
+  workspace/               # materialized checkout
   jobs/
     <job-id>/
-      spec.json    # immutable: inputs, registration source location
-      state.json   # mutable: status, started_at, finished_at, outputs
-      log.jsonl    # per-job events: sh-start, stdout, stderr, sh-exit
-  cancel           # touch-file; runner checks before each job
+      log.yml              # per-job sh output logs
 ```
 
 Two principles fall out:
@@ -246,7 +250,7 @@ Punt on cache invalidation until it actually annoys. "Delete the cache dir" is a
 ## Locked-in decisions
 
 * **Runner is in-process** with `quire serve` as a tokio task; not a separate process. Filesystem is the state of record; channels are the wakeup optimization.
-* **No SQLite in v1.** If it enters later, it's a secondary index over the filesystem, never primary. `rm quire.db && quire reindex` must always recover.
+* **SQLite is the primary store for run and job state.** Migrations under `migrations/`, embedded into the binary. The filesystem holds workspaces and per-job log files only.
 * **Per-run container**, not per-job and not long-lived runners. One `docker run` at run start, `docker exec` per `(sh ...)` call from each job, `docker stop` at run end. Per-job container differentiation is a deferred extension.
 * **`(sh ...)` is the only host-effect primitive in the Lua VM.** No `(container ...)` primitive. The execute VM is hardened (no `io`/`os`/`debug`) so `sh` becomes the documented chokepoint — every effect is auditable, persistable, redactable in one place.
 * **Pipeline-level image declaration via `(ci.image ...)`.** Single image per pipeline; per-job override deferred until pipelines actually need heterogeneity.
diff --git a/docs/PLAN.md b/docs/PLAN.md
index 8e7cd05..abb6a61 100644
--- a/docs/PLAN.md
+++ b/docs/PLAN.md
@@ -46,6 +46,7 @@ One volume mounted into the container:
 
 ```
 /var/quire/
+  quire.db                       # SQLite database
   repos/
     foo.git/
       quire/
@@ -54,10 +55,11 @@ One volume mounted into the container:
         quire/...
   runs/
     <repo>/<run-id>/
-      meta.fnl               status, ref, sha, pipeline source, timings
-      log                    streamed stdout/stderr
-      artifacts/
-  config.fnl                 global config
+      workspace/                 # materialized checkout
+      jobs/
+        <job-id>/
+          log.yml                # per-job sh output logs
+  config.fnl                 # global config
 ```
 
 Per-repo config (`public_runs`, etc.) is checked into the repo at `.quire/config.fnl`, not stored in the bare repo's `quire/` directory. The `quire/` directory holds only generated artifacts.
diff --git a/docs/superpowers/plans/2026-05-06-ci-sqlite-migration.md b/docs/superpowers/plans/2026-05-06-ci-sqlite-migration.md
new file mode 100644
index 0000000..3c32c05
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-06-ci-sqlite-migration.md
@@ -0,0 +1,913 @@
+# CI SQLite Migration Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Replace the filesystem-backed CI run store with a single SQLite database, preserving the existing CI lifecycle (trigger → execute → complete/fail).
+
+**Architecture:** A `src/db.rs` module owns the SQLite connection, migrations, and schema. `Runs` and `Run` structs query and mutate the DB instead of managing directories. State directories (`pending/`, `active/`, `complete/`, `failed/`) and per-run YAML sidecars are removed. Run directories persist only for workspace materialization and log storage.
+
+**Tech Stack:** `rusqlite`, `rusqlite_migration`, `include_str!` for embedded SQL migrations.
+
+**Design doc:** `docs/plans/2026-05-06-ci-sqlite-migration-design.md`
+
+---
+
+## File structure
+
+| Action | File | Responsibility |
+|--------|------|---------------|
+| Create | `migrations/0001_initial.sql` | Schema DDL for `runs` and `jobs` tables |
+| Create | `src/db.rs` | Connection management, WAL mode, migration runner |
+| Modify | `Cargo.toml` | Add `rusqlite`, `rusqlite_migration` dependencies |
+| Modify | `src/lib.rs` | Export `db` module |
+| Modify | `src/ci/run.rs` | Rewrite `Runs` and `Run` to use SQLite; remove `write_yaml`/`read_yaml` helpers |
+| Modify | `src/ci/mod.rs` | Update `trigger_ref` to pass DB conn; remove old filesystem paths |
+| Modify | `src/ci/error.rs` | Add `Sql` error variant, remove `Yaml` variant |
+| Modify | `src/ci/runtime.rs` | Update `DockerLifecycle` to use DB for container record writes |
+| Modify | `src/ci/docker.rs` | No changes expected (shell-out layer) |
+| Modify | `src/quire.rs` | Add `db()` method returning a DB handle; remove `Runs` convenience methods that are now DB-scoped |
+| Modify | `src/bin/quire/server.rs` | Open DB on startup; pass to orphan reconciliation |
+| Modify | `src/bin/quire/commands/ci.rs` | Use DB for `ci run` command |
+| Modify | `docs/CI.md` | Update storage section, layout, lifecycle description |
+
+---
+
+## Task 1: Add dependencies and create migration file
+
+**Files:**
+- Create: `migrations/0001_initial.sql`
+- Modify: `Cargo.toml`
+
+- [ ] **Step 1: Add rusqlite and rusqlite_migration to Cargo.toml**
+
+Add to `[dependencies]` in `Cargo.toml`:
+
+```toml
+rusqlite = { version = "*", features = ["bundled"] }
+rusqlite_migration = "*"
+```
+
+- [ ] **Step 2: Create the initial migration file**
+
+Create `migrations/0001_initial.sql` with the schema from the design doc:
+
+```sql
+CREATE TABLE runs (
+  id              TEXT PRIMARY KEY,
+  repo            TEXT NOT NULL,
+  ref_name        TEXT NOT NULL,
+  sha             TEXT NOT NULL,
+  pushed_at_ms    INTEGER NOT NULL,
+  state           TEXT NOT NULL,
+  failure_kind    TEXT,
+  queued_at_ms    INTEGER NOT NULL,
+  started_at_ms   INTEGER,
+  finished_at_ms  INTEGER,
+  container_id    TEXT,
+  workspace_path  TEXT NOT NULL,
+
+  CHECK (state IN ('pending', 'active', 'complete', 'failed', 'superseded')),
+
+  CHECK (started_at_ms  IS NULL OR started_at_ms  >= queued_at_ms),
+  CHECK (finished_at_ms IS NULL OR finished_at_ms >= queued_at_ms),
+  CHECK (finished_at_ms IS NULL OR started_at_ms IS NULL
+         OR finished_at_ms >= started_at_ms),
+
+  CHECK (CASE state
+    WHEN 'pending'    THEN started_at_ms IS NULL  AND finished_at_ms IS NULL  AND container_id IS NULL
+    WHEN 'active'     THEN started_at_ms IS NOT NULL AND finished_at_ms IS NULL
+    WHEN 'complete'   THEN started_at_ms IS NOT NULL AND finished_at_ms IS NOT NULL AND container_id IS NULL
+    WHEN 'failed'     THEN finished_at_ms IS NOT NULL AND container_id IS NULL
+    WHEN 'superseded' THEN finished_at_ms IS NOT NULL AND container_id IS NULL
+  END)
+);
+
+CREATE INDEX runs_repo_pushed_at ON runs(repo, pushed_at_ms DESC);
+CREATE INDEX runs_state          ON runs(state);
+
+CREATE TABLE jobs (
+  run_id          TEXT NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
+  job_id          TEXT NOT NULL,
+  state           TEXT NOT NULL,
+  exit_code       INTEGER,
+  started_at_ms   INTEGER,
+  finished_at_ms  INTEGER,
+
+  CHECK (state IN ('pending', 'active', 'complete', 'failed', 'skipped', 'aborted')),
+
+  CHECK (started_at_ms IS NULL OR finished_at_ms IS NULL
+         OR finished_at_ms >= started_at_ms),
+
+  CHECK (CASE state
+    WHEN 'pending'  THEN started_at_ms IS NULL  AND finished_at_ms IS NULL
+    WHEN 'active'   THEN started_at_ms IS NOT NULL AND finished_at_ms IS NULL
+    WHEN 'complete' THEN started_at_ms IS NOT NULL AND finished_at_ms IS NOT NULL
+    WHEN 'failed'   THEN started_at_ms IS NOT NULL AND finished_at_ms IS NOT NULL
+    WHEN 'skipped'  THEN started_at_ms IS NULL  AND finished_at_ms IS NOT NULL
+    WHEN 'aborted'  THEN finished_at_ms IS NOT NULL
+  END),
+
+  PRIMARY KEY (run_id, job_id)
+);
+```
+
+- [ ] **Step 3: Verify it compiles**
+
+Run: `cargo check --workspace`
+Expected: compiles (dependencies resolve; no code uses them yet)
+
+- [ ] **Step 4: Commit**
+
+```
+Add rusqlite dependencies and initial schema migration
+```
+
+---
+
+## Task 2: Create `src/db.rs` — connection management and migration runner
+
+**Files:**
+- Create: `src/db.rs`
+- Modify: `src/lib.rs`
+
+- [ ] **Step 1: Create `src/db.rs`**
+
+```rust
+//! Database connection management and migration runner.
+//!
+//! Owns the SQLite connection, WAL mode pragma, foreign key enforcement,
+//! and the ordered list of migrations. Callers borrow a connection handle
+//! from [`open`] rather than opening their own.
+
+use std::path::Path;
+use std::sync::LazyLock;
+
+use rusqlite::Connection;
+use rusqlite_migration::{Migrations, M};
+
+use crate::error::Error;
+
+/// The ordered set of schema migrations. Append-only — never edit
+/// a migration that has already shipped.
+static MIGRATIONS: LazyLock<Migrations<'static>> = LazyLock::new(|| {
+    Migrations::new(vec![
+        M::up(include_str!("../migrations/0001_initial.sql")),
+    ])
+});
+
+/// Open the database at `path`, enable WAL mode and foreign keys,
+/// and run any pending migrations. Creates the file if it doesn't
+/// exist.
+pub fn open(path: &Path) -> Result<Connection, Error> {
+    let mut conn = Connection::open(path)?;
+    conn.execute_batch("PRAGMA journal_mode = WAL; PRAGMA foreign_keys = ON;")?;
+    MIGRATIONS.to_latest(&mut conn)?;
+    Ok(conn)
+}
+
+/// Open an in-memory database (for tests). Same pragmas and
+/// migrations as the on-disk version.
+#[cfg(test)]
+pub fn open_in_memory() -> Result<Connection, Error> {
+    let mut conn = Connection::open_in_memory()?;
+    conn.execute_batch("PRAGMA foreign_keys = ON;")?;
+    MIGRATIONS.to_latest(&mut conn)?;
+    Ok(conn)
+}
+```
+
+- [ ] **Step 2: Export the module from `src/lib.rs`**
+
+Add `pub mod db;` to `src/lib.rs`.
+
+- [ ] **Step 3: Add `Sql` error variant to `src/error.rs`**
+
+Add `rusqlite` error conversion. In `src/error.rs`:
+
+```rust
+#[error(transparent)]
+Sql(#[from] rusqlite::Error),
+```
+
+And in `src/ci/error.rs`:
+
+```rust
+#[error(transparent)]
+Sql(#[from] rusqlite::Error),
+```
+
+Also remove the `Yaml` and `Utf8` variants from `src/ci/error.rs` once nothing uses them (will clean up in Task 3).
+
+- [ ] **Step 4: Verify it compiles**
+
+Run: `cargo check --workspace`
+
+- [ ] **Step 5: Commit**
+
+```
+Add db module with SQLite connection management and migrations
+```
+
+---
+
+## Task 3: Rewrite `Runs` and `Run` to use SQLite
+
+This is the core of the migration. The `Runs` struct owns a DB connection (or a path to the DB file) and a base path for run directories (workspace + logs). `Run` owns a connection and a run ID.
+
+**Files:**
+- Modify: `src/ci/run.rs`
+- Modify: `src/ci/error.rs`
+
+### Key changes to `src/ci/run.rs`
+
+**Struct changes:**
+
+- `Runs` now holds: `db: rusqlite::Connection`, `repo: String`, `base_dir: PathBuf` (for run directories)
+- `Run` now holds: `db: rusqlite::Connection`, `id: String`, `repo: String`, `base_dir: PathBuf`
+- `RunState` gains `Superseded` variant
+- Remove `RunMeta`, `RunTimes`, `ContainerRecord` as persistence types — their fields map to columns
+- Keep `RunMeta` as an in-memory input type for `Runs::create` (callers still pass sha/ref/pushed_at)
+- Remove `write_yaml` / `read_yaml` helpers
+
+**`Runs::create` changes:**
+
+```sql
+INSERT INTO runs (id, repo, ref_name, sha, pushed_at_ms, state,
+                  queued_at_ms, workspace_path)
+VALUES (?, ?, ?, ?, ?, 'pending', ?, ?);
+```
+
+The `workspace_path` is `<base_dir>/<id>/workspace`. The run directory is created at create time.
+
+**`Run::transition` changes:**
+
+```sql
+UPDATE runs SET state = ?, started_at_ms = ?, finished_at_ms = ?, container_id = NULL
+WHERE id = ?;
+```
+
+Single UPDATE in a transaction. No directory renames. Timestamps stamped as in the current code.
+
+**`Run::read_meta` / `read_times` / `write_times` changes:**
+
+Replaced by direct column reads from the `runs` row. Expose accessor methods instead of returning structs:
+
+- `Run::sha()`, `Run::ref_name()`, `Run::pushed_at_ms()` — read from DB
+- `Run::started_at_ms()`, `Run::finished_at_ms()` — read from DB
+- `Run::state()` — cached from last query or read fresh
+
+**`Run::read_container_record` / `write_container_record` changes:**
+
+`container_id` is a column on `runs`. The container timestamps (build_started_at, etc.) are not in the current schema — they can be added to the `runs` table in a follow-up migration. For now, keep writing `container.yml` as a file in the run directory for the container lifecycle timestamps, and only track `container_id` in the DB. This is consistent with the design doc's schema which only has `container_id`.
+
+**`DockerLifecycle` changes:**
+
+`record_path` still points to `<run-dir>/container.yml` for the container timestamps. The `container_id` is also written to the DB when it's set. The `Drop` impl continues to write `container_stopped_at` to the YAML file.
+
+**`Runs::scan_orphans` changes:**
+
+```sql
+SELECT id, state FROM runs WHERE state IN ('pending', 'active') AND repo = ?;
+```
+
+No more directory scanning. Quarantine concept goes away (unreadable runs were a filesystem artifact).
+
+**`Runs::reconcile_orphans` changes:**
+
+```sql
+UPDATE runs SET state = 'failed', finished_at_ms = ?, container_id = NULL, failure_kind = 'orphaned'
+WHERE state = 'active' AND repo = ?;
+```
+
+For pending orphans, the design doc says `umykvluw` lands separately. Current behavior transitions them to `complete`. Keep that behavior for now but use the DB:
+
+```sql
+UPDATE runs SET state = 'complete', finished_at_ms = ?, container_id = NULL
+WHERE state = 'pending' AND repo = ?;
+```
+
+**`Run::path` changes:**
+
+Returns `<base_dir>/<id>/` — the run directory for workspace and logs. No state subdirectory.
+
+**`Run::update_latest` changes:**
+
+Removed entirely. The `latest` symlink was a filesystem workaround; the DB query `SELECT id FROM runs WHERE repo = ? ORDER BY queued_at_ms DESC LIMIT 1` replaces it.
+
+**`Run::write_all_logs` changes:**
+
+Stays the same — writes YAML log files under `<run-dir>/jobs/<job-id>/log.yml`. Logs live on disk per the design doc.
+
+- [ ] **Step 1: Write the new `RunState` with `Superseded` variant and accessor methods**
+
+Update `RunState` to include `Superseded`:
+
+```rust
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum RunState {
+    Pending,
+    Active,
+    Complete,
+    Failed,
+    Superseded,
+}
+
+impl RunState {
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            RunState::Pending => "pending",
+            RunState::Active => "active",
+            RunState::Complete => "complete",
+            RunState::Failed => "failed",
+            RunState::Superseded => "superseded",
+        }
+    }
+
+    pub fn from_str(s: &str) -> Option<Self> {
+        match s {
+            "pending" => Some(RunState::Pending),
+            "active" => Some(RunState::Active),
+            "complete" => Some(RunState::Complete),
+            "failed" => Some(RunState::Failed),
+            "superseded" => Some(RunState::Superseded),
+            _ => None,
+        }
+    }
+}
+```
+
+Remove `dir_name()`.
+
+- [ ] **Step 2: Rewrite `Runs` struct**
+
+```rust
+pub struct Runs {
+    db: rusqlite::Connection,
+    repo: String,
+    base_dir: PathBuf,
+}
+```
+
+`base_dir` is `<quire-root>/runs/<repo>/`. Run directories live at `<base_dir>/<id>/`.
+
+Update constructor:
+
+```rust
+impl Runs {
+    pub fn new(db: rusqlite::Connection, repo: String, base_dir: PathBuf) -> Self {
+        Self { db, repo, base_dir }
+    }
+}
+```
+
+- [ ] **Step 3: Rewrite `Runs::create`**
+
+```rust
+pub fn create(&self, meta: &RunMeta) -> Result<Run> {
+    let id = uuid::Uuid::now_v7().to_string();
+    let workspace_path = self.base_dir.join(&id).join("workspace");
+
+    self.db.execute(
+        "INSERT INTO runs (id, repo, ref_name, sha, pushed_at_ms, state, queued_at_ms, workspace_path)
+         VALUES (?1, ?2, ?3, ?4, ?5, 'pending', ?6, ?7)",
+        rusqlite::params![
+            &id,
+            &self.repo,
+            &meta.r#ref,
+            &meta.sha,
+            meta.pushed_at.as_millisecond(),
+            jiff::Timestamp::now().as_millisecond(),
+            workspace_path.to_str().ok_or_else(|| std::io::Error::new(
+                std::io::ErrorKind::InvalidData,
+                "workspace path is not valid UTF-8",
+            ))?,
+        ],
+    )?;
+
+    // Create run directory for workspace and logs.
+    fs_err::create_dir_all(&workspace_path)?;
+
+    Ok(Run {
+        id,
+        repo: self.repo.clone(),
+        base_dir: self.base_dir.clone(),
+        state: RunState::Pending,
+    })
+}
+```
+
+Note: `Run` caches `state` in memory to avoid a round-trip after creation. It also needs a way to execute DB statements. Two options:
+- (A) `Run` clones the connection or holds a reference
+- (B) `Run` takes `&Connection` on each method call
+
+Option (B) is cleaner for borrowing but makes `Run::execute` harder since it consumes `self`. Go with option (A): `Run` holds its own `rusqlite::Connection`. SQLite allows multiple connections to the same file in WAL mode. Alternatively, since `rusqlite::Connection` is not `Clone`, `Run` can take ownership of the connection from `Runs`.
+
+Actually, the simplest approach: `Runs` holds the DB path (not a connection), and each method opens a short-lived connection. Or better: pass `&Connection` to each method. Since `Run::execute` needs to do many operations, it should hold its own connection.
+
+Let me reconsider: `Runs` creates `Run` objects. The caller (trigger_ref, server startup) has a connection. Let's make `Runs` hold a `&Connection` lifetime... but that gets messy with ownership.
+
+Simplest correct approach: `Runs` owns a `Connection`. `Run` borrows `&Connection`. But `Run::execute` consumes `self` and the pipeline, and the runtime needs its own state...
+
+Final decision: `Run` holds its own `rusqlite::Connection`. It opens a new connection to the same DB file. This is standard SQLite practice — multiple connections in WAL mode are fine. `Runs` holds the DB path and opens connections as needed.
+
+Update:
+
+```rust
+pub struct Runs {
+    db_path: PathBuf,
+    repo: String,
+    base_dir: PathBuf,
+}
+
+impl Runs {
+    pub fn new(db_path: PathBuf, repo: String, base_dir: PathBuf) -> Self {
+        Self { db_path, repo, base_dir }
+    }
+
+    fn conn(&self) -> Result<rusqlite::Connection> {
+        let conn = rusqlite::Connection::open(&self.db_path)?;
+        conn.execute_batch("PRAGMA journal_mode = WAL; PRAGMA foreign_keys = ON;")?;
+        Ok(conn)
+    }
+
+    pub fn create(&self, meta: &RunMeta) -> Result<Run> {
+        let conn = self.conn()?;
+        // ... insert ...
+        Run::open(conn, self.repo.clone(), self.base_dir.clone(), &id)
+    }
+}
+```
+
+Hmm, but opening a new connection per operation is wasteful. Let me think about this differently.
+
+Better approach: `Runs` holds a `rusqlite::Connection`. `Run::execute` is where the long-lived operation happens. Before `execute`, `Run` can get its own connection (or we pass one in). For the simpler methods (transition, read_meta, etc.), `Run` can borrow from... somewhere.
+
+Actually, the cleanest solution: make `Run` hold a `rusqlite::Connection` that it receives at construction. `Runs` opens a connection in `create` and transfers it to the new `Run`. For methods that don't consume `self` (like `read_meta`, `transition`), `Run` uses its owned connection. For `execute`, it already owns one.
+
+But then `Runs` needs a new connection for each `create` call. Unless `Runs` doesn't hold a connection at all — it holds the db path.
+
+Let me look at how `Runs` is used:
+
+1. `Runs::new(base)` — constructed with a path
+2. `runs.create(&meta)` — returns a `Run`
+3. `runs.scan_orphans()` — returns `Vec<Run>`
+4. `runs.reconcile_orphans()` — internally iterates
+
+And `Run` is used:
+1. `run.id()`, `run.state()`, `run.path()`
+2. `run.transition(RunState::Active)`
+3. `run.read_meta()`, `run.read_times()`, `run.write_times()`
+4. `run.read_container_record()`, `run.write_container_record()`
+5. `run.execute(pipeline, secrets, git_dir, workspace, executor)` — consumes self
+
+The pattern is: `Runs` creates `Run` objects, and callers work with `Run` objects. Both need DB access.
+
+Simplest clean approach: both `Runs` and `Run` hold `rusqlite::Connection`. Since SQLite supports multiple connections in WAL mode, `Runs::create` opens a new connection for the new `Run`. For `Runs` methods like `scan_orphans` and `reconcile_orphans`, it uses its own connection.
+
+- [ ] **Step 4: Rewrite `Run` struct**
+
+```rust
+pub struct Run {
+    db: rusqlite::Connection,
+    id: String,
+    state: RunState,
+    base_dir: PathBuf,
+}
+```
+
+Methods use `self.db` for all reads/writes. No more filesystem state management.
+
+- [ ] **Step 5: Rewrite `Run::transition`**
+
+```rust
+pub fn transition(&mut self, to: RunState) -> Result<()> {
+    let allowed = matches!(
+        (self.state, to),
+        (RunState::Pending, RunState::Active)
+        | (RunState::Pending, RunState::Complete)
+        | (RunState::Active, RunState::Complete)
+        | (RunState::Active, RunState::Failed)
+    );
+    if !allowed {
+        return Err(Error::InvalidTransition { from: self.state, to });
+    }
+
+    let now = jiff::Timestamp::now().as_millisecond();
+
+    self.db.execute(
+        "UPDATE runs SET state = ?1,
+            started_at_ms = CASE WHEN ?2 = 'active' AND started_at_ms IS NULL THEN ?3 ELSE started_at_ms END,
+            finished_at_ms = CASE WHEN ?2 IN ('complete', 'failed') AND finished_at_ms IS NULL THEN ?3 ELSE finished_at_ms END,
+            container_id = CASE WHEN ?2 IN ('complete', 'failed') THEN NULL ELSE container_id END
+         WHERE id = ?4",
+        rusqlite::params![to.as_str(), to.as_str(), now, &self.id],
+    )?;
+
+    self.state = to;
+    Ok(())
+}
+```
+
+- [ ] **Step 6: Rewrite `Run::read_meta` as column accessors**
+
+```rust
+pub fn read_meta(&self) -> Result<RunMeta> {
+    let (sha, ref_name, pushed_at_ms) = self.db.query_row(
+        "SELECT sha, ref_name, pushed_at_ms FROM runs WHERE id = ?1",
+        rusqlite::params![&self.id],
+        |row| Ok((row.get(0)?, row.get(1)?, row.get::<_, i64>(2)?)),
+    )?;
+    Ok(RunMeta {
+        sha,
+        r#ref: ref_name,
+        pushed_at: jiff::Timestamp::from_millisecond(pushed_at_ms).expect("valid timestamp"),
+    })
+}
+```
+
+- [ ] **Step 7: Remove `write_yaml` / `read_yaml`, `RunTimes`, `ContainerRecord` persistence**
+
+`RunTimes` and `ContainerRecord` as separate YAML-backed types go away. Timestamps are columns on `runs`. `container_id` is a column. Container lifecycle timestamps (build_started_at etc.) can stay as a `container.yml` file in the run dir for now, since the schema only tracks `container_id`.
+
+Keep `RunMeta` as an in-memory struct passed to `Runs::create`. Remove `RunTimes` as a public type — callers use `run.started_at()` etc.
+
+Actually, keep `ContainerRecord` around for the file-based container timestamps, since the DB schema only has `container_id`. The `DockerLifecycle` still writes `container.yml`.
+
+- [ ] **Step 8: Rewrite `Run::path`**
+
+```rust
+pub fn path(&self) -> PathBuf {
+    self.base_dir.join(&self.id)
+}
+```
+
+No state subdirectory. The run dir is always `<base_dir>/<id>/`.
+
+- [ ] **Step 9: Remove `update_latest`**
+
+No more symlink. Remove the method entirely.
+
+- [ ] **Step 10: Rewrite `Runs::scan_orphans` and `reconcile_orphans`**
+
+```rust
+pub fn scan_orphans(&self) -> Result<Vec<Run>> {
+    let mut stmt = self.db.prepare(
+        "SELECT id, state FROM runs WHERE state IN ('pending', 'active') AND repo = ?1"
+    )?;
+    let rows = stmt.query_map(rusqlite::params![&self.repo], |row| {
+        let id: String = row.get(0)?;
+        let state_str: String = row.get(1)?;
+        let state = RunState::from_str(&state_str).expect("DB enforces valid states");
+        Ok((id, state))
+    })?;
+
+    let mut orphans = Vec::new();
+    for row in rows {
+        let (id, state) = row?;
+        let db = self.conn()?;  // each Run gets its own connection
+        orphans.push(Run { db, id, state, base_dir: self.base_dir.clone() });
+    }
+    Ok(orphans)
+}
+```
+
+```rust
+pub fn reconcile_orphans(&self) -> Result<()> {
+    let now = jiff::Timestamp::now().as_millisecond();
+
+    // Active orphans → failed
+    self.db.execute(
+        "UPDATE runs SET state = 'failed', finished_at_ms = ?1, container_id = NULL, failure_kind = 'orphaned'
+         WHERE state = 'active' AND repo = ?2",
+        rusqlite::params![now, &self.repo],
+    )?;
+
+    // Pending orphans → complete (matching current behavior; umykvluw changes this to failed)
+    self.db.execute(
+        "UPDATE runs SET state = 'complete', finished_at_ms = ?1, container_id = NULL
+         WHERE state = 'pending' AND repo = ?2",
+        rusqlite::params![now, &self.repo],
+    )?;
+
+    Ok(())
+}
+```
+
+- [ ] **Step 11: Rewrite `Run::execute` to use DB**
+
+The execute method is the most complex. Key changes:
+- `self.transition(RunState::Active)` works as before (now DB-backed)
+- `build_executor_runtime` writes `container_id` to the DB instead of (or in addition to) `container.yml`
+- `write_all_logs` stays file-based (logs on disk per design doc)
+- The `DockerLifecycle.record_path` stays for container timestamps, but `container_id` is tracked in the DB
+
+- [ ] **Step 12: Update `Run::build_executor_runtime`**
+
+After building the container and getting the session, write `container_id` to the DB:
+
+```rust
+self.db.execute(
+    "UPDATE runs SET container_id = ?1 WHERE id = ?2",
+    rusqlite::params![&session.container_id, &self.id],
+)?;
+```
+
+Keep writing `container.yml` for the build/container timestamps.
+
+- [ ] **Step 13: Remove unused error variants from `src/ci/error.rs`**
+
+Remove `Yaml` and `Utf8` variants if nothing uses them. Add `Sql` variant.
+
+- [ ] **Step 14: Run tests and fix compilation errors**
+
+Run: `cargo check --workspace`
+Then: `cargo test --workspace -q`
+
+Fix any compilation errors. The tests in `run.rs` will need updating since they construct `Runs` with the old API.
+
+- [ ] **Step 15: Update tests in `src/ci/run.rs`**
+
+Key test changes:
+- `tmp_quire()` helpers create an in-memory DB via `db::open_in_memory()` or open a temp file DB
+- `test_runs()` creates `Runs::new(db_path, "test.git".to_string(), base_dir)`
+- Tests no longer check for state directories (`pending/`, `active/`, etc.)
+- Tests check run directories at `<base_dir>/<id>/`
+- `scan_orphans` tests verify DB queries instead of directory scans
+- Remove `create_symlinks_latest` test (no more symlink)
+- Remove `scan_orphans_quarantines_unreadable_runs` test (no more quarantine — that was a filesystem artifact)
+- Update `transition_errors_on_missing_source` — no more missing directory, but could test with a run ID that doesn't exist in the DB
+
+- [ ] **Step 16: Run full test suite**
+
+Run: `cargo test --workspace -q`
+
+- [ ] **Step 17: Commit**
+
+```
+Rewrite Runs and Run to use SQLite for state storage
+```
+
+---
+
+## Task 4: Update `src/ci/mod.rs` — trigger path
+
+**Files:**
+- Modify: `src/ci/mod.rs`
+
+The `trigger_ref` function currently calls `ci.runs(repo.runs_base()).create(&meta)`. After the migration, it needs to pass the DB connection/path.
+
+- [ ] **Step 1: Update `Ci::runs` signature**
+
+Change from:
+
+```rust
+pub fn runs(&self, runs_base: PathBuf) -> Runs
+```
+
+To:
+
+```rust
+pub fn runs(&self, db_path: &Path, repo: &str, runs_base: PathBuf) -> Runs
+```
+
+Or, better: pass through the Quire-level DB path. The `trigger` function has access to `quire`, so it can pass `quire.db_path()`.
+
+- [ ] **Step 2: Update `trigger` and `trigger_ref` functions**
+
+```rust
+pub fn trigger(quire: &crate::Quire, event: &PushEvent) {
+    // ... existing repo resolution ...
+    let db_path = quire.db_path();
+    for push_ref in event.updated_refs() {
+        if let Err(e) = trigger_ref(&repo, &db_path, event.pushed_at, push_ref, &secrets) {
+            // ... error handling ...
+        }
+    }
+}
+
+fn trigger_ref(
+    repo: &Repo,
+    db_path: &Path,
+    pushed_at: jiff::Timestamp,
+    push_ref: &PushRef,
+    secrets: &HashMap<String, crate::secret::SecretString>,
+) -> error::Result<()> {
+    // ... existing code ...
+    let mut run = ci.runs(db_path, repo.name(), repo.runs_base()).create(&meta)?;
+    // ... rest stays largely the same ...
+}
+```
+
+- [ ] **Step 3: Update tests in `src/ci/mod.rs`**
+
+Tests that create `Runs` need the new signature. Use `db::open_in_memory()` or a temp file for the DB.
+
+- [ ] **Step 4: Run tests**
+
+Run: `cargo test --workspace -q`
+
+- [ ] **Step 5: Commit**
+
+```
+Update CI trigger path to use SQLite
+```
+
+---
+
+## Task 5: Update `src/quire.rs` — DB path accessor
+
+**Files:**
+- Modify: `src/quire.rs`
+
+- [ ] **Step 1: Add `db_path` method to `Quire`**
+
+```rust
+pub fn db_path(&self) -> PathBuf {
+    self.base_dir.join("quire.db")
+}
+```
+
+- [ ] **Step 2: Update `Repo::runs` and `Repo::runs_base`**
+
+`Repo::runs` currently returns `Runs::new(self.runs_base())`. Update to pass the DB path:
+
+```rust
+pub fn runs(&self, db_path: &Path) -> Runs {
+    Runs::new(
+        db_path.to_path_buf(),
+        self.name().to_string(),
+        self.runs_base(),
+    )
+}
+```
+
+Or remove the convenience method and let callers construct `Runs` directly with the right params.
+
+- [ ] **Step 3: Run tests**
+
+Run: `cargo test --workspace -q`
+
+- [ ] **Step 4: Commit**
+
+```
+Add DB path accessor to Quire and update Repo::runs
+```
+
+---
+
+## Task 6: Update server startup — open DB and reconcile orphans
+
+**Files:**
+- Modify: `src/bin/quire/server.rs`
+
+- [ ] **Step 1: Open the database on startup**
+
+Add after the socket setup, before orphan reconciliation:
+
+```rust
+let db_path = quire.db_path();
+tracing::info!(path = %db_path.display(), "opening database");
+let db = crate::db::open(&db_path)?;
+```
+
+- [ ] **Step 2: Update orphan reconciliation to use DB**
+
+```rust
+for repo in quire.repos().context("failed to list repos")? {
+    let runs = repo.runs(&db_path);
+    runs.reconcile_orphans()?;
+}
+```
+
+Note: if `Runs` holds a connection (not a path), the server would pass a reference or the path. If `Runs` takes a path and opens its own connection, this is straightforward.
+
+- [ ] **Step 3: Run tests**
+
+Run: `cargo test --workspace -q`
+
+- [ ] **Step 4: Commit**
+
+```
+Open SQLite database on server startup for orphan reconciliation
+```
+
+---
+
+## Task 7: Update `ci run` CLI command
+
+**Files:**
+- Modify: `src/bin/quire/commands/ci.rs`
+
+- [ ] **Step 1: Update `ci::run` to use DB**
+
+The `run` function creates a `Runs` with a tempdir. Now it needs a DB path. Use a temp file for the DB:
+
+```rust
+let db_path = tmp.path().join("quire.db");
+let db = quire::db::open(&db_path)?;
+let runs = Runs::new(db_path, "local".to_string(), tmp.path().to_path_buf());
+```
+
+- [ ] **Step 2: Run tests**
+
+Run: `cargo test --workspace -q`
+
+- [ ] **Step 3: Commit**
+
+```
+Update ci run command to use SQLite
+```
+
+---
+
+## Task 8: Update `src/ci/runtime.rs` — DockerLifecycle DB writes
+
+**Files:**
+- Modify: `src/ci/runtime.rs`
+
+The `DockerLifecycle` currently writes `container_stopped_at` to a YAML file. After the migration:
+- `container_id` is tracked in the DB
+- Container lifecycle timestamps can stay in `container.yml` for now (the DB schema only has `container_id`)
+- The `Drop` impl for `DockerLifecycle` needs a DB connection to clear `container_id`
+
+The challenge: `DockerLifecycle` needs DB access in its `Drop` impl. Options:
+- (A) Give `DockerLifecycle` the DB path so it can open a connection in `Drop`
+- (B) Keep writing container timestamps to `container.yml` only; the DB `container_id` is managed by `Run::build_executor_runtime` and `Run::transition` (which already clears it)
+
+Go with (B): `Run::transition` already sets `container_id = NULL` when transitioning to Complete/Failed. The `DockerLifecycle` only needs to write `container_stopped_at` to the YAML file. No changes to `DockerLifecycle` needed beyond what's already handled by the transition logic.
+
+- [ ] **Step 1: Verify `DockerLifecycle` Drop still works**
+
+The `Drop` impl writes to `container.yml` at `self.record_path`. This path is still valid since run directories still exist at `<base_dir>/<id>/`. No changes needed.
+
+- [ ] **Step 2: Verify container_id is cleared on state transition**
+
+The `Run::transition` SQL already sets `container_id = NULL` for complete/failed states. Confirm this is working.
+
+- [ ] **Step 3: Commit (if changes needed, otherwise skip)**
+
+---
+
+## Task 9: Update `docs/CI.md`
+
+**Files:**
+- Modify: `docs/CI.md`
+
+- [ ] **Step 1: Update the "Storage" section**
+
+Remove "No SQLite in v1" and the secondary-index-only commitment. Replace with the SQLite-as-primary-store description from the design doc.
+
+- [ ] **Step 2: Update the volume layout**
+
+Replace the directory-based run layout with:
+
+```
+/var/quire/
+  quire.db                       # SQLite database
+  repos/<name>.git/              # bare repos, unchanged
+  runs/<repo>/<run-id>/          # per-run workspace
+    workspace/                   # materialized checkout
+    jobs/<job-id>/
+      log.yml                    # per-job sh output logs
+```
+
+- [ ] **Step 3: Update the lifecycle description**
+
+Replace the directory-rename lifecycle with SQL state transitions.
+
+- [ ] **Step 4: Remove the in-memory queue / mpsc references**
+
+The design doc says SQLite is the queue. The `mpsc` references in CI.md should be updated (or noted as "replaced by DB queries" — the actual queue replacement is a follow-up since the runner isn't built yet).
+
+- [ ] **Step 5: Commit**
+
+```
+Update CI docs for SQLite migration
+```
+
+---
+
+## Task 10: Clean up and final verification
+
+- [ ] **Step 1: Remove dead code**
+
+- Remove `write_yaml` and `read_yaml` helpers from `run.rs`
+- Remove `Yaml` error variant from `ci/error.rs` if unused
+- Remove `repo_segment` function if unused (was for Docker image tags from path — check if still needed)
+- Remove any `serde_yaml_ng` usage in `run.rs` that's no longer needed
+
+- [ ] **Step 2: Run `just all`**
+
+Run: `just all`
+Expected: all checks pass (fmt, clippy, test)
+
+- [ ] **Step 3: Run coverage**
+
+Run: `just coverage`
+Expected: 100% coverage maintained
+
+- [ ] **Step 4: Commit**
+
+```
+Clean up dead code from filesystem run store
+```
diff --git a/migrations/0001_initial.sql b/migrations/0001_initial.sql
new file mode 100644
index 0000000..4645069
--- /dev/null
+++ b/migrations/0001_initial.sql
@@ -0,0 +1,62 @@
+CREATE TABLE runs (
+  id                    TEXT PRIMARY KEY,
+  repo                  TEXT NOT NULL,
+  ref_name              TEXT NOT NULL,
+  sha                   TEXT NOT NULL,
+  pushed_at_ms          INTEGER NOT NULL,
+  state                 TEXT NOT NULL,
+  failure_kind          TEXT,
+  queued_at_ms          INTEGER NOT NULL,
+  started_at_ms         INTEGER,
+  finished_at_ms        INTEGER,
+  container_id          TEXT,
+  image_tag             TEXT,
+  build_started_at_ms   INTEGER,
+  build_finished_at_ms  INTEGER,
+  container_started_at_ms INTEGER,
+  container_stopped_at_ms INTEGER,
+  workspace_path        TEXT NOT NULL,
+
+  CHECK (state IN ('pending', 'active', 'complete', 'failed', 'superseded')),
+
+  CHECK (started_at_ms  IS NULL OR started_at_ms  >= queued_at_ms),
+  CHECK (finished_at_ms IS NULL OR finished_at_ms >= queued_at_ms),
+  CHECK (finished_at_ms IS NULL OR started_at_ms IS NULL
+         OR finished_at_ms >= started_at_ms),
+
+  CHECK (CASE state
+    WHEN 'pending'    THEN started_at_ms IS NULL  AND finished_at_ms IS NULL  AND container_id IS NULL
+    WHEN 'active'     THEN started_at_ms IS NOT NULL AND finished_at_ms IS NULL
+    WHEN 'complete'   THEN started_at_ms IS NOT NULL AND finished_at_ms IS NOT NULL AND container_id IS NULL
+    WHEN 'failed'     THEN finished_at_ms IS NOT NULL AND container_id IS NULL
+    WHEN 'superseded' THEN finished_at_ms IS NOT NULL AND container_id IS NULL
+  END)
+);
+
+CREATE INDEX runs_repo_pushed_at ON runs(repo, pushed_at_ms DESC);
+CREATE INDEX runs_state          ON runs(state);
+
+CREATE TABLE jobs (
+  run_id          TEXT NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
+  job_id          TEXT NOT NULL,
+  state           TEXT NOT NULL,
+  exit_code       INTEGER,
+  started_at_ms   INTEGER,
+  finished_at_ms  INTEGER,
+
+  CHECK (state IN ('pending', 'active', 'complete', 'failed', 'skipped', 'aborted')),
+
+  CHECK (started_at_ms IS NULL OR finished_at_ms IS NULL
+         OR finished_at_ms >= started_at_ms),
+
+  CHECK (CASE state
+    WHEN 'pending'  THEN started_at_ms IS NULL  AND finished_at_ms IS NULL
+    WHEN 'active'   THEN started_at_ms IS NOT NULL AND finished_at_ms IS NULL
+    WHEN 'complete' THEN started_at_ms IS NOT NULL AND finished_at_ms IS NOT NULL
+    WHEN 'failed'   THEN started_at_ms IS NOT NULL AND finished_at_ms IS NOT NULL
+    WHEN 'skipped'  THEN started_at_ms IS NULL  AND finished_at_ms IS NOT NULL
+    WHEN 'aborted'  THEN finished_at_ms IS NOT NULL
+  END),
+
+  PRIMARY KEY (run_id, job_id)
+);
diff --git a/src/bin/quire/commands/ci.rs b/src/bin/quire/commands/ci.rs
index 0dd1f20..5382cdb 100644
--- a/src/bin/quire/commands/ci.rs
+++ b/src/bin/quire/commands/ci.rs
@@ -65,7 +65,11 @@ pub async fn run(quire: &Quire, maybe_sha: Option<&str>, executor: Executor) ->
     // (e.g. $XDG_CACHE_HOME/quire/local-runs) so logs survive past the
     // command and `tail -f` becomes useful.
     let tmp = tempfile::tempdir().into_diagnostic()?;
-    let runs = Runs::new(tmp.path().to_path_buf());
+    let db_path = tmp.path().join("quire.db");
+    let mut db = quire::db::open(&db_path).into_diagnostic()?;
+    quire::db::migrate(&mut db).into_diagnostic()?;
+    drop(db);
+    let runs = Runs::new(db_path, "local".to_string(), tmp.path().to_path_buf());
 
     let meta = RunMeta {
         sha: commit.sha.clone(),
diff --git a/src/bin/quire/server.rs b/src/bin/quire/server.rs
index 9db3aa9..cb56ef6 100644
--- a/src/bin/quire/server.rs
+++ b/src/bin/quire/server.rs
@@ -39,9 +39,16 @@ pub async fn run(quire: &Quire) -> Result<()> {
 
     tracing::info!(path = %socket_path.display(), "listening on event socket");
 
+    // Open and migrate the database.
+    let db_path = quire.db_path();
+    tracing::info!(path = %db_path.display(), "opening database");
+    let mut db = quire::db::open(&db_path).into_diagnostic()?;
+    quire::db::migrate(&mut db).into_diagnostic()?;
+    drop(db);
+
     // Scan for orphaned runs from a previous server instance.
     for repo in quire.repos().context("failed to list repos")? {
-        repo.runs().reconcile_orphans()?;
+        repo.runs(&db_path).reconcile_orphans()?;
     }
 
     let quire_handle = quire.clone();
diff --git a/src/ci/error.rs b/src/ci/error.rs
index c1f9124..2995401 100644
--- a/src/ci/error.rs
+++ b/src/ci/error.rs
@@ -67,6 +67,9 @@ pub enum Error {
     #[error(transparent)]
     Utf8(#[from] std::string::FromUtf8Error),
 
+    #[error(transparent)]
+    Sql(#[from] rusqlite::Error),
+
     #[error(transparent)]
     Secret(#[from] secret::Error),
 
diff --git a/src/ci/mod.rs b/src/ci/mod.rs
index f96b851..5ac86f5 100644
--- a/src/ci/mod.rs
+++ b/src/ci/mod.rs
@@ -13,7 +13,7 @@ pub(crate) mod error;
 
 pub use error::{Error, Result};
 pub use pipeline::{DefinitionError, Diagnostic, Job, Pipeline, PipelineError, StructureError};
-pub use run::{Executor, Run, RunMeta, RunState, RunTimes, Runs, materialize_workspace};
+pub use run::{Executor, Run, RunMeta, RunState, Runs, materialize_workspace};
 
 /// A resolved commit reference.
 ///
@@ -26,7 +26,7 @@ pub struct CommitRef {
     pub display: String,
 }
 
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
 
 use crate::display_chain;
 use crate::event::{PushEvent, PushRef};
@@ -49,11 +49,6 @@ impl Ci {
         Self { repo_path }
     }
 
-    /// Access CI runs for this repo.
-    pub fn runs(&self, runs_base: PathBuf) -> Runs {
-        Runs::new(runs_base)
-    }
-
     /// Read and compile ci.fnl at a given SHA, returning the validated
     /// pipeline.
     ///
@@ -134,8 +129,9 @@ pub fn trigger(quire: &crate::Quire, event: &PushEvent) {
         }
     };
 
+    let db_path = quire.db_path();
     for push_ref in event.updated_refs() {
-        if let Err(e) = trigger_ref(&repo, event.pushed_at, push_ref, &secrets) {
+        if let Err(e) = trigger_ref(&repo, &db_path, event.pushed_at, push_ref, &secrets) {
             tracing::error!(
                 repo = %event.repo,
                 sha = %push_ref.new_sha, // cov-excl-line
@@ -149,6 +145,7 @@ pub fn trigger(quire: &crate::Quire, event: &PushEvent) {
 /// Create and run CI for a single updated ref.
 fn trigger_ref(
     repo: &Repo,
+    db_path: &Path,
     pushed_at: jiff::Timestamp,
     push_ref: &PushRef,
     secrets: &HashMap<String, crate::secret::SecretString>,
@@ -165,7 +162,7 @@ fn trigger_ref(
         pushed_at,
     };
 
-    let mut run = ci.runs(repo.runs_base()).create(&meta)?;
+    let mut run = repo.runs(db_path).create(&meta)?;
 
     tracing::info!(
         run_id = %run.id(), // cov-excl-line
@@ -245,6 +242,10 @@ mod tests {
         );
 
         let quire = Quire::new(dir.path().to_path_buf());
+        // Initialize the database.
+        let mut db = crate::db::open(&quire.db_path()).expect("init db");
+        crate::db::migrate(&mut db).expect("migrate db");
+        drop(db);
         (dir, quire, "test.git".to_string())
     }
 
@@ -267,6 +268,9 @@ mod tests {
         );
 
         let quire = Quire::new(dir.path().to_path_buf());
+        let mut db = crate::db::open(&quire.db_path()).expect("init db");
+        crate::db::migrate(&mut db).expect("migrate db");
+        drop(db);
         (dir, quire, "test.git".to_string())
     }
 
@@ -278,13 +282,6 @@ mod tests {
         String::from_utf8(output.stdout).unwrap().trim().to_string()
     }
 
-    #[test]
-    fn ci_new_and_runs() {
-        let ci = Ci::new(PathBuf::from("/tmp/test"));
-        let _runs = ci.runs(PathBuf::from("/tmp/runs"));
-        // Just confirm construction works — Runs::new is covered elsewhere.
-    }
-
     #[test]
     fn ci_pipeline_returns_none_when_no_ci_fnl() {
         let (_dir, quire, name) = bare_repo_without_ci();
@@ -362,11 +359,17 @@ mod tests {
             r#ref: "refs/heads/main".to_string(),
         };
 
-        trigger_ref(&repo, pushed_at, &push_ref, &HashMap::new())
-            .expect("trigger_ref should succeed");
+        trigger_ref(
+            &repo,
+            &quire.db_path(),
+            pushed_at,
+            &push_ref,
+            &HashMap::new(),
+        )
+        .expect("trigger_ref should succeed");
 
         // Verify a run was created in complete/.
-        let runs = repo.runs();
+        let runs = repo.runs(&quire.db_path());
         let orphans = runs.scan_orphans().expect("scan");
         assert!(orphans.is_empty(), "run should be complete, not orphaned");
     }
@@ -383,8 +386,14 @@ mod tests {
             r#ref: "refs/heads/main".to_string(),
         };
 
-        trigger_ref(&repo, pushed_at, &push_ref, &HashMap::new())
-            .expect("should succeed without ci.fnl");
+        trigger_ref(
+            &repo,
+            &quire.db_path(),
+            pushed_at,
+            &push_ref,
+            &HashMap::new(),
+        )
+        .expect("should succeed without ci.fnl");
     }
 
     #[test]
@@ -400,7 +409,13 @@ mod tests {
             r#ref: "refs/heads/main".to_string(),
         };
 
-        let result = trigger_ref(&repo, pushed_at, &push_ref, &HashMap::new());
+        let result = trigger_ref(
+            &repo,
+            &quire.db_path(),
+            pushed_at,
+            &push_ref,
+            &HashMap::new(),
+        );
         assert!(result.is_err(), "invalid pipeline should fail");
     }
 
diff --git a/src/ci/run.rs b/src/ci/run.rs
index e48e5f6..ecf4fa3 100644
--- a/src/ci/run.rs
+++ b/src/ci/run.rs
@@ -1,9 +1,9 @@
-//! On-disk storage for CI runs.
+//! SQLite-backed storage for CI runs.
 //!
-//! A run is a directory under `runs/<repo>/<state>/<id>/` containing
-//! `meta.yml` (immutable) and `times.yml` (timestamps). The directory's
-//! parent name is the authoritative state; transitions are atomic
-//! `rename` operations.
+//! A run is a row in the `runs` table identified by UUID. State
+//! transitions are single `UPDATE` statements inside a transaction.
+//! Run directories on disk hold the materialized workspace and per-job
+//! log files, but state lives in the database.
 
 use std::collections::HashMap;
 use std::path::{Path, PathBuf};
@@ -26,10 +26,9 @@ pub enum Executor {
     Docker,
 }
 
-/// Owns a [`ContainerSession`](crate::ci::docker::ContainerSession)
-/// alongside the run-dir's `container.yml` path so [`Drop`] can stamp
-/// `container_stopped_at` *before* the session itself drops and fires
-/// `docker stop`.
+/// Owns the per-run container session alongside the database path
+/// so [`Drop`] can stamp `container_stopped_at` *before* the session
+/// itself drops and fires `docker stop`.
 ///
 /// Field declaration order matters: `session` is declared first so it
 /// drops first after this struct's custom `Drop` body returns. The
@@ -37,7 +36,8 @@ pub enum Executor {
 /// `docker stop`.
 pub(super) struct DockerLifecycle {
     pub(super) session: crate::ci::docker::ContainerSession,
-    record_path: PathBuf,
+    db_path: PathBuf,
+    run_id: String,
     pub(super) work_dir: String,
 }
 
@@ -46,19 +46,22 @@ impl Drop for DockerLifecycle {
         // Stamp `container_stopped_at` before ContainerSession's Drop
         // (`docker stop`) fires. Errors are logged and swallowed —
         // Drop cannot return Result.
-        match read_yaml::<ContainerRecord>(&self.record_path) {
-            Ok(mut rec) => {
-                rec.container_stopped_at = Some(Timestamp::now());
-                if let Err(e) = write_yaml(&self.record_path, &rec) {
+        match crate::db::open(&self.db_path) {
+            Ok(conn) => {
+                let now = Timestamp::now().as_millisecond();
+                if let Err(e) = conn.execute(
+                    "UPDATE runs SET container_stopped_at_ms = ?1 WHERE id = ?2",
+                    rusqlite::params![now, &self.run_id],
+                ) {
                     tracing::error!(
-                        error = %display_chain(&e),
+                        error = %display_chain(&Error::from(e)),
                         "failed to write container_stopped_at"
                     );
                 }
             }
             Err(e) => tracing::error!(
                 error = %display_chain(&e),
-                "failed to read container.yml before stop"
+                "failed to open db before container stop"
             ),
         }
         // After this body returns, fields drop in declaration order:
@@ -73,21 +76,39 @@ pub enum RunState {
     Active,
     Complete,
     Failed,
+    Superseded,
 }
 
 impl RunState {
-    /// The directory name used for this state in the run storage layout.
-    pub fn dir_name(&self) -> &'static str {
+    pub fn as_str(&self) -> &'static str {
         match self {
             RunState::Pending => "pending",
             RunState::Active => "active",
             RunState::Complete => "complete",
             RunState::Failed => "failed",
+            RunState::Superseded => "superseded",
+        }
+    }
+}
+
+impl std::str::FromStr for RunState {
+    type Err = ();
+
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        match s {
+            "pending" => Some(RunState::Pending),
+            "active" => Some(RunState::Active),
+            "complete" => Some(RunState::Complete),
+            "failed" => Some(RunState::Failed),
+            "superseded" => Some(RunState::Superseded),
+            _ => None,
         }
+        .ok_or(())
     }
 }
 
-/// Immutable metadata for a CI run. Written once and never modified.
+/// Immutable metadata for a CI run. Passed to `Runs::create` at
+/// enqueue time; the fields are written to the `runs` row once.
 #[derive(Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 pub struct RunMeta {
     /// The commit SHA that triggered this run.
@@ -98,203 +119,147 @@ pub struct RunMeta {
     pub pushed_at: Timestamp,
 }
 
-/// Timestamps recorded across the run lifecycle. The directory name is the
-/// authoritative state; this file records when transitions happened.
-#[derive(Clone, Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-pub struct RunTimes {
-    /// When the run was picked up (moved to active).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub started_at: Option<Timestamp>,
-    /// When the run finished (moved to complete/failed).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub finished_at: Option<Timestamp>,
-}
-
-/// Container metadata for a docker-mode run, persisted to
-/// `<run-dir>/container.yml`. Each field is populated incrementally as
-/// the lifecycle progresses; absence implies "not yet (or never)
-/// reached." Host-mode runs do not write this file.
-#[derive(Clone, Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-pub struct ContainerRecord {
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub image_tag: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub container_id: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub build_started_at: Option<Timestamp>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub build_finished_at: Option<Timestamp>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub container_started_at: Option<Timestamp>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub container_stopped_at: Option<Timestamp>,
-}
-
 /// Access to CI runs for a single repo.
 ///
-/// Owns the base path (`runs/<repo>/`) and provides run creation
-/// and orphan reconciliation. Obtain one via `Ci::runs()`.
+/// Owns the database path, repo name, and base directory for run
+/// artifacts (workspace, logs). Each method opens a connection via
+/// [`crate::db::open`]. Obtain one via `Ci::runs()`.
 #[derive(Debug)]
 pub struct Runs {
-    base: PathBuf,
+    db_path: PathBuf,
+    repo: String,
+    base_dir: PathBuf,
 }
 
 impl Runs {
-    pub fn new(base: PathBuf) -> Self {
-        Self { base }
+    pub fn new(db_path: PathBuf, repo: String, base_dir: PathBuf) -> Self {
+        Self {
+            db_path,
+            repo,
+            base_dir,
+        }
     }
 
     /// Create a new run record in the `pending` state.
     ///
-    /// Writes `meta.yml` and `times.yml` atomically (temp dir + rename).
+    /// Inserts a row into `runs` and creates the run directory for
+    /// workspace materialization and log storage.
     pub fn create(&self, meta: &RunMeta) -> Result<Run> {
-        let pending_dir = self.base.join(RunState::Pending.dir_name());
         let id = uuid::Uuid::now_v7().to_string();
-
-        fs_err::create_dir_all(&pending_dir)?;
-
-        let tmp_dir = pending_dir.join(format!(".tmp-{id}"));
-        fs_err::create_dir_all(&tmp_dir)?;
-
-        write_yaml(&tmp_dir.join("meta.yml"), meta)?;
-        write_yaml(&tmp_dir.join("times.yml"), &RunTimes::default())?;
-
-        let final_dir = pending_dir.join(&id);
-        fs_err::rename(&tmp_dir, &final_dir)?;
-
-        // Set the latest symlink after opening the run so it can do it.
-        let run = Run::open(self.base.clone(), RunState::Pending, id)?;
-        run.update_latest()?;
-        Ok(run)
+        let workspace_path = self.base_dir.join(&id).join("workspace");
+
+        let db = crate::db::open(&self.db_path)?;
+        db.execute(
+            "INSERT INTO runs (id, repo, ref_name, sha, pushed_at_ms, state, queued_at_ms, workspace_path)
+             VALUES (?1, ?2, ?3, ?4, ?5, 'pending', ?6, ?7)",
+            rusqlite::params![
+                &id,
+                &self.repo,
+                &meta.r#ref,
+                &meta.sha,
+                meta.pushed_at.as_millisecond(),
+                Timestamp::now().as_millisecond(),
+                workspace_path.to_str().ok_or_else(|| std::io::Error::new(
+                    std::io::ErrorKind::InvalidData,
+                    "workspace path is not valid UTF-8",
+                ))?,
+            ],
+        )?;
+
+        // Create run directory for workspace and logs.
+        fs_err::create_dir_all(&workspace_path)?;
+
+        Ok(Run {
+            db_path: self.db_path.clone(),
+            id,
+            state: RunState::Pending,
+            base_dir: self.base_dir.clone(),
+        })
     }
 
-    /// Scan for orphaned runs in `pending/` and `active/` directories.
-    ///
-    /// Entries that cannot be opened (missing/unreadable `meta.yml` or
-    /// `times.yml`) are quarantined to `failed/` so they don't stay
-    /// stuck in pending/active forever.
+    /// Find runs stuck in `pending` or `active` states.
     ///
     /// The caller decides how to reconcile the returned runs:
-    /// - `pending/` entries should be re-enqueued.
-    /// - `active/` entries with no live runner should be marked failed.
+    /// - `pending` entries should be re-enqueued or completed.
+    /// - `active` entries with no live runner should be marked failed.
     pub fn scan_orphans(&self) -> Result<Vec<Run>> {
-        let mut orphans = Vec::new();
-
-        for &state in &[RunState::Pending, RunState::Active] {
-            let state_path = self.base.join(state.dir_name());
-            let entries = match fs_err::read_dir(&state_path) {
-                Ok(entries) => entries,
-                Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
-                Err(e) => return Err(e.into()), // cov-excl-line
-            };
-
-            for entry in entries {
-                let entry = entry?;
-                let name = match entry.file_name().to_str() {
-                    Some(n) => n.to_string(),
-                    None => continue, // cov-excl-line
-                };
-
-                if name.starts_with('.') {
-                    continue;
-                }
+        let db = crate::db::open(&self.db_path)?;
+        let mut stmt = db.prepare(
+            "SELECT id, state FROM runs WHERE state IN ('pending', 'active') AND repo = ?1",
+        )?;
+        let rows = stmt.query_map(rusqlite::params![&self.repo], |row| {
+            let id: String = row.get(0)?;
+            let state_str: String = row.get(1)?;
+            Ok((id, state_str))
+        })?;
 
-                match Run::open(self.base.clone(), state, name.clone()) {
-                    Ok(run) => orphans.push(run),
-                    Err(e) => {
-                        tracing::warn!(
-                            state = ?state,
-                            run_id = %name,
-                            error = %display_chain(&e),
-                            "quarantining unreadable run to failed/"
-                        );
-                        self.quarantine(&state_path.join(&name), &name)?;
-                    }
-                }
-            }
+        let mut orphans = Vec::new();
+        for row in rows {
+            let (id, state_str) = row?;
+            let state: RunState = state_str.parse().expect("DB enforces valid states");
+            orphans.push(Run {
+                db_path: self.db_path.clone(),
+                id,
+                state,
+                base_dir: self.base_dir.clone(),
+            });
         }
-
         Ok(orphans)
     }
 
-    /// Move a broken run directory into `failed/` so it stops blocking
-    /// pending/active. The contents may be unreadable; we only care
-    /// about getting it out of the active state buckets.
-    fn quarantine(&self, src: &Path, id: &str) -> Result<()> {
-        let failed_dir = self.base.join(RunState::Failed.dir_name());
-        fs_err::create_dir_all(&failed_dir)?;
-        fs_err::rename(src, failed_dir.join(id))?;
-        Ok(())
-    }
-
     /// Reconcile orphaned runs from a previous server instance.
     ///
-    /// - `pending/` orphans are moved to `complete/` (will be re-enqueued when
+    /// - `pending` orphans are moved to `complete` (will be re-enqueued when
     ///   the runner exists; for now, immediately completed).
-    /// - `active/` orphans are moved to `failed/` (no live runner).
+    /// - `active` orphans are moved to `failed` (no live runner).
     pub fn reconcile_orphans(&self) -> Result<()> {
         let orphans = self.scan_orphans()?;
         for orphan in &orphans {
             tracing::warn!(
-                run_id = %orphan.id(), // cov-excl-line
-                state = ?orphan.state(), // cov-excl-line
+                run_id = %orphan.id(),
+                state = ?orphan.state(),
                 "found orphaned run"
             );
         }
 
-        for mut orphan in orphans {
-            match orphan.state() {
-                RunState::Pending => {
-                    tracing::warn!(
-                        run_id = %orphan.id(), // cov-excl-line
-                        "completing orphaned pending run"
-                    );
-                    if let Err(e) = orphan.transition(RunState::Complete) {
-                        tracing::error!(
-                            run_id = %orphan.id(), // cov-excl-line
-                            error = %display_chain(&e),
-                            "failed to transition orphaned pending run"
-                        );
-                    }
-                }
-                RunState::Active => {
-                    tracing::warn!(
-                        run_id = %orphan.id(), // cov-excl-line
-                        "marking orphaned active run as failed"
-                    );
-                    if let Err(e) = orphan.transition(RunState::Failed) {
-                        tracing::error!(
-                            run_id = %orphan.id(), // cov-excl-line
-                            error = %display_chain(&e),
-                            "failed to transition orphaned active run to failed"
-                        );
-                    }
-                }
-                RunState::Complete | RunState::Failed => {
-                    unreachable!("scan_orphans only returns pending/active")
-                }
-            }
-        }
+        let now = Timestamp::now().as_millisecond();
+        let db = crate::db::open(&self.db_path)?;
+
+        // Active orphans → failed
+        db.execute(
+            "UPDATE runs SET state = 'failed', finished_at_ms = ?1, container_id = NULL, failure_kind = 'orphaned'
+             WHERE state = 'active' AND repo = ?2",
+            rusqlite::params![now, &self.repo],
+        )?;
+
+        // Pending orphans → complete (matching current behavior;
+        // umykvluw changes this to failed separately)
+        db.execute(
+            "UPDATE runs SET state = 'complete', started_at_ms = ?1, finished_at_ms = ?1
+             WHERE state = 'pending' AND repo = ?2",
+            rusqlite::params![now, &self.repo],
+        )?;
 
         Ok(())
     }
 }
 
-/// A CI run on disk.
+/// A CI run backed by a SQLite row.
 ///
-/// Owns the path to the run directory and the in-memory execution
-/// state used while driving a pipeline. Tracks current state so that
-/// `transition` can move the directory in one call.
+/// Owns the path to the database and the run's in-memory state cache.
+/// Reads and writes go through SQL. The run directory on disk holds
+/// the workspace and per-job log files.
 pub struct Run {
-    base: PathBuf,
-    state: RunState,
+    db_path: PathBuf,
     id: String,
+    state: RunState,
+    base_dir: PathBuf,
 }
 
 impl Run {
     /// The resolved path to this run's directory on disk.
     pub fn path(&self) -> PathBuf {
-        self.base.join(self.state.dir_name()).join(&self.id)
+        self.base_dir.join(&self.id)
     }
 
     /// The run's ID.
@@ -307,23 +272,33 @@ impl Run {
         self.state
     }
 
-    /// Open an existing run from disk.
-    ///
-    /// `state` is the directory the run is expected to be in (e.g.
-    /// `pending/`, `active/`). Returns an error if `meta.yml` or
-    /// `times.yml` are missing or unreadable.
-    pub fn open(base: PathBuf, state: RunState, id: String) -> Result<Self> {
-        let run = Self { base, state, id };
-        run.read_meta()?;
-        run.read_times()?;
-        Ok(run)
+    /// Open an existing run from the database by ID.
+    pub fn open(db_path: PathBuf, id: String, base_dir: PathBuf) -> Result<Self> {
+        let db = crate::db::open(&db_path)?;
+        let state_str: String = db.query_row(
+            "SELECT state FROM runs WHERE id = ?1",
+            rusqlite::params![&id],
+            |row| row.get(0),
+        )?;
+        let state: RunState = state_str.parse().map_err(|_| {
+            std::io::Error::new(
+                std::io::ErrorKind::InvalidData,
+                format!("invalid state in db: {state_str}"),
+            )
+        })?;
+        Ok(Self {
+            db_path,
+            id,
+            state,
+            base_dir,
+        })
     }
 
     /// Drive `pipeline` to completion through this run.
     ///
     /// Consumes the pipeline, taking ownership of its Lua VM. Constructs
     /// a fresh [`Runtime`] with `secrets`, the source outputs
-    /// (`:quire/push` from `meta.yml`), and the per-job transitive-input
+    /// (`:quire/push` from metadata), and the per-job transitive-input
     /// sets; installs it on the VM, topo-sorts the jobs, transitions
     /// Pending → Active, then invokes each `run_fn` in dependency order
     /// with the runtime handle as its sole argument. Returns a map of
@@ -331,9 +306,6 @@ impl Run {
     /// `Complete` if every job's `run_fn` returned without error,
     /// otherwise `Failed`.
     ///
-    /// Source-ref filtering (e.g. running only `quire/push`-reachable
-    /// jobs) is not yet implemented; for now every validated job runs.
-    ///
     /// Per-job logs are written to `jobs/<job-id>/log` inside the run
     /// directory before the final state transition, so logs are
     /// available for both successful and failed runs.
@@ -349,12 +321,10 @@ impl Run {
 
         // Transition to Active *before* building/starting the
         // container. The docker build can take a long time and
-        // happens with the run in `active/` so the on-disk state
+        // happens with the run in `active` so the database state
         // accurately reflects "this run is in progress." It also
-        // pins `self.path()` for the lifetime of the run, so the
-        // `container.yml` path captured by `DockerLifecycle` stays
-        // valid until just before the final Complete/Failed
-        // transition (where we explicitly drop the runtime first).
+        // means `container_id` is allowed (the CHECK constraint
+        // permits it in `active`).
         self.transition(RunState::Active)?;
 
         let executor_runtime = match self.build_executor_runtime(executor, workspace) {
@@ -412,10 +382,7 @@ impl Run {
 
         // Drop the runtime *before* the final transition. In docker
         // mode this fires `DockerLifecycle::drop`, which stamps
-        // `container_stopped_at` in `<run-dir>/container.yml`. The
-        // path it captured is still valid here (the run is in
-        // active/); the subsequent transition moves the file into
-        // place with the rest of the run dir.
+        // `container_stopped_at` in the database.
         drop(rt_value);
         let _ = lua; // release the Lua borrow tied to `runtime`.
         drop(runtime);
@@ -433,9 +400,8 @@ impl Run {
     }
 
     /// Build the per-run container if `executor` is `Docker`, writing
-    /// `container.yml` incrementally as each phase completes. Run must
-    /// already be in `active/` so `self.path()` is stable for the
-    /// lifetime of the returned [`DockerLifecycle`].
+    /// build and container timestamps to the database incrementally.
+    /// Run must already be in `active` so `container_id` is permitted.
     fn build_executor_runtime(
         &self,
         executor: Executor,
@@ -449,38 +415,42 @@ impl Run {
                 }
 
                 // Build phase.
-                let mut record = ContainerRecord {
-                    build_started_at: Some(Timestamp::now()),
-                    ..Default::default()
-                };
-                self.write_container_record(&record)?;
+                let now = Timestamp::now().as_millisecond();
+                let db = crate::db::open(&self.db_path)?;
+                db.execute(
+                    "UPDATE runs SET build_started_at_ms = ?1 WHERE id = ?2",
+                    rusqlite::params![now, &self.id],
+                )?;
 
                 let dockerfile = workspace.join(".quire/Dockerfile");
                 if !dockerfile.exists() {
                     return Err(Error::DockerfileMissing);
                 }
-                let tag = format!("quire-ci/{}:{}", repo_segment(&self.base), self.id);
+                let tag = format!("quire-ci/{}:{}", repo_segment(&self.base_dir), self.id);
 
                 crate::ci::docker::docker_build(&dockerfile, workspace, &tag)?;
 
-                record.image_tag = Some(tag.clone());
-                record.build_finished_at = Some(Timestamp::now());
-                self.write_container_record(&record)?;
+                let build_finished = Timestamp::now().as_millisecond();
+                db.execute(
+                    "UPDATE runs SET image_tag = ?1, build_finished_at_ms = ?2 WHERE id = ?3",
+                    rusqlite::params![&tag, build_finished, &self.id],
+                )?;
 
-                // Start phase. The bind-mount target inside the
-                // container doubles as the working directory for every
-                // `(sh …)` invocation routed through `docker exec`.
+                // Start phase.
                 const WORK_DIR: &str = "/work";
                 let session =
                     crate::ci::docker::ContainerSession::start(&tag, workspace, WORK_DIR)?;
 
-                record.container_id = Some(session.container_id.clone());
-                record.container_started_at = Some(session.container_started_at);
-                self.write_container_record(&record)?;
+                let container_started = session.container_started_at.as_millisecond();
+                db.execute(
+                    "UPDATE runs SET container_id = ?1, container_started_at_ms = ?2 WHERE id = ?3",
+                    rusqlite::params![&session.container_id, container_started, &self.id],
+                )?;
 
                 Ok(ExecutorRuntime::Docker(DockerLifecycle {
                     session,
-                    record_path: self.path().join("container.yml"),
+                    db_path: self.db_path.clone(),
+                    run_id: self.id.clone(),
                     work_dir: WORK_DIR.to_string(),
                 }))
             }
@@ -490,10 +460,8 @@ impl Run {
     /// Write per-job log files from the captured `(sh …)` outputs.
     ///
     /// Creates `jobs/<job-id>/log.yml` in the run directory for each
-    /// job that has outputs. The file contains a YAML list of `ShOutput`
-    /// entries — command, exit code, stdout, stderr — one per `(sh …)`
-    /// call. Written before the final state transition so logs are
-    /// available for both successful and failed runs.
+    /// job that has outputs. Written before the final state transition
+    /// so logs are available for both successful and failed runs.
     fn write_all_logs(&self, outputs: &HashMap<String, Vec<ShOutput>>) -> Result<()> {
         for (job_id, sh_outputs) in outputs {
             if sh_outputs.is_empty() {
@@ -508,13 +476,12 @@ impl Run {
 
     /// Transition the run from its current state to a new state.
     ///
-    /// Moves the run directory between state parent directories and stamps
-    /// `started_at` (entering Active) or `finished_at` (entering Complete or
-    /// Failed) on `times.yml`. Each timestamp is set at most once.
+    /// Executes a single `UPDATE` in the database, stamping
+    /// `started_at` (entering Active) or `finished_at` (entering
+    /// Complete or Failed) and clearing `container_id` on terminal
+    /// states. Each timestamp is set at most once.
     pub fn transition(&mut self, to: RunState) -> Result<()> {
         use RunState::*;
-        // Allowed transitions. Pending->Complete is the orphan-reconcile
-        // placeholder; everything else is the normal trigger lifecycle.
         let allowed = matches!(
             (self.state, to),
             (Pending, Active) | (Pending, Complete) | (Active, Complete) | (Active, Failed)
@@ -526,75 +493,76 @@ impl Run {
             });
         }
 
-        let src = self.path();
-        let dst_parent = self.base.join(to.dir_name());
+        let now = Timestamp::now().as_millisecond();
+        let db = crate::db::open(&self.db_path)?;
 
-        if !src.exists() {
-            return Err(Error::Io(std::io::Error::new(
-                std::io::ErrorKind::NotFound,
-                format!("run directory not found: {}", src.display()),
-            )));
-        }
-
-        fs_err::create_dir_all(&dst_parent)?;
-        let dst = dst_parent.join(&self.id);
-        fs_err::rename(&src, &dst)?;
-        self.state = to;
-
-        let mut times = self.read_times()?;
-        let now = Timestamp::now();
+        // Build the SET clause dynamically based on the target state.
         match to {
-            RunState::Active if times.started_at.is_none() => times.started_at = Some(now),
-            RunState::Complete | RunState::Failed if times.finished_at.is_none() => {
-                times.finished_at = Some(now)
+            Active => {
+                db.execute(
+                    "UPDATE runs SET state = 'active', started_at_ms = COALESCE(started_at_ms, ?1)
+                     WHERE id = ?2",
+                    rusqlite::params![now, &self.id],
+                )?;
+            }
+            Complete | Failed => {
+                db.execute(
+                    "UPDATE runs SET state = ?1, \
+                        started_at_ms = COALESCE(started_at_ms, ?2), \
+                        finished_at_ms = COALESCE(finished_at_ms, ?3), \
+                        container_id = NULL \
+                     WHERE id = ?4",
+                    rusqlite::params![to.as_str(), now, now, &self.id],
+                )?;
             }
-            _ => {} // cov-excl-line
+            _ => unreachable!("checked by allowed match above"),
         }
-        self.write_times(&times)?;
-        self.update_latest()?;
-        Ok(())
-    }
 
-    /// Atomically update the `latest` symlink to point at this run.
-    fn update_latest(&self) -> Result<()> {
-        let latest = self.base.join("latest");
-        let link_target = PathBuf::from(self.state.dir_name()).join(&self.id);
-        let tmp_link = self.base.join(".tmp-latest");
-        let _ = fs_err::remove_file(&tmp_link);
-        std::os::unix::fs::symlink(&link_target, &tmp_link)?;
-        let _ = fs_err::remove_file(&latest);
-        fs_err::rename(&tmp_link, &latest)?;
+        self.state = to;
         Ok(())
     }
 
-    /// Read the timestamps recorded for this run.
-    pub fn read_times(&self) -> Result<RunTimes> {
-        read_yaml(&self.path().join("times.yml"))
-    }
-
     /// Read the immutable metadata for this run.
     pub fn read_meta(&self) -> Result<RunMeta> {
-        read_yaml(&self.path().join("meta.yml"))
-    }
-
-    /// Update the timestamps for this run (atomic write).
-    pub fn write_times(&self, times: &RunTimes) -> Result<()> {
-        write_yaml(&self.path().join("times.yml"), times)
+        let db = crate::db::open(&self.db_path)?;
+        let (sha, ref_name, pushed_at_ms) = db.query_row(
+            "SELECT sha, ref_name, pushed_at_ms FROM runs WHERE id = ?1",
+            rusqlite::params![&self.id],
+            |row| {
+                let sha: String = row.get(0)?;
+                let ref_name: String = row.get(1)?;
+                let pushed_at_ms: i64 = row.get(2)?;
+                Ok((sha, ref_name, pushed_at_ms))
+            },
+        )?;
+        Ok(RunMeta {
+            sha,
+            r#ref: ref_name,
+            pushed_at: Timestamp::from_millisecond(pushed_at_ms)
+                .expect("db stores valid timestamps"),
+        })
     }
 
-    /// Read this run's `container.yml` record. Returns the deserialized
-    /// `ContainerRecord`. Errors if the file is missing or malformed —
-    /// callers should use `path().join("container.yml").exists()` if they
-    /// want to handle the absent case as "host mode."
-    pub fn read_container_record(&self) -> Result<ContainerRecord> {
-        read_yaml(&self.path().join("container.yml"))
+    /// Read the `started_at` timestamp for this run, if set.
+    pub fn read_started_at(&self) -> Result<Option<Timestamp>> {
+        let db = crate::db::open(&self.db_path)?;
+        let ms: Option<i64> = db.query_row(
+            "SELECT started_at_ms FROM runs WHERE id = ?1",
+            rusqlite::params![&self.id],
+            |row| row.get(0),
+        )?;
+        Ok(ms.map(|m| Timestamp::from_millisecond(m).expect("valid timestamp")))
     }
 
-    /// Atomically write this run's `container.yml` record (temp file +
-    /// rename). Each call replaces the file; partial fields are
-    /// represented as `None` and skipped from the output.
-    pub fn write_container_record(&self, record: &ContainerRecord) -> Result<()> {
-        write_yaml(&self.path().join("container.yml"), record)
+    /// Read the `finished_at` timestamp for this run, if set.
+    pub fn read_finished_at(&self) -> Result<Option<Timestamp>> {
+        let db = crate::db::open(&self.db_path)?;
+        let ms: Option<i64> = db.query_row(
+            "SELECT finished_at_ms FROM runs WHERE id = ?1",
+            rusqlite::params![&self.id],
+            |row| row.get(0),
+        )?;
+        Ok(ms.map(|m| Timestamp::from_millisecond(m).expect("valid timestamp")))
     }
 }
 
@@ -670,12 +638,6 @@ pub(crate) fn write_yaml<T: serde::Serialize>(path: &Path, value: &T) -> Result<
     Ok(())
 }
 
-/// Read a deserializable value from a YAML file.
-fn read_yaml<T: serde::de::DeserializeOwned>(path: &Path) -> Result<T> {
-    let f = fs_err::File::open(path)?;
-    Ok(serde_yaml_ng::from_reader(std::io::BufReader::new(f))?)
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -684,11 +646,16 @@ mod tests {
     fn tmp_quire() -> (tempfile::TempDir, Quire) {
         let dir = tempfile::tempdir().expect("tempdir");
         let quire = Quire::new(dir.path().to_path_buf());
+        // Initialize the database.
+        let mut db = crate::db::open(&quire.db_path()).expect("init db");
+        crate::db::migrate(&mut db).expect("migrate db");
+        drop(db);
         (dir, quire)
     }
 
     fn test_runs(quire: &Quire) -> Runs {
-        Runs::new(quire.base_dir().join("runs").join("test.git"))
+        let base_dir = quire.base_dir().join("runs").join("test.git");
+        Runs::new(quire.db_path(), "test.git".to_string(), base_dir)
     }
 
     /// Materialize a workspace directory under the test Quire's base dir.
@@ -812,11 +779,17 @@ mod tests {
     }
 
     #[test]
-    fn run_state_dir_name() {
-        assert_eq!(RunState::Pending.dir_name(), "pending");
-        assert_eq!(RunState::Active.dir_name(), "active");
-        assert_eq!(RunState::Complete.dir_name(), "complete");
-        assert_eq!(RunState::Failed.dir_name(), "failed");
+    fn run_state_round_trips() {
+        for state in [
+            RunState::Pending,
+            RunState::Active,
+            RunState::Complete,
+            RunState::Failed,
+            RunState::Superseded,
+        ] {
+            assert!(state.as_str().parse::<RunState>().is_ok());
+        }
+        assert!("unknown".parse::<RunState>().is_err());
     }
 
     #[test]
@@ -829,70 +802,47 @@ mod tests {
     }
 
     #[test]
-    fn create_symlinks_latest() {
-        let (_dir, quire) = tmp_quire();
-        let runs = test_runs(&quire);
-        let mut run = runs.create(&test_meta()).expect("create");
-
-        let latest = runs.base.join("latest");
-        assert!(latest.is_symlink(), "latest should be a symlink");
-        let target = fs_err::read_link(&latest).expect("read link");
-        assert_eq!(
-            target,
-            PathBuf::from(RunState::Pending.dir_name()).join(run.id())
-        );
-        assert!(latest.exists(), "latest should resolve to a real directory");
-
-        // Symlink should follow through transitions.
-        run.transition(RunState::Active).expect("to active");
-        let target = fs_err::read_link(&latest).expect("read link");
-        assert_eq!(
-            target,
-            PathBuf::from(RunState::Active.dir_name()).join(run.id())
-        );
-        assert!(latest.exists(), "latest should resolve after transition");
-    }
-
-    #[test]
-    fn create_writes_files_in_pending() {
+    fn create_writes_row_in_pending_state() {
         let (_dir, quire) = tmp_quire();
         let runs = test_runs(&quire);
         let run = runs.create(&test_meta()).expect("create");
 
-        let path = run.path();
-        assert!(path.exists(), "run directory should exist");
-        assert!(path.join("meta.yml").exists());
-        assert!(path.join("times.yml").exists());
         assert_eq!(run.state(), RunState::Pending);
 
+        // Verify workspace directory was created.
+        let workspace = run.path().join("workspace");
+        assert!(workspace.exists(), "workspace directory should exist");
+
+        // Verify metadata round-trips through the DB.
         let meta = run.read_meta().expect("read meta");
         assert_eq!(meta.sha, "abc123");
 
-        let state = run.read_times().expect("read state");
-        assert!(state.started_at.is_none());
-        assert!(state.finished_at.is_none());
+        // No started_at yet.
+        let started = run.read_started_at().expect("read started_at");
+        assert!(started.is_none());
+        let finished = run.read_finished_at().expect("read finished_at");
+        assert!(finished.is_none());
     }
 
     #[test]
-    fn transition_moves_directory() {
+    fn transition_updates_state_in_db() {
         let (_dir, quire) = tmp_quire();
         let runs = test_runs(&quire);
         let mut run = runs.create(&test_meta()).expect("create");
         let id = run.id().to_string();
 
-        let old_path = run.path();
         run.transition(RunState::Active).expect("transition");
-
-        assert!(!old_path.exists(), "pending dir should be gone");
         assert_eq!(run.state(), RunState::Active);
 
-        let new_path = run.path();
-        assert!(new_path.exists(), "active dir should exist");
+        // Verify started_at was stamped.
+        let started = run.read_started_at().expect("read started_at");
+        assert!(started.is_some(), "started_at should be stamped");
 
-        // Meta is byte-identical after move.
-        let meta = run.read_meta().expect("read meta");
-        assert_eq!(meta.sha, "abc123");
-        assert_eq!(run.id(), id);
+        // Re-open the run and verify state persists.
+        let reopened =
+            Run::open(quire.db_path(), id.clone(), runs.base_dir.clone()).expect("reopen");
+        assert_eq!(reopened.state(), RunState::Active);
+        assert_eq!(reopened.id(), id);
     }
 
     #[test]
@@ -902,9 +852,9 @@ mod tests {
         let mut run = runs.create(&test_meta()).expect("create");
 
         run.transition(RunState::Active).expect("to active");
-        let times = run.read_times().expect("read state");
-        assert!(times.started_at.is_some(), "started_at should be stamped");
-        assert!(times.finished_at.is_none());
+        let started = run.read_started_at().expect("read started_at");
+        assert!(started.is_some(), "started_at should be stamped");
+        assert!(run.read_finished_at().expect("read").is_none());
     }
 
     #[test]
@@ -917,14 +867,12 @@ mod tests {
         completed
             .transition(RunState::Complete)
             .expect("to complete");
-        let times = completed.read_times().expect("read state");
-        assert!(times.finished_at.is_some());
+        assert!(completed.read_finished_at().expect("read").is_some());
 
         let mut failed = runs.create(&test_meta()).expect("create");
         failed.transition(RunState::Active).expect("to active");
         failed.transition(RunState::Failed).expect("to failed");
-        let failed_times = failed.read_times().expect("read state");
-        assert!(failed_times.finished_at.is_some());
+        assert!(failed.read_finished_at().expect("read").is_some());
     }
 
     #[test]
@@ -953,35 +901,13 @@ mod tests {
         let mut run = runs.create(&test_meta()).expect("create");
 
         run.transition(RunState::Active).expect("to active");
-        let active_times = run.read_times().expect("read state");
-        let started = active_times.started_at;
+        let started = run.read_started_at().expect("read started_at");
 
         run.transition(RunState::Complete).expect("to complete");
-        let complete_times = run.read_times().expect("read state");
-        assert_eq!(complete_times.started_at, started, "started_at preserved");
-    }
-
-    #[test]
-    fn transition_keeps_existing_started_at() {
-        let (_dir, quire) = tmp_quire();
-        let runs = test_runs(&quire);
-        let mut run = runs.create(&test_meta()).expect("create");
-
-        // Pre-stamp started_at before transitioning to Active.
-        let pre: Timestamp = "2026-04-28T12:00:00Z".parse().unwrap();
-        run.write_times(&RunTimes {
-            started_at: Some(pre),
-            finished_at: None,
-        })
-        .expect("write times");
-
-        run.transition(RunState::Active).expect("to active");
-
-        let times = run.read_times().expect("read times");
         assert_eq!(
-            times.started_at,
-            Some(pre),
-            "should keep pre-set started_at"
+            run.read_started_at().expect("read"),
+            started,
+            "started_at preserved"
         );
     }
 
@@ -995,19 +921,6 @@ mod tests {
         run.transition(RunState::Complete).expect("to complete");
 
         assert_eq!(run.state(), RunState::Complete);
-        assert!(run.path().exists());
-    }
-
-    #[test]
-    fn transition_errors_on_missing_source() {
-        let mut run = Run {
-            base: PathBuf::from("/tmp/quire-test-runs/test.git"),
-            state: RunState::Pending,
-            id: uuid::Uuid::now_v7().to_string(),
-        };
-
-        let result = run.transition(RunState::Active);
-        assert!(result.is_err());
     }
 
     #[test]
@@ -1039,6 +952,7 @@ mod tests {
         let (_dir, quire) = tmp_quire();
         let runs = test_runs(&quire);
         let mut run = runs.create(&test_meta()).expect("create");
+        run.transition(RunState::Active).expect("transition");
         run.transition(RunState::Complete).expect("transition");
 
         let orphans = runs.scan_orphans().expect("scan");
@@ -1046,69 +960,12 @@ mod tests {
     }
 
     #[test]
-    fn scan_orphans_quarantines_unreadable_runs() {
-        let (_dir, quire) = tmp_quire();
-        let base = quire.base_dir().join("runs").join("test.git");
-        let runs = Runs::new(base.clone());
-
-        // Create a run, then break it by removing meta.yml.
-        let run = runs.create(&test_meta()).expect("create");
-        let id = run.id().to_string();
-        fs_err::remove_file(run.path().join("meta.yml")).expect("remove meta");
-
-        let orphans = runs.scan_orphans().expect("scan");
-        assert!(orphans.is_empty(), "broken run should not be returned");
-
-        let pending = base.join(RunState::Pending.dir_name()).join(&id);
-        assert!(!pending.exists(), "broken run should leave pending/");
-
-        let failed = base.join(RunState::Failed.dir_name()).join(&id);
-        assert!(failed.exists(), "broken run should land in failed/");
-    }
-
-    #[test]
-    fn scan_orphans_empty_when_no_runs_dir() {
+    fn scan_orphans_empty_when_no_runs() {
         let (_dir, quire) = tmp_quire();
         let runs = test_runs(&quire);
         assert!(runs.scan_orphans().expect("scan").is_empty());
     }
 
-    #[test]
-    fn scan_orphans_skips_dot_prefixed_entries() {
-        let (_dir, quire) = tmp_quire();
-        let runs = test_runs(&quire);
-        let run = runs.create(&test_meta()).expect("create");
-
-        // Drop a dot-prefixed directory into pending/ alongside the real run.
-        let pending_dir = runs.base.join(RunState::Pending.dir_name());
-        fs_err::create_dir_all(pending_dir.join(".tmp-stale")).expect("mkdir dot");
-
-        let orphans = runs.scan_orphans().expect("scan");
-        assert_eq!(orphans.len(), 1);
-        assert_eq!(orphans[0].id(), run.id());
-    }
-
-    #[test]
-    fn write_times_updates_in_place() {
-        let (_dir, quire) = tmp_quire();
-        let runs = test_runs(&quire);
-        let run = runs.create(&test_meta()).expect("create");
-
-        let started: Timestamp = "2026-04-28T12:00:01Z".parse().expect("parse");
-        run.write_times(&RunTimes {
-            started_at: Some(started),
-            finished_at: None,
-        })
-        .expect("write state");
-
-        let loaded = run.read_times().expect("read state");
-        assert_eq!(loaded.started_at, Some(started));
-
-        // Meta is unchanged.
-        let loaded_meta = run.read_meta().expect("read meta");
-        assert_eq!(loaded_meta, test_meta());
-    }
-
     #[test]
     fn reconcile_completes_pending_orphans() {
         let (_dir, quire) = tmp_quire();
@@ -1119,10 +976,8 @@ mod tests {
         runs.reconcile_orphans().expect("reconcile");
 
         // Pending orphan should be moved to complete.
-        let completed = runs.base.join(RunState::Complete.dir_name()).join(&id);
-        assert!(completed.exists(), "orphan should be in complete/");
-        let pending = runs.base.join(RunState::Pending.dir_name()).join(&id);
-        assert!(!pending.exists(), "orphan should not be in pending/");
+        let reopened = Run::open(quire.db_path(), id, runs.base_dir.clone()).expect("reopen");
+        assert_eq!(reopened.state(), RunState::Complete);
     }
 
     #[test]
@@ -1135,9 +990,8 @@ mod tests {
 
         runs.reconcile_orphans().expect("reconcile");
 
-        // Active orphan should be moved to failed.
-        let failed = runs.base.join(RunState::Failed.dir_name()).join(&id);
-        assert!(failed.exists(), "orphan should be in failed/");
+        let reopened = Run::open(quire.db_path(), id, runs.base_dir.clone()).expect("reopen");
+        assert_eq!(reopened.state(), RunState::Failed);
     }
 
     fn load(source: &str) -> Pipeline {
@@ -1200,9 +1054,9 @@ mod tests {
             )
             .expect("execute");
 
-        // Verify the run landed in complete/ on disk.
-        let completed = runs.base.join(RunState::Complete.dir_name()).join(&run_id);
-        assert!(completed.exists(), "run should be in complete/");
+        // Verify the run landed in complete in the DB.
+        let reopened = Run::open(quire.db_path(), run_id, runs.base_dir.clone()).expect("reopen");
+        assert_eq!(reopened.state(), RunState::Complete);
 
         let a = &outputs["a"];
         let b = &outputs["b"];
@@ -1214,8 +1068,6 @@ mod tests {
 
     #[test]
     fn execute_runs_jobs_in_topo_order() {
-        // `b` depends on `a`, but the registration order puts `b` first.
-        // Topo-sorted execution must run `a` before `b`.
         let (_dir, quire) = tmp_quire();
         let runs = test_runs(&quire);
         let run = runs.create(&test_meta()).expect("create");
@@ -1267,9 +1119,9 @@ mod tests {
             .expect_err("expected failure");
         assert!(matches!(err, Error::JobFailed { ref job, .. } if job == "a"));
 
-        // Verify the run landed in failed/ on disk.
-        let failed = runs.base.join(RunState::Failed.dir_name()).join(&run_id);
-        assert!(failed.exists(), "run should be in failed/");
+        // Verify the run is failed in the DB.
+        let reopened = Run::open(quire.db_path(), run_id, runs.base_dir.clone()).expect("reopen");
+        assert_eq!(reopened.state(), RunState::Failed);
     }
 
     #[test]
@@ -1303,8 +1155,6 @@ mod tests {
 
     #[test]
     fn jobs_returns_quire_push_outputs_through_transitive_input() {
-        // `b` depends on `a` which depends on `:quire/push`; `b` reads
-        // `:quire/push` directly even though it's not a direct input.
         let (_dir, quire) = tmp_quire();
         let runs = test_runs(&quire);
         let run = runs.create(&test_meta()).expect("create");
@@ -1366,7 +1216,6 @@ mod tests {
 
     #[test]
     fn jobs_errors_on_non_ancestor_job() {
-        // `peer` exists as a job but isn't an ancestor of `grab`.
         let (_dir, quire) = tmp_quire();
         let runs = test_runs(&quire);
         let run = runs.create(&test_meta()).expect("create");
@@ -1432,7 +1281,6 @@ mod tests {
         let runs = test_runs(&quire);
         let run = runs.create(&test_meta()).expect("create");
 
-        // `a` does nothing, `b` reads `a`'s outputs — should get nil.
         let pipeline = load(
             r#"(local ci (require :quire.ci))
 (ci.job :a [:quire/push] (fn [_] nil))
@@ -1478,8 +1326,7 @@ mod tests {
         .expect("execute");
 
         let log_path = runs
-            .base
-            .join(RunState::Complete.dir_name())
+            .base_dir
             .join(&run_id)
             .join("jobs")
             .join("greet")
@@ -1518,8 +1365,8 @@ mod tests {
             Executor::Host,
         );
 
-        let failed_dir = runs.base.join(RunState::Failed.dir_name()).join(&run_id);
-        assert!(failed_dir.exists(), "run should be in failed/");
+        let failed_dir = runs.base_dir.join(&run_id);
+        assert!(failed_dir.exists(), "run directory should exist");
 
         let log_path = failed_dir.join("jobs").join("a").join("log.yml");
         assert!(
@@ -1633,28 +1480,6 @@ mod tests {
         );
     }
 
-    #[test]
-    fn container_record_round_trips_through_yaml() {
-        let (_dir, quire) = tmp_quire();
-        let runs = test_runs(&quire);
-        let run = runs.create(&test_meta()).expect("create");
-
-        let now: Timestamp = "2026-05-04T16:20:01Z".parse().expect("parse");
-        let later: Timestamp = "2026-05-04T16:21:09Z".parse().expect("parse");
-        let record = ContainerRecord {
-            image_tag: Some("quire-ci/test:run-id".into()),
-            container_id: Some("9f3b8a72c1d4".into()),
-            build_started_at: Some(now),
-            build_finished_at: Some(later),
-            container_started_at: Some(later),
-            container_stopped_at: None,
-        };
-        run.write_container_record(&record).expect("write");
-
-        let read = run.read_container_record().expect("read");
-        assert_eq!(read, record);
-    }
-
     #[test]
     fn repo_segment_returns_final_component() {
         assert_eq!(repo_segment(Path::new("runs/test.git")), "test.git");
@@ -1667,12 +1492,8 @@ mod tests {
 
     #[test]
     fn repo_segment_sanitizes_for_docker_tags() {
-        // Docker rejects tags whose component starts with `.` or `-` —
-        // tempdir names produced by `tempfile::tempdir()` start with `.`.
         assert_eq!(repo_segment(Path::new("/tmp/.tmpAbCdEf")), "tmpabcdef");
-        // Uppercase is rejected; lowercase fine.
         assert_eq!(repo_segment(Path::new("MyRepo.git")), "myrepo.git");
-        // Other invalid characters become underscores.
         assert_eq!(
             repo_segment(Path::new("repo with spaces")),
             "repo_with_spaces"
@@ -1686,7 +1507,6 @@ mod tests {
             return;
         }
 
-        // Build a real git repo with a Dockerfile committed at HEAD.
         let dir = tempfile::tempdir().expect("tempdir");
         let src_repo = dir.path().join("src");
         fs_err::create_dir_all(&src_repo).expect("mkdir src");
@@ -1746,9 +1566,6 @@ mod tests {
         let run = runs.create(&meta).expect("create");
         let run_id = run.id().to_string();
 
-        // Run `uname -s` inside the container. On macOS `uname -s`
-        // returns `Darwin`; getting `Linux` back proves the command
-        // ran inside the alpine container, not on the host.
         let pipeline = load(
             r#"(local ci (require :quire.ci))
 (ci.job :probe [:quire/push] (fn [{: sh}] (sh ["uname" "-s"])))"#,
@@ -1773,21 +1590,31 @@ mod tests {
             probe[0].stdout,
         );
 
-        // Verify container.yml was written with all fields.
-        let complete = runs.base.join(RunState::Complete.dir_name()).join(&run_id);
-        let record_path = complete.join("container.yml");
-        assert!(record_path.exists(), "container.yml should exist");
-        let record: ContainerRecord =
-            serde_yaml_ng::from_str(&fs_err::read_to_string(&record_path).unwrap()).unwrap();
-        assert!(record.image_tag.is_some());
-        assert!(record.container_id.is_some());
-        assert!(record.build_started_at.is_some());
-        assert!(record.build_finished_at.is_some());
-        assert!(record.container_started_at.is_some());
-        assert!(record.container_stopped_at.is_some());
+        // Verify container metadata was written to the DB.
+        let db = crate::db::open(&quire.db_path()).expect("open db");
+        let image_tag: Option<String> = db
+            .query_row(
+                "SELECT image_tag FROM runs WHERE id = ?1",
+                rusqlite::params![&run_id],
+                |row| row.get(0),
+            )
+            .expect("query");
+        assert!(image_tag.is_some(), "image_tag should be set");
+
+        let container_stopped_ms: Option<i64> = db
+            .query_row(
+                "SELECT container_stopped_at_ms FROM runs WHERE id = ?1",
+                rusqlite::params![&run_id],
+                |row| row.get(0),
+            )
+            .expect("query");
+        assert!(
+            container_stopped_ms.is_some(),
+            "container_stopped_at_ms should be set"
+        );
 
         // Cleanup the image we built.
-        if let Some(tag) = record.image_tag {
+        if let Some(tag) = image_tag {
             let _ = std::process::Command::new("docker")
                 .args(["image", "rm", &tag])
                 .output();
diff --git a/src/db.rs b/src/db.rs
new file mode 100644
index 0000000..39d6f9c
--- /dev/null
+++ b/src/db.rs
@@ -0,0 +1,57 @@
+//! Database connection management and migration runner.
+//!
+//! [`open`] creates a connection with WAL mode and foreign keys enabled.
+//! [`migrate`] runs pending migrations — call once at server startup.
+
+use std::path::Path;
+
+use rusqlite::Connection;
+use rusqlite_migration::{M, Migrations};
+
+/// The ordered set of schema migrations. Append-only — never edit
+/// a migration that has already shipped.
+static MIGRATIONS: std::sync::LazyLock<Migrations<'static>> = std::sync::LazyLock::new(|| {
+    Migrations::new(vec![M::up(include_str!("../migrations/0001_initial.sql"))])
+});
+
+/// Error from running migrations.
+#[derive(Debug, thiserror::Error)]
+pub enum MigrationError {
+    #[error(transparent)]
+    Sqlite(#[from] rusqlite::Error),
+    #[error("migration error: {0}")]
+    Migration(#[source] rusqlite_migration::Error),
+}
+
+impl From<rusqlite_migration::Error> for MigrationError {
+    fn from(e: rusqlite_migration::Error) -> Self {
+        Self::Migration(e)
+    }
+}
+
+/// Open the database at `path`, enable WAL mode and foreign keys.
+/// Creates the file if it doesn't exist.
+///
+/// Does not run migrations. Call [`migrate`] once at server startup.
+pub fn open(path: &Path) -> Result<Connection, rusqlite::Error> {
+    let conn = Connection::open(path)?;
+    conn.execute_batch("PRAGMA journal_mode = WAL; PRAGMA foreign_keys = ON;")?;
+    Ok(conn)
+}
+
+/// Run any pending migrations on the given connection.
+///
+/// Call once at server startup, after [`open`].
+pub fn migrate(conn: &mut Connection) -> Result<(), MigrationError> {
+    MIGRATIONS.to_latest(conn)?;
+    Ok(())
+}
+
+/// Open an in-memory database with migrations applied (for tests).
+#[cfg(test)]
+pub fn open_in_memory() -> Result<Connection, MigrationError> {
+    let mut conn = Connection::open_in_memory()?;
+    conn.execute_batch("PRAGMA foreign_keys = ON;")?;
+    MIGRATIONS.to_latest(&mut conn)?;
+    Ok(conn)
+}
diff --git a/src/error.rs b/src/error.rs
index b469ed9..dd6f57f 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -23,6 +23,9 @@ pub enum Error {
     #[error(transparent)]
     Secret(#[from] secret::Error),
 
+    #[error(transparent)]
+    Sql(#[from] rusqlite::Error),
+
     #[error(transparent)]
     Yaml(#[from] serde_yaml_ng::Error),
 
diff --git a/src/lib.rs b/src/lib.rs
index 69dc499..ab7b623 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,5 @@
 pub mod ci;
+pub mod db;
 mod error;
 pub mod event;
 pub mod fennel;
diff --git a/src/quire.rs b/src/quire.rs
index 29295be..c750661 100644
--- a/src/quire.rs
+++ b/src/quire.rs
@@ -129,8 +129,12 @@ impl Repo {
     }
 
     /// Access CI runs for this repo.
-    pub fn runs(&self) -> Runs {
-        Runs::new(self.runs_base())
+    pub fn runs(&self, db_path: &Path) -> Runs {
+        Runs::new(
+            db_path.to_path_buf(),
+            self.name().to_string(),
+            self.runs_base(),
+        )
     }
 }
 
@@ -167,6 +171,10 @@ impl Quire {
         self.base_dir.join("config.fnl")
     }
 
+    pub fn db_path(&self) -> PathBuf {
+        self.base_dir.join("quire.db")
+    }
+
     pub fn socket_path(&self) -> PathBuf {
         self.base_dir.join("server.sock")
     }