Switch CI run store from filesystem to SQLite
Replaces the directory-per-run state management (pending/active/complete/failed
directories with YAML sidecars) with a single SQLite database at
<quire-root>/quire.db. State transitions are now single UPDATE statements
inside transactions, orphan reconciliation is a single SQL pass, and container
lifecycle timestamps are tracked in the runs table.
db::open is the sole connection factory (WAL mode, foreign keys). db::migrate
runs pending migrations — called once at server startup, not on every open.
Runs and Run each open their own connection via db::open. Ci::runs is removed;
all callers use Repo::runs, which derives the runs base dir from the repo name
and quire root (single source of truth).
Run directories on disk persist only for workspace materialization and
per-job log files.
Schema: runs table with CHECK constraints enforcing the state machine,
jobs table for per-job state. Migrations via rusqlite_migration with
SQL files under migrations/.
Assisted-by: GLM-5.1 via pi
diff --git a/Cargo.lock b/Cargo.lock
index b19270d..242efe0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -34,7 +34,7 @@ dependencies = [
"bytestring",
"derive_more",
"encoding_rs",
- "foldhash",
+ "foldhash 0.1.5",
"futures-core",
"http 0.2.12",
"httparse",
@@ -129,7 +129,7 @@ dependencies = [
"cfg-if",
"derive_more",
"encoding_rs",
- "foldhash",
+ "foldhash 0.1.5",
"futures-core",
"futures-util",
"impl-more",
@@ -699,6 +699,18 @@ dependencies = [
"windows-sys 0.61.2",
]
+[[package]]
+name = "fallible-iterator"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
+
+[[package]]
+name = "fallible-streaming-iterator"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
+
[[package]]
name = "fastrand"
version = "2.4.1"
@@ -750,6 +762,12 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
+[[package]]
+name = "foldhash"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
+
[[package]]
name = "form_urlencoded"
version = "1.2.2"
@@ -905,7 +923,16 @@ version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
- "foldhash",
+ "foldhash 0.1.5",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+dependencies = [
+ "foldhash 0.2.0",
]
[[package]]
@@ -914,6 +941,15 @@ version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51"
+[[package]]
+name = "hashlink"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea0b22561a9c04a7cb1a302c013e0259cd3b4bb619f145b32f72b8b4bcbed230"
+dependencies = [
+ "hashbrown 0.16.1",
+]
+
[[package]]
name = "heck"
version = "0.5.0"
@@ -1376,6 +1412,17 @@ version = "0.2.186"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
+[[package]]
+name = "libsqlite3-sys"
+version = "0.37.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1f111c8c41e7c61a49cd34e44c7619462967221a6443b0ec299e0ac30cfb9b1"
+dependencies = [
+ "cc",
+ "pkg-config",
+ "vcpkg",
+]
+
[[package]]
name = "linux-raw-sys"
version = "0.12.1"
@@ -2023,6 +2070,8 @@ dependencies = [
"petgraph",
"predicates",
"regex",
+ "rusqlite",
+ "rusqlite_migration",
"sentry",
"sentry-tracing",
"serde",
@@ -2186,6 +2235,41 @@ dependencies = [
"windows-sys 0.52.0",
]
+[[package]]
+name = "rsqlite-vfs"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8a1f2315036ef6b1fbacd1972e8ee7688030b0a2121edfc2a6550febd41574d"
+dependencies = [
+ "hashbrown 0.16.1",
+ "thiserror",
+]
+
+[[package]]
+name = "rusqlite"
+version = "0.39.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0d2b0146dd9661bf67bb107c0bb2a55064d556eeb3fc314151b957f313bcd4e"
+dependencies = [
+ "bitflags",
+ "fallible-iterator",
+ "fallible-streaming-iterator",
+ "hashlink",
+ "libsqlite3-sys",
+ "smallvec",
+ "sqlite-wasm-rs",
+]
+
+[[package]]
+name = "rusqlite_migration"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "410e4d2d97ff816796ed012b789c7381ae42c09a809822a75d29a01022181184"
+dependencies = [
+ "log",
+ "rusqlite",
+]
+
[[package]]
name = "rustc-demangle"
version = "0.1.27"
@@ -2652,6 +2736,18 @@ dependencies = [
"windows-sys 0.61.2",
]
+[[package]]
+name = "sqlite-wasm-rs"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b2c760607300407ddeaee518acf28c795661b7108c75421303dbefb237d3a36"
+dependencies = [
+ "cc",
+ "js-sys",
+ "rsqlite-vfs",
+ "wasm-bindgen",
+]
+
[[package]]
name = "stable_deref_trait"
version = "1.2.1"
@@ -3164,6 +3260,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
[[package]]
name = "wait-timeout"
version = "0.2.1"
diff --git a/Cargo.toml b/Cargo.toml
index 4df289c..cd784ed 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,6 +17,8 @@ miette = { version = "*", features = ["fancy"] }
mlua = { version = "*", features = ["lua54", "serde", "vendored", "error-send"] }
regex = "*"
petgraph = "*"
+rusqlite = { version = "*", features = ["bundled"] }
+rusqlite_migration = "*"
sentry = { version = "*", features = ["backtrace", "contexts", "debug-images", "panic", "release-health", "reqwest", "rustls", "tokio"], default-features = false }
sentry-tracing = "*"
serde = { version = "*", features = ["derive"] }
diff --git a/docs/CI.md b/docs/CI.md
index 0a74a2d..48e676d 100644
--- a/docs/CI.md
+++ b/docs/CI.md
@@ -26,31 +26,33 @@ The runner doesn't get its own process because **it doesn't execute user code in
Within the host process, `(sh ...)` is the only sanctioned host-effect primitive in the Lua VM. See "Sandbox the in-process VM" below — the compile-then-execute split removes `io`/`os`/`debug` from the execute VM so a buggy or hostile ci.fnl can't bypass the chokepoint.
-## Communication: filesystem as state of record, channels as optimization
+## Communication: SQLite as state of record, channels as optimization
-Run records on disk are the **durable truth** once written. The hook is a thin transport: it sends a push event over a Unix socket to `quire serve`, which is the sole writer of run records on disk.
+Run records in SQLite are the **durable truth** once written. The hook is a thin transport: it sends a push event over a Unix socket to `quire serve`, which is the sole writer of run records.
-| Component | Reads from disk | Writes to disk | In-memory comms |
+| Component | Reads from | Writes to | In-memory comms |
| --- | --- | --- | --- |
| Hook (`post-receive`) | — | — | push event → `quire serve` socket listener |
-| Runner (in-process with `quire serve`) | run records on startup | `meta.json`, `state.json`, `jobs/*/`, logs | wakeup from listener (mpsc); broadcast logs → web |
-| Web (`quire serve`) | run records on demand | — | subscribe to log broadcasts |
+| Runner (in-process with `quire serve`) | SQLite on startup | SQLite, `jobs/*/`, logs | wakeup from listener (Notify); broadcast logs → web |
+| Web (`quire serve`) | SQLite on demand | — | subscribe to log broadcasts |
-The listener task (also inside `quire serve`) bridges the hook process boundary to the in-process runner. It binds `/var/quire/server.sock` on startup, parses incoming events, writes the initial run record, and signals the runner via mpsc. The wakeup signal carries no payload — the runner re-derives state by scanning `pending/`, so missed or duplicated wakes are idempotent.
+The listener task (also inside `quire serve`) bridges the hook process boundary to the in-process runner. It binds `/var/quire/server.sock` on startup, parses incoming events, inserts the initial run row, and signals the runner. The wakeup signal carries no payload — the runner queries SQLite for the next pending run, so missed or duplicated wakes are idempotent.
-On startup, the runner walks `runs/pending/` and `runs/active/`, reconstructs the queue, and reconciles orphans (any `active/` entry whose container is no longer running gets marked failed). Crash resilience covers `quire serve` restart: any run record committed before the crash gets picked up.
+On startup, the runner reconciles orphans: any `active` row whose container is no longer running gets marked `failed`. Crash resilience covers `quire serve` restart: any run row committed before the crash gets picked up.
-**v1 limitation: zero-loss-on-server-down is not provided.** If `quire serve` is down at push time, the hook's socket connect fails, the pusher sees a stderr warning, and no run is created. The push itself remains accepted by git (post-receive runs after acceptance). The v1 mitigation is "run `quire serve` under a supervisor that restarts it"; a hook fallback that writes `meta.json` directly when the socket is unreachable is a deferred follow-up if this ever bites in practice.
+**v1 limitation: zero-loss-on-server-down is not provided.** If `quire serve` is down at push time, the hook's socket connect fails, the pusher sees a stderr warning, and no run is created. The push itself remains accepted by git (post-receive runs after acceptance). The v1 mitigation is "run `quire serve` under a supervisor that restarts it"; a hook fallback that inserts directly into SQLite when the socket is unreachable is a deferred follow-up if this ever bites in practice.
-The "we could one day extract the runner into its own process" door stays open: the on-disk schema doesn't change, the listener-to-runner mpsc becomes a Unix socket. Not building it now.
+The "we could one day extract the runner into its own process" door stays open: the SQLite schema doesn't change, the listener-to-runner wakeup becomes a Unix socket. Not building it now.
-## Storage: no database
+## Storage: SQLite
-No SQLite in v1. Run records, job state, logs, and the queue all live as files under `/var/quire/runs/`. None of the queries the v1 web UI wants are slow at this scale; the in-memory queue handles low-latency enqueue/dequeue.
+Run state, job state, and the run queue live in a single SQLite database at `<quire-root>/quire.db`. The database is the primary store for all run lifecycle data. The filesystem holds per-run workspaces and per-job log files only.
-**The commitment, written down so it doesn't drift:** if SQLite ever earns its keep — most likely trigger is FTS5 over logs — it enters as a **secondary index over the filesystem**, never as a primary store. Files remain canonical. The database is rebuildable: `quire reindex` walks `runs/` and repopulates. If the database is corrupted or lost, recovery is mechanical. The rule: `rm /var/quire/quire.db && quire reindex` returns the system to a working state. If that ever stops being true, the database has crossed a line it shouldn't have.
+The database is project-scoped, not CI-scoped, even though the only tables today are CI tables. Future tables (config snapshots, hook event audit, etc.) live in the same file.
-This is the principle that prevents drift. The temptation to migrate state into SQLite ("just this one thing, it's so much easier") is constant once it exists; without the rule written down, the second time you reach for SQLite you'll have forgotten why you originally said no.
+Migrations are SQL files under `migrations/` at the project root, embedded into the binary via `include_str!`. `rusqlite_migration` tracks `PRAGMA user_version` and applies missing migrations transactionally. Each future schema change adds a new file (`0002_*.sql`, `0003_*.sql`, …) and a corresponding `M::up` entry. Files are append-only — never edit a migration that has already shipped.
+
+SQLite is the queue: `quire serve` finds the next pending run with a `SELECT`, not by scanning directories. The wakeup signal stays an in-process `tokio::sync::Notify` — used only to nudge the runner, not to carry data.
## Concurrency: max one run at a time
@@ -146,28 +148,30 @@ The reason this is the chosen path rather than "subprocess + rlimit, no bwrap"
## Run lifecycle
-1. **`post-receive` hook** sends a push event (one JSON line: `{type, repo, pushed_at, refs: [{ref, old_sha, new_sha}, ...]}`) over `/var/quire/server.sock` and exits. The listener task in `quire serve` parses the event, allocates a run-id per ref, writes `runs/<repo>/<run-id>/{meta.json, state.json}`, and signals the runner via mpsc. No CI work runs in the hook itself.
-2. **Runner picks up** the entry from the queue. Atomic rename `pending/<id>` → `active/<id>` for state-machine clarity.
+1. **`post-receive` hook** sends a push event (one JSON line: `{type, repo, pushed_at, refs: [{ref, old_sha, new_sha}, ...]}`) over `/var/quire/server.sock` and exits. The listener task in `quire serve` parses the event, allocates a run-id per ref, inserts a row into `runs` in `pending` state, and signals the runner. No CI work runs in the hook itself.
+2. **Runner picks up** the entry from the queue. Single `UPDATE runs SET state = 'active'` in SQLite.
3. **Materialize workspace.** `git --git-dir=repos/foo.git archive <sha> | tar -x -C workspace/`. No worktree, no checkout state on the bare repo. Workspace is throwaway; deleted at end of run.
4. **Evaluate `.quire/ci.fnl`** in the host process (see above). Pipeline image is read from the `(ci.image ...)` registration; jobs are registered via `(ci.job ...)`; the run-fns are not yet invoked.
-5. **Start the run container.** `docker run -d --rm --mount type=bind,src=<run-dir>,dst=/work -w /work <image> sleep infinity`. Container ID stowed on the runtime. The run's container hosts every `(sh ...)` call from every job in the run.
-6. **Per ready job:** invoke its run-fn in topological order. Each `(sh ...)` call inside the run-fn issues `docker exec` (no TTY) into the run container, streams stdout/stderr into `jobs/<job-id>/log.jsonl` as JSONL events (one per `sh-start`, `stdout`/`stderr`, `sh-exit`), and returns `{exit, stdout, stderr, cmd}` to Lua. Container-level events (`container-start`, `container-died`, `container-end`) go into the run's own `<run-dir>/log.jsonl`.
-7. **Tear down the run container.** `docker stop` + `docker rm`. Even on error paths — no orphaned containers if a run-fn errors.
-8. **Aggregate.** Write final status to the run directory. Move `active/<id>` → `complete/<id>` (or `failed/<id>`).
+5. **Start the run container.** `docker run -d --rm --mount type=bind,src=<run-dir>,dst=/work -w /work <image> sleep infinity`. Container ID written to the `runs` row. The run's container hosts every `(sh ...)` call from every job in the run.
+6. **Per ready job:** invoke its run-fn in topological order. Each `(sh ...)` call inside the run-fn issues `docker exec` (no TTY) into the run container, captures stdout/stderr and exit code, and returns `{exit, stdout, stderr, cmd}` to Lua.
+7. **Tear down the run container.** `docker stop` + `docker rm`. Even on error paths — no orphaned containers if a run-fn errors. `container_stopped_at_ms` written to the `runs` row.
+8. **Aggregate.** Write final status via `UPDATE runs SET state = 'complete'` (or `'failed'`). Per-job logs are written to `jobs/<job-id>/log.yml` on disk before the final transition.
## Run record schema
```
+quire.db
+ runs table: id, repo, ref_name, sha, pushed_at_ms, state, failure_kind,
+ queued_at_ms, started_at_ms, finished_at_ms, container_id,
+ image_tag, build_started_at_ms, build_finished_at_ms,
+ container_started_at_ms, container_stopped_at_ms, workspace_path
+ jobs table: run_id, job_id, state, exit_code, started_at_ms, finished_at_ms
+
runs/<repo>/<run-id>/
- meta.json # immutable: sha, ref, pusher, pushed_at
- state.json # mutable: status, started_at, finished_at, runner_pid, container_id
- log.jsonl # per-run events: container-start, container-died, container-end
+ workspace/ # materialized checkout
jobs/
<job-id>/
- spec.json # immutable: inputs, registration source location
- state.json # mutable: status, started_at, finished_at, outputs
- log.jsonl # per-job events: sh-start, stdout, stderr, sh-exit
- cancel # touch-file; runner checks before each job
+ log.yml # per-job sh output logs
```
Two principles fall out:
@@ -246,7 +250,7 @@ Punt on cache invalidation until it actually annoys. "Delete the cache dir" is a
## Locked-in decisions
* **Runner is in-process** with `quire serve` as a tokio task; not a separate process. Filesystem is the state of record; channels are the wakeup optimization.
-* **No SQLite in v1.** If it enters later, it's a secondary index over the filesystem, never primary. `rm quire.db && quire reindex` must always recover.
+* **SQLite is the primary store for run and job state.** Migrations under `migrations/`, embedded into the binary. The filesystem holds workspaces and per-job log files only.
* **Per-run container**, not per-job and not long-lived runners. One `docker run` at run start, `docker exec` per `(sh ...)` call from each job, `docker stop` at run end. Per-job container differentiation is a deferred extension.
* **`(sh ...)` is the only host-effect primitive in the Lua VM.** No `(container ...)` primitive. The execute VM is hardened (no `io`/`os`/`debug`) so `sh` becomes the documented chokepoint — every effect is auditable, persistable, redactable in one place.
* **Pipeline-level image declaration via `(ci.image ...)`.** Single image per pipeline; per-job override deferred until pipelines actually need heterogeneity.
diff --git a/docs/PLAN.md b/docs/PLAN.md
index 8e7cd05..abb6a61 100644
--- a/docs/PLAN.md
+++ b/docs/PLAN.md
@@ -46,6 +46,7 @@ One volume mounted into the container:
```
/var/quire/
+ quire.db # SQLite database
repos/
foo.git/
quire/
@@ -54,10 +55,11 @@ One volume mounted into the container:
quire/...
runs/
<repo>/<run-id>/
- meta.fnl status, ref, sha, pipeline source, timings
- log streamed stdout/stderr
- artifacts/
- config.fnl global config
+ workspace/ # materialized checkout
+ jobs/
+ <job-id>/
+ log.yml # per-job sh output logs
+ config.fnl # global config
```
Per-repo config (`public_runs`, etc.) is checked into the repo at `.quire/config.fnl`, not stored in the bare repo's `quire/` directory. The `quire/` directory holds only generated artifacts.
diff --git a/docs/superpowers/plans/2026-05-06-ci-sqlite-migration.md b/docs/superpowers/plans/2026-05-06-ci-sqlite-migration.md
new file mode 100644
index 0000000..3c32c05
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-06-ci-sqlite-migration.md
@@ -0,0 +1,913 @@
+# CI SQLite Migration Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Replace the filesystem-backed CI run store with a single SQLite database, preserving the existing CI lifecycle (trigger → execute → complete/fail).
+
+**Architecture:** A `src/db.rs` module owns the SQLite connection, migrations, and schema. `Runs` and `Run` structs query and mutate the DB instead of managing directories. State directories (`pending/`, `active/`, `complete/`, `failed/`) and per-run YAML sidecars are removed. Run directories persist only for workspace materialization and log storage.
+
+**Tech Stack:** `rusqlite`, `rusqlite_migration`, `include_str!` for embedded SQL migrations.
+
+**Design doc:** `docs/plans/2026-05-06-ci-sqlite-migration-design.md`
+
+---
+
+## File structure
+
+| Action | File | Responsibility |
+|--------|------|---------------|
+| Create | `migrations/0001_initial.sql` | Schema DDL for `runs` and `jobs` tables |
+| Create | `src/db.rs` | Connection management, WAL mode, migration runner |
+| Modify | `Cargo.toml` | Add `rusqlite`, `rusqlite_migration` dependencies |
+| Modify | `src/lib.rs` | Export `db` module |
+| Modify | `src/ci/run.rs` | Rewrite `Runs` and `Run` to use SQLite; remove `write_yaml`/`read_yaml` helpers |
+| Modify | `src/ci/mod.rs` | Update `trigger_ref` to pass DB conn; remove old filesystem paths |
+| Modify | `src/ci/error.rs` | Add `Sql` error variant; remove `Yaml` and `Utf8` variants once unused |
+| Modify | `src/ci/runtime.rs` | Update `DockerLifecycle` to use DB for container record writes |
+| None | `src/ci/docker.rs` | No changes expected (shell-out layer) |
+| Modify | `src/quire.rs` | Add `db()` method returning a DB handle; remove `Runs` convenience methods that are now DB-scoped |
+| Modify | `src/bin/quire/server.rs` | Open DB on startup; pass to orphan reconciliation |
+| Modify | `src/bin/quire/commands/ci.rs` | Use DB for `ci run` command |
+| Modify | `docs/CI.md` | Update storage section, layout, lifecycle description |
+
+---
+
+## Task 1: Add dependencies and create migration file
+
+**Files:**
+- Create: `migrations/0001_initial.sql`
+- Modify: `Cargo.toml`
+
+- [ ] **Step 1: Add rusqlite and rusqlite_migration to Cargo.toml**
+
+Add to `[dependencies]` in `Cargo.toml`:
+
+```toml
+rusqlite = { version = "*", features = ["bundled"] }
+rusqlite_migration = "*"
+```
+
+- [ ] **Step 2: Create the initial migration file**
+
+Create `migrations/0001_initial.sql` with the schema from the design doc:
+
+```sql
+CREATE TABLE runs (
+ id TEXT PRIMARY KEY,
+ repo TEXT NOT NULL,
+ ref_name TEXT NOT NULL,
+ sha TEXT NOT NULL,
+ pushed_at_ms INTEGER NOT NULL,
+ state TEXT NOT NULL,
+ failure_kind TEXT,
+ queued_at_ms INTEGER NOT NULL,
+ started_at_ms INTEGER,
+ finished_at_ms INTEGER,
+ container_id TEXT,
+ workspace_path TEXT NOT NULL,
+
+ CHECK (state IN ('pending', 'active', 'complete', 'failed', 'superseded')),
+
+ CHECK (started_at_ms IS NULL OR started_at_ms >= queued_at_ms),
+ CHECK (finished_at_ms IS NULL OR finished_at_ms >= queued_at_ms),
+ CHECK (finished_at_ms IS NULL OR started_at_ms IS NULL
+ OR finished_at_ms >= started_at_ms),
+
+ CHECK (CASE state
+ WHEN 'pending' THEN started_at_ms IS NULL AND finished_at_ms IS NULL AND container_id IS NULL
+ WHEN 'active' THEN started_at_ms IS NOT NULL AND finished_at_ms IS NULL
+ WHEN 'complete' THEN started_at_ms IS NOT NULL AND finished_at_ms IS NOT NULL AND container_id IS NULL
+ WHEN 'failed' THEN finished_at_ms IS NOT NULL AND container_id IS NULL
+ WHEN 'superseded' THEN finished_at_ms IS NOT NULL AND container_id IS NULL
+ END)
+);
+
+CREATE INDEX runs_repo_pushed_at ON runs(repo, pushed_at_ms DESC);
+CREATE INDEX runs_state ON runs(state);
+
+CREATE TABLE jobs (
+ run_id TEXT NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
+ job_id TEXT NOT NULL,
+ state TEXT NOT NULL,
+ exit_code INTEGER,
+ started_at_ms INTEGER,
+ finished_at_ms INTEGER,
+
+ CHECK (state IN ('pending', 'active', 'complete', 'failed', 'skipped', 'aborted')),
+
+ CHECK (started_at_ms IS NULL OR finished_at_ms IS NULL
+ OR finished_at_ms >= started_at_ms),
+
+ CHECK (CASE state
+ WHEN 'pending' THEN started_at_ms IS NULL AND finished_at_ms IS NULL
+ WHEN 'active' THEN started_at_ms IS NOT NULL AND finished_at_ms IS NULL
+ WHEN 'complete' THEN started_at_ms IS NOT NULL AND finished_at_ms IS NOT NULL
+ WHEN 'failed' THEN started_at_ms IS NOT NULL AND finished_at_ms IS NOT NULL
+ WHEN 'skipped' THEN started_at_ms IS NULL AND finished_at_ms IS NOT NULL
+ WHEN 'aborted' THEN finished_at_ms IS NOT NULL
+ END),
+
+ PRIMARY KEY (run_id, job_id)
+);
+```
+
+- [ ] **Step 3: Verify it compiles**
+
+Run: `cargo check --workspace`
+Expected: compiles (dependencies resolve; no code uses them yet)
+
+- [ ] **Step 4: Commit**
+
+```
+Add rusqlite dependencies and initial schema migration
+```
+
+---
+
+## Task 2: Create `src/db.rs` — connection management and migration runner
+
+**Files:**
+- Create: `src/db.rs`
+- Modify: `src/lib.rs`
+
+- [ ] **Step 1: Create `src/db.rs`**
+
+```rust
+//! Database connection management and migration runner.
+//!
+//! Owns the SQLite connection, WAL mode pragma, foreign key enforcement,
+//! and the ordered list of migrations. Callers get an owned connection
+//! from [`open`] rather than calling `Connection::open` directly.
+
+use std::path::Path;
+use std::sync::LazyLock;
+
+use rusqlite::Connection;
+use rusqlite_migration::{Migrations, M};
+
+use crate::error::Error;
+
+/// The ordered set of schema migrations. Append-only — never edit
+/// a migration that has already shipped.
+static MIGRATIONS: LazyLock<Migrations<'static>> = LazyLock::new(|| {
+ Migrations::new(vec![
+ M::up(include_str!("../migrations/0001_initial.sql")),
+ ])
+});
+
+/// Open the database at `path`, enable WAL mode and foreign keys,
+/// and run any pending migrations. Creates the file if it doesn't
+/// exist.
+pub fn open(path: &Path) -> Result<Connection, Error> {
+ let mut conn = Connection::open(path)?;
+ conn.execute_batch("PRAGMA journal_mode = WAL; PRAGMA foreign_keys = ON;")?;
+ MIGRATIONS.to_latest(&mut conn)?;
+ Ok(conn)
+}
+
+/// Open an in-memory database (for tests). Same foreign-key pragma and
+/// migrations as the on-disk version; WAL does not apply to in-memory databases.
+#[cfg(test)]
+pub fn open_in_memory() -> Result<Connection, Error> {
+ let mut conn = Connection::open_in_memory()?;
+ conn.execute_batch("PRAGMA foreign_keys = ON;")?;
+ MIGRATIONS.to_latest(&mut conn)?;
+ Ok(conn)
+}
+```
+
+- [ ] **Step 2: Export the module from `src/lib.rs`**
+
+Add `pub mod db;` to `src/lib.rs`.
+
+- [ ] **Step 3: Add `Sql` error variant to `src/error.rs`**
+
+Add `rusqlite` error conversion. In `src/error.rs`:
+
+```rust
+#[error(transparent)]
+Sql(#[from] rusqlite::Error),
+```
+
+And in `src/ci/error.rs`:
+
+```rust
+#[error(transparent)]
+Sql(#[from] rusqlite::Error),
+```
+
+Also remove the `Yaml` and `Utf8` variants from `src/ci/error.rs` once nothing uses them (will clean up in Task 3).
+
+- [ ] **Step 4: Verify it compiles**
+
+Run: `cargo check --workspace`
+
+- [ ] **Step 5: Commit**
+
+```
+Add db module with SQLite connection management and migrations
+```
+
+---
+
+## Task 3: Rewrite `Runs` and `Run` to use SQLite
+
+This is the core of the migration. The `Runs` struct owns a DB connection (or a path to the DB file) and a base path for run directories (workspace + logs). `Run` owns a connection and a run ID.
+
+**Files:**
+- Modify: `src/ci/run.rs`
+- Modify: `src/ci/error.rs`
+
+### Key changes to `src/ci/run.rs`
+
+**Struct changes:**
+
+- `Runs` now holds: `db: rusqlite::Connection`, `repo: String`, `base_dir: PathBuf` (for run directories)
+- `Run` now holds: `db: rusqlite::Connection`, `id: String`, `repo: String`, `base_dir: PathBuf`
+- `RunState` gains `Superseded` variant
+- Remove `RunMeta`, `RunTimes`, `ContainerRecord` as persistence types — their fields map to columns
+- Keep `RunMeta` as an in-memory input type for `Runs::create` (callers still pass sha/ref/pushed_at)
+- Remove `write_yaml` / `read_yaml` helpers
+
+**`Runs::create` changes:**
+
+```sql
+INSERT INTO runs (id, repo, ref_name, sha, pushed_at_ms, state,
+ queued_at_ms, workspace_path)
+VALUES (?, ?, ?, ?, ?, 'pending', ?, ?);
+```
+
+The `workspace_path` is `<base_dir>/<id>/workspace`. The run directory is created at create time.
+
+**`Run::transition` changes:**
+
+```sql
+UPDATE runs SET state = ?, started_at_ms = ?, finished_at_ms = ?, container_id = NULL
+WHERE id = ?;
+```
+
+Single UPDATE in a transaction. No directory renames. Timestamps stamped as in the current code.
+
+**`Run::read_meta` / `read_times` / `write_times` changes:**
+
+Replaced by direct column reads from the `runs` row. Expose accessor methods instead of returning structs:
+
+- `Run::sha()`, `Run::ref_name()`, `Run::pushed_at_ms()` — read from DB
+- `Run::started_at_ms()`, `Run::finished_at_ms()` — read from DB
+- `Run::state()` — cached from last query or read fresh
+
+**`Run::read_container_record` / `write_container_record` changes:**
+
+`container_id` is a column on `runs`. The container timestamps (build_started_at, etc.) are not in the current schema — they can be added to the `runs` table in a follow-up migration. For now, keep writing `container.yml` as a file in the run directory for the container lifecycle timestamps, and only track `container_id` in the DB. This is consistent with the design doc's schema which only has `container_id`.
+
+**`DockerLifecycle` changes:**
+
+`record_path` still points to `<run-dir>/container.yml` for the container timestamps. The `container_id` is also written to the DB when it's set. The `Drop` impl continues to write `container_stopped_at` to the YAML file.
+
+**`Runs::scan_orphans` changes:**
+
+```sql
+SELECT id, state FROM runs WHERE state IN ('pending', 'active') AND repo = ?;
+```
+
+No more directory scanning. Quarantine concept goes away (unreadable runs were a filesystem artifact).
+
+**`Runs::reconcile_orphans` changes:**
+
+```sql
+UPDATE runs SET state = 'failed', finished_at_ms = ?, container_id = NULL, failure_kind = 'orphaned'
+WHERE state = 'active' AND repo = ?;
+```
+
+For pending orphans, the design doc says `umykvluw` lands separately. Current behavior transitions them to `complete`. Keep that behavior for now but use the DB:
+
+```sql
+UPDATE runs SET state = 'complete', finished_at_ms = ?, container_id = NULL
+WHERE state = 'pending' AND repo = ?;
+```
+
+**`Run::path` changes:**
+
+Returns `<base_dir>/<id>/` — the run directory for workspace and logs. No state subdirectory.
+
+**`Run::update_latest` changes:**
+
+Removed entirely. The `latest` symlink was a filesystem workaround; the DB query `SELECT id FROM runs WHERE repo = ? ORDER BY queued_at_ms DESC LIMIT 1` replaces it.
+
+**`Run::write_all_logs` changes:**
+
+Stays the same — writes YAML log files under `<run-dir>/jobs/<job-id>/log.yml`. Logs live on disk per the design doc.
+
+- [ ] **Step 1: Write the new `RunState` with `Superseded` variant and accessor methods**
+
+Update `RunState` to include `Superseded`:
+
+```rust
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum RunState {
+ Pending,
+ Active,
+ Complete,
+ Failed,
+ Superseded,
+}
+
+impl RunState {
+ pub fn as_str(&self) -> &'static str {
+ match self {
+ RunState::Pending => "pending",
+ RunState::Active => "active",
+ RunState::Complete => "complete",
+ RunState::Failed => "failed",
+ RunState::Superseded => "superseded",
+ }
+ }
+
+ pub fn from_str(s: &str) -> Option<Self> {
+ match s {
+ "pending" => Some(RunState::Pending),
+ "active" => Some(RunState::Active),
+ "complete" => Some(RunState::Complete),
+ "failed" => Some(RunState::Failed),
+ "superseded" => Some(RunState::Superseded),
+ _ => None,
+ }
+ }
+}
+```
+
+Remove `dir_name()`.
+
+- [ ] **Step 2: Rewrite `Runs` struct**
+
+```rust
+pub struct Runs {
+ db: rusqlite::Connection,
+ repo: String,
+ base_dir: PathBuf,
+}
+```
+
+`base_dir` is `<quire-root>/runs/<repo>/`. Run directories live at `<base_dir>/<id>/`.
+
+Update constructor:
+
+```rust
+impl Runs {
+ pub fn new(db: rusqlite::Connection, repo: String, base_dir: PathBuf) -> Self {
+ Self { db, repo, base_dir }
+ }
+}
+```
+
+- [ ] **Step 3: Rewrite `Runs::create`**
+
+```rust
+pub fn create(&self, meta: &RunMeta) -> Result<Run> {
+ let id = uuid::Uuid::now_v7().to_string();
+ let workspace_path = self.base_dir.join(&id).join("workspace");
+
+ self.db.execute(
+ "INSERT INTO runs (id, repo, ref_name, sha, pushed_at_ms, state, queued_at_ms, workspace_path)
+ VALUES (?1, ?2, ?3, ?4, ?5, 'pending', ?6, ?7)",
+ rusqlite::params![
+ &id,
+ &self.repo,
+ &meta.r#ref,
+ &meta.sha,
+ meta.pushed_at.as_millisecond(),
+ jiff::Timestamp::now().as_millisecond(),
+ workspace_path.to_str().ok_or_else(|| std::io::Error::new(
+ std::io::ErrorKind::InvalidData,
+ "workspace path is not valid UTF-8",
+ ))?,
+ ],
+ )?;
+
+ // Create run directory for workspace and logs.
+ fs_err::create_dir_all(&workspace_path)?;
+
+ Ok(Run {
+ id,
+ repo: self.repo.clone(),
+ base_dir: self.base_dir.clone(),
+ state: RunState::Pending,
+ })
+}
+```
+
+Note: `Run` caches `state` in memory to avoid a round-trip after creation. It also needs a way to execute DB statements. Two options:
+- (A) `Run` clones the connection or holds a reference
+- (B) `Run` takes `&Connection` on each method call
+
+Option (B) is cleaner for borrowing but makes `Run::execute` harder since it consumes `self`. Go with option (A): `Run` holds its own `rusqlite::Connection`. SQLite allows multiple connections to the same file in WAL mode. Alternatively, since `rusqlite::Connection` is not `Clone`, `Run` can take ownership of the connection from `Runs`.
+
+Actually, the simplest approach: `Runs` holds the DB path (not a connection), and each method opens a short-lived connection. Or better: pass `&Connection` to each method. Since `Run::execute` needs to do many operations, it should hold its own connection.
+
+Let me reconsider: `Runs` creates `Run` objects. The caller (trigger_ref, server startup) has a connection. Let's make `Runs` hold a `&Connection` lifetime... but that gets messy with ownership.
+
+Simplest correct approach: `Runs` owns a `Connection`. `Run` borrows `&Connection`. But `Run::execute` consumes `self` and the pipeline, and the runtime needs its own state...
+
+Final decision: `Run` holds its own `rusqlite::Connection`. It opens a new connection to the same DB file. This is standard SQLite practice — multiple connections in WAL mode are fine. `Runs` holds the DB path and opens connections as needed.
+
+Update:
+
+```rust
+pub struct Runs {
+ db_path: PathBuf,
+ repo: String,
+ base_dir: PathBuf,
+}
+
+impl Runs {
+ pub fn new(db_path: PathBuf, repo: String, base_dir: PathBuf) -> Self {
+ Self { db_path, repo, base_dir }
+ }
+
+ fn conn(&self) -> Result<rusqlite::Connection> {
+ let conn = rusqlite::Connection::open(&self.db_path)?;
+ conn.execute_batch("PRAGMA journal_mode = WAL; PRAGMA foreign_keys = ON;")?;
+ Ok(conn)
+ }
+
+ pub fn create(&self, meta: &RunMeta) -> Result<Run> {
+ let conn = self.conn()?;
+ // ... insert ...
+ Run::open(conn, self.repo.clone(), self.base_dir.clone(), &id)
+ }
+}
+```
+
+Hmm, but opening a new connection per operation is wasteful. Let me think about this differently.
+
+Better approach: `Runs` holds a `rusqlite::Connection`. `Run::execute` is where the long-lived operation happens. Before `execute`, `Run` can get its own connection (or we pass one in). For the simpler methods (transition, read_meta, etc.), `Run` can borrow from... somewhere.
+
+Actually, the cleanest solution: make `Run` hold a `rusqlite::Connection` that it receives at construction. `Runs` opens a connection in `create` and transfers it to the new `Run`. For methods that don't consume `self` (like `read_meta`, `transition`), `Run` uses its owned connection. For `execute`, it already owns one.
+
+But then `Runs` needs a new connection for each `create` call. Unless `Runs` doesn't hold a connection at all — it holds the db path.
+
+Let me look at how `Runs` is used:
+
+1. `Runs::new(base)` — constructed with a path
+2. `runs.create(&meta)` — returns a `Run`
+3. `runs.scan_orphans()` — returns `Vec<Run>`
+4. `runs.reconcile_orphans()` — internally iterates
+
+And `Run` is used:
+1. `run.id()`, `run.state()`, `run.path()`
+2. `run.transition(RunState::Active)`
+3. `run.read_meta()`, `run.read_times()`, `run.write_times()`
+4. `run.read_container_record()`, `run.write_container_record()`
+5. `run.execute(pipeline, secrets, git_dir, workspace, executor)` — consumes self
+
+The pattern is: `Runs` creates `Run` objects, and callers work with `Run` objects. Both need DB access.
+
+Simplest clean approach: both `Runs` and `Run` hold `rusqlite::Connection`. Since SQLite supports multiple connections in WAL mode, `Runs::create` opens a new connection for the new `Run`. For `Runs` methods like `scan_orphans` and `reconcile_orphans`, it uses its own connection.
+
+- [ ] **Step 4: Rewrite `Run` struct**
+
+```rust
+pub struct Run {
+ db: rusqlite::Connection,
+ id: String,
+ state: RunState,
+ base_dir: PathBuf,
+}
+```
+
+Methods use `self.db` for all reads/writes. No more filesystem state management.
+
+- [ ] **Step 5: Rewrite `Run::transition`**
+
+```rust
+pub fn transition(&mut self, to: RunState) -> Result<()> {
+ let allowed = matches!(
+ (self.state, to),
+ (RunState::Pending, RunState::Active)
+ | (RunState::Pending, RunState::Complete)
+ | (RunState::Active, RunState::Complete)
+ | (RunState::Active, RunState::Failed)
+ );
+ if !allowed {
+ return Err(Error::InvalidTransition { from: self.state, to });
+ }
+
+ let now = jiff::Timestamp::now().as_millisecond();
+
+ self.db.execute(
+ "UPDATE runs SET state = ?1,
+ started_at_ms = CASE WHEN ?2 = 'active' AND started_at_ms IS NULL THEN ?3 ELSE started_at_ms END,
+ finished_at_ms = CASE WHEN ?2 IN ('complete', 'failed') AND finished_at_ms IS NULL THEN ?3 ELSE finished_at_ms END,
+ container_id = CASE WHEN ?2 IN ('complete', 'failed') THEN NULL ELSE container_id END
+ WHERE id = ?4",
+ rusqlite::params![to.as_str(), to.as_str(), now, &self.id],
+ )?;
+
+ self.state = to;
+ Ok(())
+}
+```
+
+- [ ] **Step 6: Rewrite `Run::read_meta` as column accessors**
+
+```rust
+pub fn read_meta(&self) -> Result<RunMeta> {
+ let (sha, ref_name, pushed_at_ms) = self.db.query_row(
+ "SELECT sha, ref_name, pushed_at_ms FROM runs WHERE id = ?1",
+ rusqlite::params![&self.id],
+ |row| Ok((row.get(0)?, row.get(1)?, row.get::<_, i64>(2)?)),
+ )?;
+ Ok(RunMeta {
+ sha,
+ r#ref: ref_name,
+ pushed_at: jiff::Timestamp::from_millisecond(pushed_at_ms).expect("valid timestamp"),
+ })
+}
+```
+
+- [ ] **Step 7: Remove `write_yaml` / `read_yaml`, `RunTimes`, `ContainerRecord` persistence**
+
+`RunTimes` and `ContainerRecord` as separate YAML-backed types go away. Timestamps are columns on `runs`. `container_id` is a column. Container lifecycle timestamps (build_started_at etc.) can stay as a `container.yml` file in the run dir for now, since the schema only tracks `container_id`.
+
+Keep `RunMeta` as an in-memory struct passed to `Runs::create`. Remove `RunTimes` as a public type — callers use `run.started_at()` etc.
+
+Actually, keep `ContainerRecord` around for the file-based container timestamps, since the DB schema only has `container_id`. The `DockerLifecycle` still writes `container.yml`.
+
+- [ ] **Step 8: Rewrite `Run::path`**
+
+```rust
+pub fn path(&self) -> PathBuf {
+ self.base_dir.join(&self.id)
+}
+```
+
+No state subdirectory. The run dir is always `<base_dir>/<id>/`.
+
+- [ ] **Step 9: Remove `update_latest`**
+
+No more symlink. Remove the method entirely.
+
+- [ ] **Step 10: Rewrite `Runs::scan_orphans` and `reconcile_orphans`**
+
+```rust
+pub fn scan_orphans(&self) -> Result<Vec<Run>> {
+ let mut stmt = self.db.prepare(
+ "SELECT id, state FROM runs WHERE state IN ('pending', 'active') AND repo = ?1"
+ )?;
+ let rows = stmt.query_map(rusqlite::params![&self.repo], |row| {
+ let id: String = row.get(0)?;
+ let state_str: String = row.get(1)?;
+ let state = RunState::from_str(&state_str).expect("DB enforces valid states");
+ Ok((id, state))
+ })?;
+
+ let mut orphans = Vec::new();
+ for row in rows {
+ let (id, state) = row?;
+ let db = self.conn()?; // each Run gets its own connection
+ orphans.push(Run { db, id, state, base_dir: self.base_dir.clone() });
+ }
+ Ok(orphans)
+}
+```
+
+```rust
+pub fn reconcile_orphans(&self) -> Result<()> {
+ let now = jiff::Timestamp::now().as_millisecond();
+
+ // Active orphans → failed
+ self.db.execute(
+ "UPDATE runs SET state = 'failed', finished_at_ms = ?1, container_id = NULL, failure_kind = 'orphaned'
+ WHERE state = 'active' AND repo = ?2",
+ rusqlite::params![now, &self.repo],
+ )?;
+
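+ // Pending orphans → complete (matching current behavior; umykvluw changes this to failed). NOTE: as written this UPDATE leaves started_at_ms NULL, which the 'complete' CHECK in migrations/0001_initial.sql rejects — it must also set started_at_ms.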
+ self.db.execute(
+ "UPDATE runs SET state = 'complete', finished_at_ms = ?1, container_id = NULL
+ WHERE state = 'pending' AND repo = ?2",
+ rusqlite::params![now, &self.repo],
+ )?;
+
+ Ok(())
+}
+```
+
+- [ ] **Step 11: Rewrite `Run::execute` to use DB**
+
+The execute method is the most complex. Key changes:
+- `self.transition(RunState::Active)` works as before (now DB-backed)
+- `build_executor_runtime` writes `container_id` to the DB instead of (or in addition to) `container.yml`
+- `write_all_logs` stays file-based (logs on disk per design doc)
+- The `DockerLifecycle.record_path` stays for container timestamps, but `container_id` is tracked in the DB
+
+- [ ] **Step 12: Update `Run::build_executor_runtime`**
+
+After building the container and getting the session, write `container_id` to the DB:
+
+```rust
+self.db.execute(
+ "UPDATE runs SET container_id = ?1 WHERE id = ?2",
+ rusqlite::params![&session.container_id, &self.id],
+)?;
+```
+
+Keep writing `container.yml` for the build/container timestamps.
+
+- [ ] **Step 13: Remove unused error variants from `src/ci/error.rs`**
+
+Remove `Yaml` and `Utf8` variants if nothing uses them. Add `Sql` variant.
+
+- [ ] **Step 14: Run tests and fix compilation errors**
+
+Run: `cargo check --workspace`
+Then: `cargo test --workspace -q`
+
+Fix any compilation errors. The tests in `run.rs` will need updating since they construct `Runs` with the old API.
+
+- [ ] **Step 15: Update tests in `src/ci/run.rs`**
+
+Key test changes:
+- `tmp_quire()` helpers create an in-memory DB via `db::open_in_memory()` or open a temp file DB
+- `test_runs()` creates `Runs::new(db_path, "test.git".to_string(), base_dir)`
+- Tests no longer check for state directories (`pending/`, `active/`, etc.)
+- Tests check run directories at `<base_dir>/<id>/`
+- `scan_orphans` tests verify DB queries instead of directory scans
+- Remove `create_symlinks_latest` test (no more symlink)
+- Remove `scan_orphans_quarantines_unreadable_runs` test (no more quarantine — that was a filesystem artifact)
+- Update `transition_errors_on_missing_source` — no more missing directory, but could test with a run ID that doesn't exist in the DB
+
+- [ ] **Step 16: Run full test suite**
+
+Run: `cargo test --workspace -q`
+
+- [ ] **Step 17: Commit**
+
+```
+Rewrite Runs and Run to use SQLite for state storage
+```
+
+---
+
+## Task 4: Update `src/ci/mod.rs` — trigger path
+
+**Files:**
+- Modify: `src/ci/mod.rs`
+
+The `trigger_ref` function currently calls `ci.runs(repo.runs_base()).create(&meta)`. After the migration, it needs to pass the DB connection/path.
+
+- [ ] **Step 1: Update `Ci::runs` signature**
+
+Change from:
+
+```rust
+pub fn runs(&self, runs_base: PathBuf) -> Runs
+```
+
+To:
+
+```rust
+pub fn runs(&self, db_path: &Path, repo: &str, runs_base: PathBuf) -> Runs
+```
+
+Or, better: pass through the Quire-level DB path. The `trigger` function has access to `quire`, so it can pass `quire.db_path()`.
+
+- [ ] **Step 2: Update `trigger` and `trigger_ref` functions**
+
+```rust
+pub fn trigger(quire: &crate::Quire, event: &PushEvent) {
+ // ... existing repo resolution ...
+ let db_path = quire.db_path();
+ for push_ref in event.updated_refs() {
+ if let Err(e) = trigger_ref(&repo, &db_path, event.pushed_at, push_ref, &secrets) {
+ // ... error handling ...
+ }
+ }
+}
+
+fn trigger_ref(
+ repo: &Repo,
+ db_path: &Path,
+ pushed_at: jiff::Timestamp,
+ push_ref: &PushRef,
+ secrets: &HashMap<String, crate::secret::SecretString>,
+) -> error::Result<()> {
+ // ... existing code ...
+ let mut run = ci.runs(db_path, repo.name(), repo.runs_base()).create(&meta)?;
+ // ... rest stays largely the same ...
+}
+```
+
+- [ ] **Step 3: Update tests in `src/ci/mod.rs`**
+
+Tests that create `Runs` need the new signature. Use `db::open_in_memory()` or a temp file for the DB.
+
+- [ ] **Step 4: Run tests**
+
+Run: `cargo test --workspace -q`
+
+- [ ] **Step 5: Commit**
+
+```
+Update CI trigger path to use SQLite
+```
+
+---
+
+## Task 5: Update `src/quire.rs` — DB path accessor
+
+**Files:**
+- Modify: `src/quire.rs`
+
+- [ ] **Step 1: Add `db_path` method to `Quire`**
+
+```rust
+pub fn db_path(&self) -> PathBuf {
+ self.base_dir.join("quire.db")
+}
+```
+
+- [ ] **Step 2: Update `Repo::runs` and `Repo::runs_base`**
+
+`Repo::runs` currently returns `Runs::new(self.runs_base())`. Update to pass the DB path:
+
+```rust
+pub fn runs(&self, db_path: &Path) -> Runs {
+ Runs::new(
+ db_path.to_path_buf(),
+ self.name().to_string(),
+ self.runs_base(),
+ )
+}
+```
+
+Or remove the convenience method and let callers construct `Runs` directly with the right params.
+
+- [ ] **Step 3: Run tests**
+
+Run: `cargo test --workspace -q`
+
+- [ ] **Step 4: Commit**
+
+```
+Add DB path accessor to Quire and update Repo::runs
+```
+
+---
+
+## Task 6: Update server startup — open DB and reconcile orphans
+
+**Files:**
+- Modify: `src/bin/quire/server.rs`
+
+- [ ] **Step 1: Open the database on startup**
+
+Add after the socket setup, before orphan reconciliation:
+
+```rust
+let db_path = quire.db_path();
+tracing::info!(path = %db_path.display(), "opening database");
+let db = crate::db::open(&db_path)?;
+```
+
+- [ ] **Step 2: Update orphan reconciliation to use DB**
+
+```rust
+for repo in quire.repos().context("failed to list repos")? {
+ let runs = repo.runs(&db_path);
+ runs.reconcile_orphans()?;
+}
+```
+
+Note: if `Runs` holds a connection (not a path), the server would pass a reference or the path. If `Runs` takes a path and opens its own connection, this is straightforward.
+
+- [ ] **Step 3: Run tests**
+
+Run: `cargo test --workspace -q`
+
+- [ ] **Step 4: Commit**
+
+```
+Open SQLite database on server startup for orphan reconciliation
+```
+
+---
+
+## Task 7: Update `ci run` CLI command
+
+**Files:**
+- Modify: `src/bin/quire/commands/ci.rs`
+
+- [ ] **Step 1: Update `ci::run` to use DB**
+
+The `run` function creates a `Runs` with a tempdir. Now it needs a DB path. Use a temp file for the DB:
+
+```rust
+let db_path = tmp.path().join("quire.db");
+let db = quire::db::open(&db_path)?;
+let runs = Runs::new(db_path, "local".to_string(), tmp.path().to_path_buf());
+```
+
+- [ ] **Step 2: Run tests**
+
+Run: `cargo test --workspace -q`
+
+- [ ] **Step 3: Commit**
+
+```
+Update ci run command to use SQLite
+```
+
+---
+
+## Task 8: Update `src/ci/runtime.rs` — DockerLifecycle DB writes
+
+**Files:**
+- Modify: `src/ci/runtime.rs`
+
+The `DockerLifecycle` currently writes `container_stopped_at` to a YAML file. After the migration:
+- `container_id` is tracked in the DB
+- Container lifecycle timestamps can stay in `container.yml` for now (the DB schema only has `container_id`)
+- The `Drop` impl for `DockerLifecycle` needs a DB connection to clear `container_id`
+
+The challenge: `DockerLifecycle` needs DB access in its `Drop` impl. Options:
+- (A) Give `DockerLifecycle` the DB path so it can open a connection in `Drop`
+- (B) Keep writing container timestamps to `container.yml` only; the DB `container_id` is managed by `Run::build_executor_runtime` and `Run::transition` (which already clears it)
+
+Go with (B): `Run::transition` already sets `container_id = NULL` when transitioning to Complete/Failed. The `DockerLifecycle` only needs to write `container_stopped_at` to the YAML file. No changes to `DockerLifecycle` needed beyond what's already handled by the transition logic.
+
+- [ ] **Step 1: Verify `DockerLifecycle` Drop still works**
+
+The `Drop` impl writes to `container.yml` at `self.record_path`. This path is still valid since run directories still exist at `<base_dir>/<id>/`. No changes needed.
+
+- [ ] **Step 2: Verify container_id is cleared on state transition**
+
+The `Run::transition` SQL already sets `container_id = NULL` for complete/failed states. Confirm this is working.
+
+- [ ] **Step 3: Commit (if changes needed, otherwise skip)**
+
+---
+
+## Task 9: Update `docs/CI.md`
+
+**Files:**
+- Modify: `docs/CI.md`
+
+- [ ] **Step 1: Update the "Storage" section**
+
+Remove "No SQLite in v1" and the secondary-index-only commitment. Replace with the SQLite-as-primary-store description from the design doc.
+
+- [ ] **Step 2: Update the volume layout**
+
+Replace the directory-based run layout with:
+
+```
+/var/quire/
+ quire.db # SQLite database
+ repos/<name>.git/ # bare repos, unchanged
+ runs/<repo>/<run-id>/ # per-run workspace
+ workspace/ # materialized checkout
+ jobs/<job-id>/
+ log.yml # per-job sh output logs
+```
+
+- [ ] **Step 3: Update the lifecycle description**
+
+Replace the directory-rename lifecycle with SQL state transitions.
+
+- [ ] **Step 4: Remove the in-memory queue / mpsc references**
+
+The design doc says SQLite is the queue. The `mpsc` references in CI.md should be updated (or noted as "replaced by DB queries" — the actual queue replacement is a follow-up since the runner isn't built yet).
+
+- [ ] **Step 5: Commit**
+
+```
+Update CI docs for SQLite migration
+```
+
+---
+
+## Task 10: Clean up and final verification
+
+- [ ] **Step 1: Remove dead code**
+
+- Remove `write_yaml` and `read_yaml` helpers from `run.rs`
+- Remove `Yaml` error variant from `ci/error.rs` if unused
+- Remove `repo_segment` function if unused (was for Docker image tags from path — check if still needed)
+- Remove any `serde_yaml_ng` usage in `run.rs` that's no longer needed
+
+- [ ] **Step 2: Run `just all`**
+
+Run: `just all`
+Expected: all checks pass (fmt, clippy, test)
+
+- [ ] **Step 3: Run coverage**
+
+Run: `just coverage`
+Expected: 100% coverage maintained
+
+- [ ] **Step 4: Commit**
+
+```
+Clean up dead code from filesystem run store
+```
diff --git a/migrations/0001_initial.sql b/migrations/0001_initial.sql
new file mode 100644
index 0000000..4645069
--- /dev/null
+++ b/migrations/0001_initial.sql
@@ -0,0 +1,62 @@
+CREATE TABLE runs (
+ id TEXT PRIMARY KEY,
+ repo TEXT NOT NULL,
+ ref_name TEXT NOT NULL,
+ sha TEXT NOT NULL,
+ pushed_at_ms INTEGER NOT NULL,
+ state TEXT NOT NULL,
+ failure_kind TEXT,
+ queued_at_ms INTEGER NOT NULL,
+ started_at_ms INTEGER,
+ finished_at_ms INTEGER,
+ container_id TEXT,
+ image_tag TEXT,
+ build_started_at_ms INTEGER,
+ build_finished_at_ms INTEGER,
+ container_started_at_ms INTEGER,
+ container_stopped_at_ms INTEGER,
+ workspace_path TEXT NOT NULL,
+
+ CHECK (state IN ('pending', 'active', 'complete', 'failed', 'superseded')),
+
+ CHECK (started_at_ms IS NULL OR started_at_ms >= queued_at_ms),
+ CHECK (finished_at_ms IS NULL OR finished_at_ms >= queued_at_ms),
+ CHECK (finished_at_ms IS NULL OR started_at_ms IS NULL
+ OR finished_at_ms >= started_at_ms),
+
+ CHECK (CASE state
+ WHEN 'pending' THEN started_at_ms IS NULL AND finished_at_ms IS NULL AND container_id IS NULL
+ WHEN 'active' THEN started_at_ms IS NOT NULL AND finished_at_ms IS NULL
+ WHEN 'complete' THEN started_at_ms IS NOT NULL AND finished_at_ms IS NOT NULL AND container_id IS NULL
+ WHEN 'failed' THEN finished_at_ms IS NOT NULL AND container_id IS NULL
+ WHEN 'superseded' THEN finished_at_ms IS NOT NULL AND container_id IS NULL
+ END)
+);
+
+CREATE INDEX runs_repo_pushed_at ON runs(repo, pushed_at_ms DESC);
+CREATE INDEX runs_state ON runs(state);
+
+CREATE TABLE jobs (
+ run_id TEXT NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
+ job_id TEXT NOT NULL,
+ state TEXT NOT NULL,
+ exit_code INTEGER,
+ started_at_ms INTEGER,
+ finished_at_ms INTEGER,
+
+ CHECK (state IN ('pending', 'active', 'complete', 'failed', 'skipped', 'aborted')),
+
+ CHECK (started_at_ms IS NULL OR finished_at_ms IS NULL
+ OR finished_at_ms >= started_at_ms),
+
+ CHECK (CASE state
+ WHEN 'pending' THEN started_at_ms IS NULL AND finished_at_ms IS NULL
+ WHEN 'active' THEN started_at_ms IS NOT NULL AND finished_at_ms IS NULL
+ WHEN 'complete' THEN started_at_ms IS NOT NULL AND finished_at_ms IS NOT NULL
+ WHEN 'failed' THEN started_at_ms IS NOT NULL AND finished_at_ms IS NOT NULL
+ WHEN 'skipped' THEN started_at_ms IS NULL AND finished_at_ms IS NOT NULL
+ WHEN 'aborted' THEN finished_at_ms IS NOT NULL
+ END),
+
+ PRIMARY KEY (run_id, job_id)
+);
diff --git a/src/bin/quire/commands/ci.rs b/src/bin/quire/commands/ci.rs
index 0dd1f20..5382cdb 100644
--- a/src/bin/quire/commands/ci.rs
+++ b/src/bin/quire/commands/ci.rs
@@ -65,7 +65,11 @@ pub async fn run(quire: &Quire, maybe_sha: Option<&str>, executor: Executor) ->
// (e.g. $XDG_CACHE_HOME/quire/local-runs) so logs survive past the
// command and `tail -f` becomes useful.
let tmp = tempfile::tempdir().into_diagnostic()?;
- let runs = Runs::new(tmp.path().to_path_buf());
+ let db_path = tmp.path().join("quire.db");
+ let mut db = quire::db::open(&db_path).into_diagnostic()?;
+ quire::db::migrate(&mut db).into_diagnostic()?;
+ drop(db);
+ let runs = Runs::new(db_path, "local".to_string(), tmp.path().to_path_buf());
let meta = RunMeta {
sha: commit.sha.clone(),
diff --git a/src/bin/quire/server.rs b/src/bin/quire/server.rs
index 9db3aa9..cb56ef6 100644
--- a/src/bin/quire/server.rs
+++ b/src/bin/quire/server.rs
@@ -39,9 +39,16 @@ pub async fn run(quire: &Quire) -> Result<()> {
tracing::info!(path = %socket_path.display(), "listening on event socket");
+ // Open and migrate the database.
+ let db_path = quire.db_path();
+ tracing::info!(path = %db_path.display(), "opening database");
+ let mut db = quire::db::open(&db_path).into_diagnostic()?;
+ quire::db::migrate(&mut db).into_diagnostic()?;
+ drop(db);
+
// Scan for orphaned runs from a previous server instance.
for repo in quire.repos().context("failed to list repos")? {
- repo.runs().reconcile_orphans()?;
+ repo.runs(&db_path).reconcile_orphans()?;
}
let quire_handle = quire.clone();
diff --git a/src/ci/error.rs b/src/ci/error.rs
index c1f9124..2995401 100644
--- a/src/ci/error.rs
+++ b/src/ci/error.rs
@@ -67,6 +67,9 @@ pub enum Error {
#[error(transparent)]
Utf8(#[from] std::string::FromUtf8Error),
+ #[error(transparent)]
+ Sql(#[from] rusqlite::Error),
+
#[error(transparent)]
Secret(#[from] secret::Error),
diff --git a/src/ci/mod.rs b/src/ci/mod.rs
index f96b851..5ac86f5 100644
--- a/src/ci/mod.rs
+++ b/src/ci/mod.rs
@@ -13,7 +13,7 @@ pub(crate) mod error;
pub use error::{Error, Result};
pub use pipeline::{DefinitionError, Diagnostic, Job, Pipeline, PipelineError, StructureError};
-pub use run::{Executor, Run, RunMeta, RunState, RunTimes, Runs, materialize_workspace};
+pub use run::{Executor, Run, RunMeta, RunState, Runs, materialize_workspace};
/// A resolved commit reference.
///
@@ -26,7 +26,7 @@ pub struct CommitRef {
pub display: String,
}
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
use crate::display_chain;
use crate::event::{PushEvent, PushRef};
@@ -49,11 +49,6 @@ impl Ci {
Self { repo_path }
}
- /// Access CI runs for this repo.
- pub fn runs(&self, runs_base: PathBuf) -> Runs {
- Runs::new(runs_base)
- }
-
/// Read and compile ci.fnl at a given SHA, returning the validated
/// pipeline.
///
@@ -134,8 +129,9 @@ pub fn trigger(quire: &crate::Quire, event: &PushEvent) {
}
};
+ let db_path = quire.db_path();
for push_ref in event.updated_refs() {
- if let Err(e) = trigger_ref(&repo, event.pushed_at, push_ref, &secrets) {
+ if let Err(e) = trigger_ref(&repo, &db_path, event.pushed_at, push_ref, &secrets) {
tracing::error!(
repo = %event.repo,
sha = %push_ref.new_sha, // cov-excl-line
@@ -149,6 +145,7 @@ pub fn trigger(quire: &crate::Quire, event: &PushEvent) {
/// Create and run CI for a single updated ref.
fn trigger_ref(
repo: &Repo,
+ db_path: &Path,
pushed_at: jiff::Timestamp,
push_ref: &PushRef,
secrets: &HashMap<String, crate::secret::SecretString>,
@@ -165,7 +162,7 @@ fn trigger_ref(
pushed_at,
};
- let mut run = ci.runs(repo.runs_base()).create(&meta)?;
+ let mut run = repo.runs(db_path).create(&meta)?;
tracing::info!(
run_id = %run.id(), // cov-excl-line
@@ -245,6 +242,10 @@ mod tests {
);
let quire = Quire::new(dir.path().to_path_buf());
+ // Initialize the database.
+ let mut db = crate::db::open(&quire.db_path()).expect("init db");
+ crate::db::migrate(&mut db).expect("migrate db");
+ drop(db);
(dir, quire, "test.git".to_string())
}
@@ -267,6 +268,9 @@ mod tests {
);
let quire = Quire::new(dir.path().to_path_buf());
+ let mut db = crate::db::open(&quire.db_path()).expect("init db");
+ crate::db::migrate(&mut db).expect("migrate db");
+ drop(db);
(dir, quire, "test.git".to_string())
}
@@ -278,13 +282,6 @@ mod tests {
String::from_utf8(output.stdout).unwrap().trim().to_string()
}
- #[test]
- fn ci_new_and_runs() {
- let ci = Ci::new(PathBuf::from("/tmp/test"));
- let _runs = ci.runs(PathBuf::from("/tmp/runs"));
- // Just confirm construction works — Runs::new is covered elsewhere.
- }
-
#[test]
fn ci_pipeline_returns_none_when_no_ci_fnl() {
let (_dir, quire, name) = bare_repo_without_ci();
@@ -362,11 +359,17 @@ mod tests {
r#ref: "refs/heads/main".to_string(),
};
- trigger_ref(&repo, pushed_at, &push_ref, &HashMap::new())
- .expect("trigger_ref should succeed");
+ trigger_ref(
+ &repo,
+ &quire.db_path(),
+ pushed_at,
+ &push_ref,
+ &HashMap::new(),
+ )
+ .expect("trigger_ref should succeed");
// Verify a run was created in complete/.
- let runs = repo.runs();
+ let runs = repo.runs(&quire.db_path());
let orphans = runs.scan_orphans().expect("scan");
assert!(orphans.is_empty(), "run should be complete, not orphaned");
}
@@ -383,8 +386,14 @@ mod tests {
r#ref: "refs/heads/main".to_string(),
};
- trigger_ref(&repo, pushed_at, &push_ref, &HashMap::new())
- .expect("should succeed without ci.fnl");
+ trigger_ref(
+ &repo,
+ &quire.db_path(),
+ pushed_at,
+ &push_ref,
+ &HashMap::new(),
+ )
+ .expect("should succeed without ci.fnl");
}
#[test]
@@ -400,7 +409,13 @@ mod tests {
r#ref: "refs/heads/main".to_string(),
};
- let result = trigger_ref(&repo, pushed_at, &push_ref, &HashMap::new());
+ let result = trigger_ref(
+ &repo,
+ &quire.db_path(),
+ pushed_at,
+ &push_ref,
+ &HashMap::new(),
+ );
assert!(result.is_err(), "invalid pipeline should fail");
}
diff --git a/src/ci/run.rs b/src/ci/run.rs
index e48e5f6..ecf4fa3 100644
--- a/src/ci/run.rs
+++ b/src/ci/run.rs
@@ -1,9 +1,9 @@
-//! On-disk storage for CI runs.
+//! SQLite-backed storage for CI runs.
//!
-//! A run is a directory under `runs/<repo>/<state>/<id>/` containing
-//! `meta.yml` (immutable) and `times.yml` (timestamps). The directory's
-//! parent name is the authoritative state; transitions are atomic
-//! `rename` operations.
+//! A run is a row in the `runs` table identified by UUID. State
+//! transitions are single `UPDATE` statements inside a transaction.
+//! Run directories on disk hold the materialized workspace and per-job
+//! log files, but state lives in the database.
use std::collections::HashMap;
use std::path::{Path, PathBuf};
@@ -26,10 +26,9 @@ pub enum Executor {
Docker,
}
-/// Owns a [`ContainerSession`](crate::ci::docker::ContainerSession)
-/// alongside the run-dir's `container.yml` path so [`Drop`] can stamp
-/// `container_stopped_at` *before* the session itself drops and fires
-/// `docker stop`.
+/// Owns the per-run container session alongside the database path
+/// so [`Drop`] can stamp `container_stopped_at` *before* the session
+/// itself drops and fires `docker stop`.
///
/// Field declaration order matters: `session` is declared first so it
/// drops first after this struct's custom `Drop` body returns. The
@@ -37,7 +36,8 @@ pub enum Executor {
/// `docker stop`.
pub(super) struct DockerLifecycle {
pub(super) session: crate::ci::docker::ContainerSession,
- record_path: PathBuf,
+ db_path: PathBuf,
+ run_id: String,
pub(super) work_dir: String,
}
@@ -46,19 +46,22 @@ impl Drop for DockerLifecycle {
// Stamp `container_stopped_at` before ContainerSession's Drop
// (`docker stop`) fires. Errors are logged and swallowed —
// Drop cannot return Result.
- match read_yaml::<ContainerRecord>(&self.record_path) {
- Ok(mut rec) => {
- rec.container_stopped_at = Some(Timestamp::now());
- if let Err(e) = write_yaml(&self.record_path, &rec) {
+ match crate::db::open(&self.db_path) {
+ Ok(conn) => {
+ let now = Timestamp::now().as_millisecond();
+ if let Err(e) = conn.execute(
+ "UPDATE runs SET container_stopped_at_ms = ?1 WHERE id = ?2",
+ rusqlite::params![now, &self.run_id],
+ ) {
tracing::error!(
- error = %display_chain(&e),
+ error = %display_chain(&Error::from(e)),
"failed to write container_stopped_at"
);
}
}
Err(e) => tracing::error!(
error = %display_chain(&e),
- "failed to read container.yml before stop"
+ "failed to open db before container stop"
),
}
// After this body returns, fields drop in declaration order:
@@ -73,21 +76,39 @@ pub enum RunState {
Active,
Complete,
Failed,
+ Superseded,
}
impl RunState {
- /// The directory name used for this state in the run storage layout.
- pub fn dir_name(&self) -> &'static str {
+ pub fn as_str(&self) -> &'static str {
match self {
RunState::Pending => "pending",
RunState::Active => "active",
RunState::Complete => "complete",
RunState::Failed => "failed",
+ RunState::Superseded => "superseded",
+ }
+ }
+}
+
+impl std::str::FromStr for RunState {
+ type Err = ();
+
+ fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+ match s {
+ "pending" => Some(RunState::Pending),
+ "active" => Some(RunState::Active),
+ "complete" => Some(RunState::Complete),
+ "failed" => Some(RunState::Failed),
+ "superseded" => Some(RunState::Superseded),
+ _ => None,
}
+ .ok_or(())
}
}
-/// Immutable metadata for a CI run. Written once and never modified.
+/// Immutable metadata for a CI run. Passed to `Runs::create` at
+/// enqueue time; the fields are written to the `runs` row once.
#[derive(Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct RunMeta {
/// The commit SHA that triggered this run.
@@ -98,203 +119,147 @@ pub struct RunMeta {
pub pushed_at: Timestamp,
}
-/// Timestamps recorded across the run lifecycle. The directory name is the
-/// authoritative state; this file records when transitions happened.
-#[derive(Clone, Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-pub struct RunTimes {
- /// When the run was picked up (moved to active).
- #[serde(skip_serializing_if = "Option::is_none")]
- pub started_at: Option<Timestamp>,
- /// When the run finished (moved to complete/failed).
- #[serde(skip_serializing_if = "Option::is_none")]
- pub finished_at: Option<Timestamp>,
-}
-
-/// Container metadata for a docker-mode run, persisted to
-/// `<run-dir>/container.yml`. Each field is populated incrementally as
-/// the lifecycle progresses; absence implies "not yet (or never)
-/// reached." Host-mode runs do not write this file.
-#[derive(Clone, Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-pub struct ContainerRecord {
- #[serde(skip_serializing_if = "Option::is_none")]
- pub image_tag: Option<String>,
- #[serde(skip_serializing_if = "Option::is_none")]
- pub container_id: Option<String>,
- #[serde(skip_serializing_if = "Option::is_none")]
- pub build_started_at: Option<Timestamp>,
- #[serde(skip_serializing_if = "Option::is_none")]
- pub build_finished_at: Option<Timestamp>,
- #[serde(skip_serializing_if = "Option::is_none")]
- pub container_started_at: Option<Timestamp>,
- #[serde(skip_serializing_if = "Option::is_none")]
- pub container_stopped_at: Option<Timestamp>,
-}
-
/// Access to CI runs for a single repo.
///
-/// Owns the base path (`runs/<repo>/`) and provides run creation
-/// and orphan reconciliation. Obtain one via `Ci::runs()`.
+/// Owns the database path, repo name, and base directory for run
+/// artifacts (workspace, logs). Each method opens a connection via
+/// [`crate::db::open`]. Obtain one via `Repo::runs()`.
#[derive(Debug)]
pub struct Runs {
- base: PathBuf,
+ db_path: PathBuf,
+ repo: String,
+ base_dir: PathBuf,
}
impl Runs {
- pub fn new(base: PathBuf) -> Self {
- Self { base }
+ pub fn new(db_path: PathBuf, repo: String, base_dir: PathBuf) -> Self {
+ Self {
+ db_path,
+ repo,
+ base_dir,
+ }
}
/// Create a new run record in the `pending` state.
///
- /// Writes `meta.yml` and `times.yml` atomically (temp dir + rename).
+ /// Inserts a row into `runs` and creates the run directory for
+ /// workspace materialization and log storage.
pub fn create(&self, meta: &RunMeta) -> Result<Run> {
- let pending_dir = self.base.join(RunState::Pending.dir_name());
let id = uuid::Uuid::now_v7().to_string();
-
- fs_err::create_dir_all(&pending_dir)?;
-
- let tmp_dir = pending_dir.join(format!(".tmp-{id}"));
- fs_err::create_dir_all(&tmp_dir)?;
-
- write_yaml(&tmp_dir.join("meta.yml"), meta)?;
- write_yaml(&tmp_dir.join("times.yml"), &RunTimes::default())?;
-
- let final_dir = pending_dir.join(&id);
- fs_err::rename(&tmp_dir, &final_dir)?;
-
- // Set the latest symlink after opening the run so it can do it.
- let run = Run::open(self.base.clone(), RunState::Pending, id)?;
- run.update_latest()?;
- Ok(run)
+ let workspace_path = self.base_dir.join(&id).join("workspace");
+
+ let db = crate::db::open(&self.db_path)?;
+ db.execute(
+ "INSERT INTO runs (id, repo, ref_name, sha, pushed_at_ms, state, queued_at_ms, workspace_path)
+ VALUES (?1, ?2, ?3, ?4, ?5, 'pending', ?6, ?7)",
+ rusqlite::params![
+ &id,
+ &self.repo,
+ &meta.r#ref,
+ &meta.sha,
+ meta.pushed_at.as_millisecond(),
+ Timestamp::now().as_millisecond(),
+ workspace_path.to_str().ok_or_else(|| std::io::Error::new(
+ std::io::ErrorKind::InvalidData,
+ "workspace path is not valid UTF-8",
+ ))?,
+ ],
+ )?;
+
+ // Create run directory for workspace and logs.
+ fs_err::create_dir_all(&workspace_path)?;
+
+ Ok(Run {
+ db_path: self.db_path.clone(),
+ id,
+ state: RunState::Pending,
+ base_dir: self.base_dir.clone(),
+ })
}
- /// Scan for orphaned runs in `pending/` and `active/` directories.
- ///
- /// Entries that cannot be opened (missing/unreadable `meta.yml` or
- /// `times.yml`) are quarantined to `failed/` so they don't stay
- /// stuck in pending/active forever.
+ /// Find runs stuck in `pending` or `active` states.
///
/// The caller decides how to reconcile the returned runs:
- /// - `pending/` entries should be re-enqueued.
- /// - `active/` entries with no live runner should be marked failed.
+ /// - `pending` entries should be re-enqueued or completed.
+ /// - `active` entries with no live runner should be marked failed.
pub fn scan_orphans(&self) -> Result<Vec<Run>> {
- let mut orphans = Vec::new();
-
- for &state in &[RunState::Pending, RunState::Active] {
- let state_path = self.base.join(state.dir_name());
- let entries = match fs_err::read_dir(&state_path) {
- Ok(entries) => entries,
- Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
- Err(e) => return Err(e.into()), // cov-excl-line
- };
-
- for entry in entries {
- let entry = entry?;
- let name = match entry.file_name().to_str() {
- Some(n) => n.to_string(),
- None => continue, // cov-excl-line
- };
-
- if name.starts_with('.') {
- continue;
- }
+ let db = crate::db::open(&self.db_path)?;
+ let mut stmt = db.prepare(
+ "SELECT id, state FROM runs WHERE state IN ('pending', 'active') AND repo = ?1",
+ )?;
+ let rows = stmt.query_map(rusqlite::params![&self.repo], |row| {
+ let id: String = row.get(0)?;
+ let state_str: String = row.get(1)?;
+ Ok((id, state_str))
+ })?;
- match Run::open(self.base.clone(), state, name.clone()) {
- Ok(run) => orphans.push(run),
- Err(e) => {
- tracing::warn!(
- state = ?state,
- run_id = %name,
- error = %display_chain(&e),
- "quarantining unreadable run to failed/"
- );
- self.quarantine(&state_path.join(&name), &name)?;
- }
- }
- }
+ let mut orphans = Vec::new();
+ for row in rows {
+ let (id, state_str) = row?;
+ let state: RunState = state_str.parse().expect("DB enforces valid states");
+ orphans.push(Run {
+ db_path: self.db_path.clone(),
+ id,
+ state,
+ base_dir: self.base_dir.clone(),
+ });
}
-
Ok(orphans)
}
- /// Move a broken run directory into `failed/` so it stops blocking
- /// pending/active. The contents may be unreadable; we only care
- /// about getting it out of the active state buckets.
- fn quarantine(&self, src: &Path, id: &str) -> Result<()> {
- let failed_dir = self.base.join(RunState::Failed.dir_name());
- fs_err::create_dir_all(&failed_dir)?;
- fs_err::rename(src, failed_dir.join(id))?;
- Ok(())
- }
-
/// Reconcile orphaned runs from a previous server instance.
///
- /// - `pending/` orphans are moved to `complete/` (will be re-enqueued when
+ /// - `pending` orphans are moved to `complete` (will be re-enqueued when
/// the runner exists; for now, immediately completed).
- /// - `active/` orphans are moved to `failed/` (no live runner).
+ /// - `active` orphans are moved to `failed` (no live runner).
pub fn reconcile_orphans(&self) -> Result<()> {
let orphans = self.scan_orphans()?;
for orphan in &orphans {
tracing::warn!(
- run_id = %orphan.id(), // cov-excl-line
- state = ?orphan.state(), // cov-excl-line
+ run_id = %orphan.id(),
+ state = ?orphan.state(),
"found orphaned run"
);
}
- for mut orphan in orphans {
- match orphan.state() {
- RunState::Pending => {
- tracing::warn!(
- run_id = %orphan.id(), // cov-excl-line
- "completing orphaned pending run"
- );
- if let Err(e) = orphan.transition(RunState::Complete) {
- tracing::error!(
- run_id = %orphan.id(), // cov-excl-line
- error = %display_chain(&e),
- "failed to transition orphaned pending run"
- );
- }
- }
- RunState::Active => {
- tracing::warn!(
- run_id = %orphan.id(), // cov-excl-line
- "marking orphaned active run as failed"
- );
- if let Err(e) = orphan.transition(RunState::Failed) {
- tracing::error!(
- run_id = %orphan.id(), // cov-excl-line
- error = %display_chain(&e),
- "failed to transition orphaned active run to failed"
- );
- }
- }
- RunState::Complete | RunState::Failed => {
- unreachable!("scan_orphans only returns pending/active")
- }
- }
- }
+ let now = Timestamp::now().as_millisecond();
+ let db = crate::db::open(&self.db_path)?;
+
+ // Active orphans → failed
+ db.execute(
+ "UPDATE runs SET state = 'failed', finished_at_ms = ?1, container_id = NULL, failure_kind = 'orphaned'
+ WHERE state = 'active' AND repo = ?2",
+ rusqlite::params![now, &self.repo],
+ )?;
+
+ // Pending orphans → complete (matching current behavior;
+ // umykvluw changes this to failed separately)
+ db.execute(
+ "UPDATE runs SET state = 'complete', started_at_ms = ?1, finished_at_ms = ?1
+ WHERE state = 'pending' AND repo = ?2",
+ rusqlite::params![now, &self.repo],
+ )?;
Ok(())
}
}
-/// A CI run on disk.
+/// A CI run backed by a SQLite row.
///
-/// Owns the path to the run directory and the in-memory execution
-/// state used while driving a pipeline. Tracks current state so that
-/// `transition` can move the directory in one call.
+/// Owns the path to the database and the run's in-memory state cache.
+/// Reads and writes go through SQL. The run directory on disk holds
+/// the workspace and per-job log files.
pub struct Run {
- base: PathBuf,
- state: RunState,
+ db_path: PathBuf,
id: String,
+ state: RunState,
+ base_dir: PathBuf,
}
impl Run {
/// The resolved path to this run's directory on disk.
pub fn path(&self) -> PathBuf {
- self.base.join(self.state.dir_name()).join(&self.id)
+ self.base_dir.join(&self.id)
}
/// The run's ID.
@@ -307,23 +272,33 @@ impl Run {
self.state
}
- /// Open an existing run from disk.
- ///
- /// `state` is the directory the run is expected to be in (e.g.
- /// `pending/`, `active/`). Returns an error if `meta.yml` or
- /// `times.yml` are missing or unreadable.
- pub fn open(base: PathBuf, state: RunState, id: String) -> Result<Self> {
- let run = Self { base, state, id };
- run.read_meta()?;
- run.read_times()?;
- Ok(run)
+ /// Open an existing run from the database by ID.
+ pub fn open(db_path: PathBuf, id: String, base_dir: PathBuf) -> Result<Self> {
+ let db = crate::db::open(&db_path)?;
+ let state_str: String = db.query_row(
+ "SELECT state FROM runs WHERE id = ?1",
+ rusqlite::params![&id],
+ |row| row.get(0),
+ )?;
+ let state: RunState = state_str.parse().map_err(|_| {
+ std::io::Error::new(
+ std::io::ErrorKind::InvalidData,
+ format!("invalid state in db: {state_str}"),
+ )
+ })?;
+ Ok(Self {
+ db_path,
+ id,
+ state,
+ base_dir,
+ })
}
/// Drive `pipeline` to completion through this run.
///
/// Consumes the pipeline, taking ownership of its Lua VM. Constructs
/// a fresh [`Runtime`] with `secrets`, the source outputs
- /// (`:quire/push` from `meta.yml`), and the per-job transitive-input
+ /// (`:quire/push` from metadata), and the per-job transitive-input
/// sets; installs it on the VM, topo-sorts the jobs, transitions
/// Pending → Active, then invokes each `run_fn` in dependency order
/// with the runtime handle as its sole argument. Returns a map of
@@ -331,9 +306,6 @@ impl Run {
/// `Complete` if every job's `run_fn` returned without error,
/// otherwise `Failed`.
///
- /// Source-ref filtering (e.g. running only `quire/push`-reachable
- /// jobs) is not yet implemented; for now every validated job runs.
- ///
/// Per-job logs are written to `jobs/<job-id>/log` inside the run
/// directory before the final state transition, so logs are
/// available for both successful and failed runs.
@@ -349,12 +321,10 @@ impl Run {
// Transition to Active *before* building/starting the
// container. The docker build can take a long time and
- // happens with the run in `active/` so the on-disk state
+ // happens with the run in `active` so the database state
// accurately reflects "this run is in progress." It also
- // pins `self.path()` for the lifetime of the run, so the
- // `container.yml` path captured by `DockerLifecycle` stays
- // valid until just before the final Complete/Failed
- // transition (where we explicitly drop the runtime first).
+ // means `container_id` is allowed (the CHECK constraint
+ // permits it in `active`).
self.transition(RunState::Active)?;
let executor_runtime = match self.build_executor_runtime(executor, workspace) {
@@ -412,10 +382,7 @@ impl Run {
// Drop the runtime *before* the final transition. In docker
// mode this fires `DockerLifecycle::drop`, which stamps
- // `container_stopped_at` in `<run-dir>/container.yml`. The
- // path it captured is still valid here (the run is in
- // active/); the subsequent transition moves the file into
- // place with the rest of the run dir.
+ // `container_stopped_at` in the database.
drop(rt_value);
let _ = lua; // release the Lua borrow tied to `runtime`.
drop(runtime);
@@ -433,9 +400,8 @@ impl Run {
}
/// Build the per-run container if `executor` is `Docker`, writing
- /// `container.yml` incrementally as each phase completes. Run must
- /// already be in `active/` so `self.path()` is stable for the
- /// lifetime of the returned [`DockerLifecycle`].
+ /// build and container timestamps to the database incrementally.
+ /// Run must already be in `active` so `container_id` is permitted.
fn build_executor_runtime(
&self,
executor: Executor,
@@ -449,38 +415,42 @@ impl Run {
}
// Build phase.
- let mut record = ContainerRecord {
- build_started_at: Some(Timestamp::now()),
- ..Default::default()
- };
- self.write_container_record(&record)?;
+ let now = Timestamp::now().as_millisecond();
+ let db = crate::db::open(&self.db_path)?;
+ db.execute(
+ "UPDATE runs SET build_started_at_ms = ?1 WHERE id = ?2",
+ rusqlite::params![now, &self.id],
+ )?;
let dockerfile = workspace.join(".quire/Dockerfile");
if !dockerfile.exists() {
return Err(Error::DockerfileMissing);
}
- let tag = format!("quire-ci/{}:{}", repo_segment(&self.base), self.id);
+ let tag = format!("quire-ci/{}:{}", repo_segment(&self.base_dir), self.id);
crate::ci::docker::docker_build(&dockerfile, workspace, &tag)?;
- record.image_tag = Some(tag.clone());
- record.build_finished_at = Some(Timestamp::now());
- self.write_container_record(&record)?;
+ let build_finished = Timestamp::now().as_millisecond();
+ db.execute(
+ "UPDATE runs SET image_tag = ?1, build_finished_at_ms = ?2 WHERE id = ?3",
+ rusqlite::params![&tag, build_finished, &self.id],
+ )?;
- // Start phase. The bind-mount target inside the
- // container doubles as the working directory for every
- // `(sh …)` invocation routed through `docker exec`.
+ // Start phase.
const WORK_DIR: &str = "/work";
let session =
crate::ci::docker::ContainerSession::start(&tag, workspace, WORK_DIR)?;
- record.container_id = Some(session.container_id.clone());
- record.container_started_at = Some(session.container_started_at);
- self.write_container_record(&record)?;
+ let container_started = session.container_started_at.as_millisecond();
+ db.execute(
+ "UPDATE runs SET container_id = ?1, container_started_at_ms = ?2 WHERE id = ?3",
+ rusqlite::params![&session.container_id, container_started, &self.id],
+ )?;
Ok(ExecutorRuntime::Docker(DockerLifecycle {
session,
- record_path: self.path().join("container.yml"),
+ db_path: self.db_path.clone(),
+ run_id: self.id.clone(),
work_dir: WORK_DIR.to_string(),
}))
}
@@ -490,10 +460,8 @@ impl Run {
/// Write per-job log files from the captured `(sh …)` outputs.
///
/// Creates `jobs/<job-id>/log.yml` in the run directory for each
- /// job that has outputs. The file contains a YAML list of `ShOutput`
- /// entries — command, exit code, stdout, stderr — one per `(sh …)`
- /// call. Written before the final state transition so logs are
- /// available for both successful and failed runs.
+ /// job that has outputs. Written before the final state transition
+ /// so logs are available for both successful and failed runs.
fn write_all_logs(&self, outputs: &HashMap<String, Vec<ShOutput>>) -> Result<()> {
for (job_id, sh_outputs) in outputs {
if sh_outputs.is_empty() {
@@ -508,13 +476,12 @@ impl Run {
/// Transition the run from its current state to a new state.
///
- /// Moves the run directory between state parent directories and stamps
- /// `started_at` (entering Active) or `finished_at` (entering Complete or
- /// Failed) on `times.yml`. Each timestamp is set at most once.
+ /// Executes a single `UPDATE` in the database. Entering Active stamps
+ /// `started_at`; entering Complete or Failed stamps `finished_at`
+ /// (plus `started_at` if still unset) and clears `container_id`.
+ /// Each timestamp is set at most once.
pub fn transition(&mut self, to: RunState) -> Result<()> {
use RunState::*;
- // Allowed transitions. Pending->Complete is the orphan-reconcile
- // placeholder; everything else is the normal trigger lifecycle.
let allowed = matches!(
(self.state, to),
(Pending, Active) | (Pending, Complete) | (Active, Complete) | (Active, Failed)
@@ -526,75 +493,76 @@ impl Run {
});
}
- let src = self.path();
- let dst_parent = self.base.join(to.dir_name());
+ let now = Timestamp::now().as_millisecond();
+ let db = crate::db::open(&self.db_path)?;
- if !src.exists() {
- return Err(Error::Io(std::io::Error::new(
- std::io::ErrorKind::NotFound,
- format!("run directory not found: {}", src.display()),
- )));
- }
-
- fs_err::create_dir_all(&dst_parent)?;
- let dst = dst_parent.join(&self.id);
- fs_err::rename(&src, &dst)?;
- self.state = to;
-
- let mut times = self.read_times()?;
- let now = Timestamp::now();
+ // Pick the UPDATE statement for the target state; both are static SQL.
match to {
- RunState::Active if times.started_at.is_none() => times.started_at = Some(now),
- RunState::Complete | RunState::Failed if times.finished_at.is_none() => {
- times.finished_at = Some(now)
+ Active => {
+ db.execute(
+ "UPDATE runs SET state = 'active', started_at_ms = COALESCE(started_at_ms, ?1)
+ WHERE id = ?2",
+ rusqlite::params![now, &self.id],
+ )?;
+ }
+ Complete | Failed => {
+ db.execute(
+ "UPDATE runs SET state = ?1, \
+ started_at_ms = COALESCE(started_at_ms, ?2), \
+ finished_at_ms = COALESCE(finished_at_ms, ?3), \
+ container_id = NULL \
+ WHERE id = ?4",
+ rusqlite::params![to.as_str(), now, now, &self.id],
+ )?;
}
- _ => {} // cov-excl-line
+ _ => unreachable!("checked by allowed match above"),
}
- self.write_times(&times)?;
- self.update_latest()?;
- Ok(())
- }
- /// Atomically update the `latest` symlink to point at this run.
- fn update_latest(&self) -> Result<()> {
- let latest = self.base.join("latest");
- let link_target = PathBuf::from(self.state.dir_name()).join(&self.id);
- let tmp_link = self.base.join(".tmp-latest");
- let _ = fs_err::remove_file(&tmp_link);
- std::os::unix::fs::symlink(&link_target, &tmp_link)?;
- let _ = fs_err::remove_file(&latest);
- fs_err::rename(&tmp_link, &latest)?;
+ self.state = to;
Ok(())
}
- /// Read the timestamps recorded for this run.
- pub fn read_times(&self) -> Result<RunTimes> {
- read_yaml(&self.path().join("times.yml"))
- }
-
/// Read the immutable metadata for this run.
pub fn read_meta(&self) -> Result<RunMeta> {
- read_yaml(&self.path().join("meta.yml"))
- }
-
- /// Update the timestamps for this run (atomic write).
- pub fn write_times(&self, times: &RunTimes) -> Result<()> {
- write_yaml(&self.path().join("times.yml"), times)
+ let db = crate::db::open(&self.db_path)?;
+ let (sha, ref_name, pushed_at_ms) = db.query_row(
+ "SELECT sha, ref_name, pushed_at_ms FROM runs WHERE id = ?1",
+ rusqlite::params![&self.id],
+ |row| {
+ let sha: String = row.get(0)?;
+ let ref_name: String = row.get(1)?;
+ let pushed_at_ms: i64 = row.get(2)?;
+ Ok((sha, ref_name, pushed_at_ms))
+ },
+ )?;
+ Ok(RunMeta {
+ sha,
+ r#ref: ref_name,
+ pushed_at: Timestamp::from_millisecond(pushed_at_ms)
+ .expect("db stores valid timestamps"),
+ })
}
- /// Read this run's `container.yml` record. Returns the deserialized
- /// `ContainerRecord`. Errors if the file is missing or malformed —
- /// callers should use `path().join("container.yml").exists()` if they
- /// want to handle the absent case as "host mode."
- pub fn read_container_record(&self) -> Result<ContainerRecord> {
- read_yaml(&self.path().join("container.yml"))
+ /// Read the `started_at` timestamp for this run, if set.
+ pub fn read_started_at(&self) -> Result<Option<Timestamp>> {
+ let db = crate::db::open(&self.db_path)?;
+ let ms: Option<i64> = db.query_row(
+ "SELECT started_at_ms FROM runs WHERE id = ?1",
+ rusqlite::params![&self.id],
+ |row| row.get(0),
+ )?;
+ Ok(ms.map(|m| Timestamp::from_millisecond(m).expect("valid timestamp")))
}
- /// Atomically write this run's `container.yml` record (temp file +
- /// rename). Each call replaces the file; partial fields are
- /// represented as `None` and skipped from the output.
- pub fn write_container_record(&self, record: &ContainerRecord) -> Result<()> {
- write_yaml(&self.path().join("container.yml"), record)
+ /// Read the `finished_at` timestamp for this run, if set.
+ pub fn read_finished_at(&self) -> Result<Option<Timestamp>> {
+ let db = crate::db::open(&self.db_path)?;
+ let ms: Option<i64> = db.query_row(
+ "SELECT finished_at_ms FROM runs WHERE id = ?1",
+ rusqlite::params![&self.id],
+ |row| row.get(0),
+ )?;
+ Ok(ms.map(|m| Timestamp::from_millisecond(m).expect("valid timestamp")))
}
}
@@ -670,12 +638,6 @@ pub(crate) fn write_yaml<T: serde::Serialize>(path: &Path, value: &T) -> Result<
Ok(())
}
-/// Read a deserializable value from a YAML file.
-fn read_yaml<T: serde::de::DeserializeOwned>(path: &Path) -> Result<T> {
- let f = fs_err::File::open(path)?;
- Ok(serde_yaml_ng::from_reader(std::io::BufReader::new(f))?)
-}
-
#[cfg(test)]
mod tests {
use super::*;
@@ -684,11 +646,16 @@ mod tests {
fn tmp_quire() -> (tempfile::TempDir, Quire) {
let dir = tempfile::tempdir().expect("tempdir");
let quire = Quire::new(dir.path().to_path_buf());
+ // Initialize the database.
+ let mut db = crate::db::open(&quire.db_path()).expect("init db");
+ crate::db::migrate(&mut db).expect("migrate db");
+ drop(db);
(dir, quire)
}
fn test_runs(quire: &Quire) -> Runs {
- Runs::new(quire.base_dir().join("runs").join("test.git"))
+ let base_dir = quire.base_dir().join("runs").join("test.git");
+ Runs::new(quire.db_path(), "test.git".to_string(), base_dir)
}
/// Materialize a workspace directory under the test Quire's base dir.
@@ -812,11 +779,17 @@ mod tests {
}
#[test]
- fn run_state_dir_name() {
- assert_eq!(RunState::Pending.dir_name(), "pending");
- assert_eq!(RunState::Active.dir_name(), "active");
- assert_eq!(RunState::Complete.dir_name(), "complete");
- assert_eq!(RunState::Failed.dir_name(), "failed");
+ fn run_state_round_trips() {
+ for state in [
+ RunState::Pending,
+ RunState::Active,
+ RunState::Complete,
+ RunState::Failed,
+ RunState::Superseded,
+ ] {
+ assert!(state.as_str().parse::<RunState>().is_ok());
+ }
+ assert!("unknown".parse::<RunState>().is_err());
}
#[test]
@@ -829,70 +802,47 @@ mod tests {
}
#[test]
- fn create_symlinks_latest() {
- let (_dir, quire) = tmp_quire();
- let runs = test_runs(&quire);
- let mut run = runs.create(&test_meta()).expect("create");
-
- let latest = runs.base.join("latest");
- assert!(latest.is_symlink(), "latest should be a symlink");
- let target = fs_err::read_link(&latest).expect("read link");
- assert_eq!(
- target,
- PathBuf::from(RunState::Pending.dir_name()).join(run.id())
- );
- assert!(latest.exists(), "latest should resolve to a real directory");
-
- // Symlink should follow through transitions.
- run.transition(RunState::Active).expect("to active");
- let target = fs_err::read_link(&latest).expect("read link");
- assert_eq!(
- target,
- PathBuf::from(RunState::Active.dir_name()).join(run.id())
- );
- assert!(latest.exists(), "latest should resolve after transition");
- }
-
- #[test]
- fn create_writes_files_in_pending() {
+ fn create_writes_row_in_pending_state() {
let (_dir, quire) = tmp_quire();
let runs = test_runs(&quire);
let run = runs.create(&test_meta()).expect("create");
- let path = run.path();
- assert!(path.exists(), "run directory should exist");
- assert!(path.join("meta.yml").exists());
- assert!(path.join("times.yml").exists());
assert_eq!(run.state(), RunState::Pending);
+ // Verify workspace directory was created.
+ let workspace = run.path().join("workspace");
+ assert!(workspace.exists(), "workspace directory should exist");
+
+ // Verify metadata round-trips through the DB.
let meta = run.read_meta().expect("read meta");
assert_eq!(meta.sha, "abc123");
- let state = run.read_times().expect("read state");
- assert!(state.started_at.is_none());
- assert!(state.finished_at.is_none());
+ // No started_at yet.
+ let started = run.read_started_at().expect("read started_at");
+ assert!(started.is_none());
+ let finished = run.read_finished_at().expect("read finished_at");
+ assert!(finished.is_none());
}
#[test]
- fn transition_moves_directory() {
+ fn transition_updates_state_in_db() {
let (_dir, quire) = tmp_quire();
let runs = test_runs(&quire);
let mut run = runs.create(&test_meta()).expect("create");
let id = run.id().to_string();
- let old_path = run.path();
run.transition(RunState::Active).expect("transition");
-
- assert!(!old_path.exists(), "pending dir should be gone");
assert_eq!(run.state(), RunState::Active);
- let new_path = run.path();
- assert!(new_path.exists(), "active dir should exist");
+ // Verify started_at was stamped.
+ let started = run.read_started_at().expect("read started_at");
+ assert!(started.is_some(), "started_at should be stamped");
- // Meta is byte-identical after move.
- let meta = run.read_meta().expect("read meta");
- assert_eq!(meta.sha, "abc123");
- assert_eq!(run.id(), id);
+ // Re-open the run and verify state persists.
+ let reopened =
+ Run::open(quire.db_path(), id.clone(), runs.base_dir.clone()).expect("reopen");
+ assert_eq!(reopened.state(), RunState::Active);
+ assert_eq!(reopened.id(), id);
}
#[test]
@@ -902,9 +852,9 @@ mod tests {
let mut run = runs.create(&test_meta()).expect("create");
run.transition(RunState::Active).expect("to active");
- let times = run.read_times().expect("read state");
- assert!(times.started_at.is_some(), "started_at should be stamped");
- assert!(times.finished_at.is_none());
+ let started = run.read_started_at().expect("read started_at");
+ assert!(started.is_some(), "started_at should be stamped");
+ assert!(run.read_finished_at().expect("read").is_none());
}
#[test]
@@ -917,14 +867,12 @@ mod tests {
completed
.transition(RunState::Complete)
.expect("to complete");
- let times = completed.read_times().expect("read state");
- assert!(times.finished_at.is_some());
+ assert!(completed.read_finished_at().expect("read").is_some());
let mut failed = runs.create(&test_meta()).expect("create");
failed.transition(RunState::Active).expect("to active");
failed.transition(RunState::Failed).expect("to failed");
- let failed_times = failed.read_times().expect("read state");
- assert!(failed_times.finished_at.is_some());
+ assert!(failed.read_finished_at().expect("read").is_some());
}
#[test]
@@ -953,35 +901,13 @@ mod tests {
let mut run = runs.create(&test_meta()).expect("create");
run.transition(RunState::Active).expect("to active");
- let active_times = run.read_times().expect("read state");
- let started = active_times.started_at;
+ let started = run.read_started_at().expect("read started_at");
run.transition(RunState::Complete).expect("to complete");
- let complete_times = run.read_times().expect("read state");
- assert_eq!(complete_times.started_at, started, "started_at preserved");
- }
-
- #[test]
- fn transition_keeps_existing_started_at() {
- let (_dir, quire) = tmp_quire();
- let runs = test_runs(&quire);
- let mut run = runs.create(&test_meta()).expect("create");
-
- // Pre-stamp started_at before transitioning to Active.
- let pre: Timestamp = "2026-04-28T12:00:00Z".parse().unwrap();
- run.write_times(&RunTimes {
- started_at: Some(pre),
- finished_at: None,
- })
- .expect("write times");
-
- run.transition(RunState::Active).expect("to active");
-
- let times = run.read_times().expect("read times");
assert_eq!(
- times.started_at,
- Some(pre),
- "should keep pre-set started_at"
+ run.read_started_at().expect("read"),
+ started,
+ "started_at preserved"
);
}
@@ -995,19 +921,6 @@ mod tests {
run.transition(RunState::Complete).expect("to complete");
assert_eq!(run.state(), RunState::Complete);
- assert!(run.path().exists());
- }
-
- #[test]
- fn transition_errors_on_missing_source() {
- let mut run = Run {
- base: PathBuf::from("/tmp/quire-test-runs/test.git"),
- state: RunState::Pending,
- id: uuid::Uuid::now_v7().to_string(),
- };
-
- let result = run.transition(RunState::Active);
- assert!(result.is_err());
}
#[test]
@@ -1039,6 +952,7 @@ mod tests {
let (_dir, quire) = tmp_quire();
let runs = test_runs(&quire);
let mut run = runs.create(&test_meta()).expect("create");
+ run.transition(RunState::Active).expect("transition");
run.transition(RunState::Complete).expect("transition");
let orphans = runs.scan_orphans().expect("scan");
@@ -1046,69 +960,12 @@ mod tests {
}
#[test]
- fn scan_orphans_quarantines_unreadable_runs() {
- let (_dir, quire) = tmp_quire();
- let base = quire.base_dir().join("runs").join("test.git");
- let runs = Runs::new(base.clone());
-
- // Create a run, then break it by removing meta.yml.
- let run = runs.create(&test_meta()).expect("create");
- let id = run.id().to_string();
- fs_err::remove_file(run.path().join("meta.yml")).expect("remove meta");
-
- let orphans = runs.scan_orphans().expect("scan");
- assert!(orphans.is_empty(), "broken run should not be returned");
-
- let pending = base.join(RunState::Pending.dir_name()).join(&id);
- assert!(!pending.exists(), "broken run should leave pending/");
-
- let failed = base.join(RunState::Failed.dir_name()).join(&id);
- assert!(failed.exists(), "broken run should land in failed/");
- }
-
- #[test]
- fn scan_orphans_empty_when_no_runs_dir() {
+ fn scan_orphans_empty_when_no_runs() {
let (_dir, quire) = tmp_quire();
let runs = test_runs(&quire);
assert!(runs.scan_orphans().expect("scan").is_empty());
}
- #[test]
- fn scan_orphans_skips_dot_prefixed_entries() {
- let (_dir, quire) = tmp_quire();
- let runs = test_runs(&quire);
- let run = runs.create(&test_meta()).expect("create");
-
- // Drop a dot-prefixed directory into pending/ alongside the real run.
- let pending_dir = runs.base.join(RunState::Pending.dir_name());
- fs_err::create_dir_all(pending_dir.join(".tmp-stale")).expect("mkdir dot");
-
- let orphans = runs.scan_orphans().expect("scan");
- assert_eq!(orphans.len(), 1);
- assert_eq!(orphans[0].id(), run.id());
- }
-
- #[test]
- fn write_times_updates_in_place() {
- let (_dir, quire) = tmp_quire();
- let runs = test_runs(&quire);
- let run = runs.create(&test_meta()).expect("create");
-
- let started: Timestamp = "2026-04-28T12:00:01Z".parse().expect("parse");
- run.write_times(&RunTimes {
- started_at: Some(started),
- finished_at: None,
- })
- .expect("write state");
-
- let loaded = run.read_times().expect("read state");
- assert_eq!(loaded.started_at, Some(started));
-
- // Meta is unchanged.
- let loaded_meta = run.read_meta().expect("read meta");
- assert_eq!(loaded_meta, test_meta());
- }
-
#[test]
fn reconcile_completes_pending_orphans() {
let (_dir, quire) = tmp_quire();
@@ -1119,10 +976,8 @@ mod tests {
runs.reconcile_orphans().expect("reconcile");
// Pending orphan should be moved to complete.
- let completed = runs.base.join(RunState::Complete.dir_name()).join(&id);
- assert!(completed.exists(), "orphan should be in complete/");
- let pending = runs.base.join(RunState::Pending.dir_name()).join(&id);
- assert!(!pending.exists(), "orphan should not be in pending/");
+ let reopened = Run::open(quire.db_path(), id, runs.base_dir.clone()).expect("reopen");
+ assert_eq!(reopened.state(), RunState::Complete);
}
#[test]
@@ -1135,9 +990,8 @@ mod tests {
runs.reconcile_orphans().expect("reconcile");
- // Active orphan should be moved to failed.
- let failed = runs.base.join(RunState::Failed.dir_name()).join(&id);
- assert!(failed.exists(), "orphan should be in failed/");
+ let reopened = Run::open(quire.db_path(), id, runs.base_dir.clone()).expect("reopen");
+ assert_eq!(reopened.state(), RunState::Failed);
}
fn load(source: &str) -> Pipeline {
@@ -1200,9 +1054,9 @@ mod tests {
)
.expect("execute");
- // Verify the run landed in complete/ on disk.
- let completed = runs.base.join(RunState::Complete.dir_name()).join(&run_id);
- assert!(completed.exists(), "run should be in complete/");
+ // Verify the run landed in complete in the DB.
+ let reopened = Run::open(quire.db_path(), run_id, runs.base_dir.clone()).expect("reopen");
+ assert_eq!(reopened.state(), RunState::Complete);
let a = &outputs["a"];
let b = &outputs["b"];
@@ -1214,8 +1068,6 @@ mod tests {
#[test]
fn execute_runs_jobs_in_topo_order() {
- // `b` depends on `a`, but the registration order puts `b` first.
- // Topo-sorted execution must run `a` before `b`.
let (_dir, quire) = tmp_quire();
let runs = test_runs(&quire);
let run = runs.create(&test_meta()).expect("create");
@@ -1267,9 +1119,9 @@ mod tests {
.expect_err("expected failure");
assert!(matches!(err, Error::JobFailed { ref job, .. } if job == "a"));
- // Verify the run landed in failed/ on disk.
- let failed = runs.base.join(RunState::Failed.dir_name()).join(&run_id);
- assert!(failed.exists(), "run should be in failed/");
+ // Verify the run is failed in the DB.
+ let reopened = Run::open(quire.db_path(), run_id, runs.base_dir.clone()).expect("reopen");
+ assert_eq!(reopened.state(), RunState::Failed);
}
#[test]
@@ -1303,8 +1155,6 @@ mod tests {
#[test]
fn jobs_returns_quire_push_outputs_through_transitive_input() {
- // `b` depends on `a` which depends on `:quire/push`; `b` reads
- // `:quire/push` directly even though it's not a direct input.
let (_dir, quire) = tmp_quire();
let runs = test_runs(&quire);
let run = runs.create(&test_meta()).expect("create");
@@ -1366,7 +1216,6 @@ mod tests {
#[test]
fn jobs_errors_on_non_ancestor_job() {
- // `peer` exists as a job but isn't an ancestor of `grab`.
let (_dir, quire) = tmp_quire();
let runs = test_runs(&quire);
let run = runs.create(&test_meta()).expect("create");
@@ -1432,7 +1281,6 @@ mod tests {
let runs = test_runs(&quire);
let run = runs.create(&test_meta()).expect("create");
- // `a` does nothing, `b` reads `a`'s outputs — should get nil.
let pipeline = load(
r#"(local ci (require :quire.ci))
(ci.job :a [:quire/push] (fn [_] nil))
@@ -1478,8 +1326,7 @@ mod tests {
.expect("execute");
let log_path = runs
- .base
- .join(RunState::Complete.dir_name())
+ .base_dir
.join(&run_id)
.join("jobs")
.join("greet")
@@ -1518,8 +1365,8 @@ mod tests {
Executor::Host,
);
- let failed_dir = runs.base.join(RunState::Failed.dir_name()).join(&run_id);
- assert!(failed_dir.exists(), "run should be in failed/");
+ let failed_dir = runs.base_dir.join(&run_id);
+ assert!(failed_dir.exists(), "run directory should exist");
let log_path = failed_dir.join("jobs").join("a").join("log.yml");
assert!(
@@ -1633,28 +1480,6 @@ mod tests {
);
}
- #[test]
- fn container_record_round_trips_through_yaml() {
- let (_dir, quire) = tmp_quire();
- let runs = test_runs(&quire);
- let run = runs.create(&test_meta()).expect("create");
-
- let now: Timestamp = "2026-05-04T16:20:01Z".parse().expect("parse");
- let later: Timestamp = "2026-05-04T16:21:09Z".parse().expect("parse");
- let record = ContainerRecord {
- image_tag: Some("quire-ci/test:run-id".into()),
- container_id: Some("9f3b8a72c1d4".into()),
- build_started_at: Some(now),
- build_finished_at: Some(later),
- container_started_at: Some(later),
- container_stopped_at: None,
- };
- run.write_container_record(&record).expect("write");
-
- let read = run.read_container_record().expect("read");
- assert_eq!(read, record);
- }
-
#[test]
fn repo_segment_returns_final_component() {
assert_eq!(repo_segment(Path::new("runs/test.git")), "test.git");
@@ -1667,12 +1492,8 @@ mod tests {
#[test]
fn repo_segment_sanitizes_for_docker_tags() {
- // Docker rejects tags whose component starts with `.` or `-` —
- // tempdir names produced by `tempfile::tempdir()` start with `.`.
assert_eq!(repo_segment(Path::new("/tmp/.tmpAbCdEf")), "tmpabcdef");
- // Uppercase is rejected; lowercase fine.
assert_eq!(repo_segment(Path::new("MyRepo.git")), "myrepo.git");
- // Other invalid characters become underscores.
assert_eq!(
repo_segment(Path::new("repo with spaces")),
"repo_with_spaces"
@@ -1686,7 +1507,6 @@ mod tests {
return;
}
- // Build a real git repo with a Dockerfile committed at HEAD.
let dir = tempfile::tempdir().expect("tempdir");
let src_repo = dir.path().join("src");
fs_err::create_dir_all(&src_repo).expect("mkdir src");
@@ -1746,9 +1566,6 @@ mod tests {
let run = runs.create(&meta).expect("create");
let run_id = run.id().to_string();
- // Run `uname -s` inside the container. On macOS `uname -s`
- // returns `Darwin`; getting `Linux` back proves the command
- // ran inside the alpine container, not on the host.
let pipeline = load(
r#"(local ci (require :quire.ci))
(ci.job :probe [:quire/push] (fn [{: sh}] (sh ["uname" "-s"])))"#,
@@ -1773,21 +1590,31 @@ mod tests {
probe[0].stdout,
);
- // Verify container.yml was written with all fields.
- let complete = runs.base.join(RunState::Complete.dir_name()).join(&run_id);
- let record_path = complete.join("container.yml");
- assert!(record_path.exists(), "container.yml should exist");
- let record: ContainerRecord =
- serde_yaml_ng::from_str(&fs_err::read_to_string(&record_path).unwrap()).unwrap();
- assert!(record.image_tag.is_some());
- assert!(record.container_id.is_some());
- assert!(record.build_started_at.is_some());
- assert!(record.build_finished_at.is_some());
- assert!(record.container_started_at.is_some());
- assert!(record.container_stopped_at.is_some());
+ // Verify container metadata was written to the DB.
+ let db = crate::db::open(&quire.db_path()).expect("open db");
+ let image_tag: Option<String> = db
+ .query_row(
+ "SELECT image_tag FROM runs WHERE id = ?1",
+ rusqlite::params![&run_id],
+ |row| row.get(0),
+ )
+ .expect("query");
+ assert!(image_tag.is_some(), "image_tag should be set");
+
+ let container_stopped_ms: Option<i64> = db
+ .query_row(
+ "SELECT container_stopped_at_ms FROM runs WHERE id = ?1",
+ rusqlite::params![&run_id],
+ |row| row.get(0),
+ )
+ .expect("query");
+ assert!(
+ container_stopped_ms.is_some(),
+ "container_stopped_at_ms should be set"
+ );
// Cleanup the image we built.
- if let Some(tag) = record.image_tag {
+ if let Some(tag) = image_tag {
let _ = std::process::Command::new("docker")
.args(["image", "rm", &tag])
.output();
diff --git a/src/db.rs b/src/db.rs
new file mode 100644
index 0000000..39d6f9c
--- /dev/null
+++ b/src/db.rs
@@ -0,0 +1,57 @@
+//! Database connection management and migration runner.
+//!
+//! [`open`] creates a connection with WAL mode and foreign keys enabled.
+//! [`migrate`] runs pending migrations — call once at server startup.
+
+use std::path::Path;
+
+use rusqlite::Connection;
+use rusqlite_migration::{M, Migrations};
+
+/// Ordered schema migrations, built lazily on first access. Append-only: add
+/// new `M::up` entries at the end; never edit a migration that has shipped.
+static MIGRATIONS: std::sync::LazyLock<Migrations<'static>> = std::sync::LazyLock::new(|| {
+ Migrations::new(vec![M::up(include_str!("../migrations/0001_initial.sql"))])
+});
+
+/// Error from running migrations.
+///
+/// Both conversions are generated by thiserror's `#[from]`, so `?` lifts
+/// plain SQLite errors and migration-runner errors into this type.
+#[derive(Debug, thiserror::Error)]
+pub enum MigrationError {
+    /// A plain SQLite failure; displays exactly as the inner error does.
+    #[error(transparent)]
+    Sqlite(#[from] rusqlite::Error),
+    /// The migration runner failed. `#[from]` also records the inner error
+    /// as this variant's `source()`.
+    #[error("migration error: {0}")]
+    Migration(#[from] rusqlite_migration::Error),
+}
+
+/// Connect to the SQLite database at `path` (created if absent), then put the
+/// connection in WAL journal mode and turn on foreign-key enforcement.
+///
+/// Migrations are not applied here; call [`migrate`] once at server startup.
+pub fn open(path: &Path) -> Result<Connection, rusqlite::Error> {
+    const SETUP: &str = "PRAGMA journal_mode = WAL; PRAGMA foreign_keys = ON;";
+    let db = Connection::open(path)?;
+    db.execute_batch(SETUP).map(|()| db)
+}
+
+/// Bring the schema on `conn` up to the newest known migration.
+///
+/// Meant to run once at server startup, after [`open`]; failures from the
+/// migration runner surface as [`MigrationError::Migration`].
+pub fn migrate(conn: &mut Connection) -> Result<(), MigrationError> {
+    MIGRATIONS.to_latest(conn).map_err(MigrationError::Migration)
+}
+
+/// Test helper: a fresh in-memory database with foreign keys enforced and
+/// every migration applied.
+#[cfg(test)]
+pub fn open_in_memory() -> Result<Connection, MigrationError> {
+    let mut db = Connection::open_in_memory()?;
+    db.execute_batch("PRAGMA foreign_keys = ON;")?;
+    migrate(&mut db).map(|()| db)
+}
diff --git a/src/error.rs b/src/error.rs
index b469ed9..dd6f57f 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -23,6 +23,9 @@ pub enum Error {
#[error(transparent)]
Secret(#[from] secret::Error),
+ #[error(transparent)]
+ Sql(#[from] rusqlite::Error),
+
#[error(transparent)]
Yaml(#[from] serde_yaml_ng::Error),
diff --git a/src/lib.rs b/src/lib.rs
index 69dc499..ab7b623 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,5 @@
pub mod ci;
+pub mod db;
mod error;
pub mod event;
pub mod fennel;
diff --git a/src/quire.rs b/src/quire.rs
index 29295be..c750661 100644
--- a/src/quire.rs
+++ b/src/quire.rs
@@ -129,8 +129,12 @@ impl Repo {
}
/// Access CI runs for this repo.
- pub fn runs(&self) -> Runs {
- Runs::new(self.runs_base())
+    pub fn runs(&self, db_path: &Path) -> Runs {
+        // Only the database location is caller-supplied; the repo name and
+        // runs base dir both come from this repo.
+        let db_file = db_path.to_path_buf();
+        let repo_name = self.name().to_string();
+        Runs::new(db_file, repo_name, self.runs_base())
}
}
@@ -167,6 +171,10 @@ impl Quire {
self.base_dir.join("config.fnl")
}
+ pub fn db_path(&self) -> PathBuf {
+ self.base_dir.join("quire.db") // sits beside config.fnl and server.sock in base_dir
+ }
+
pub fn socket_path(&self) -> PathBuf {
self.base_dir.join("server.sock")
}