Add sh start/finish callback hook to Runtime
Lets observers see each sh call's lifecycle in real time, instead of
having to wait for take_outputs at the end of a run. The callback fires
ShEvent::Started before spawn and ShEvent::Finished after exit, gated
on a current job. Defaults to a no-op closure so the host path keeps
working unchanged; quire-ci will install a real one to drive its
event-stream output.

Assisted-by: claude-opus-4-7
change pnnptlnyssnrnportyptzowpvounsyks
commit 56d5b7091d78fd349e65b52cb74cbfd2aede6f9b
author Alpha Chen <alpha@kejadlen.dev>
date
parent uuqlurnw
diff --git a/quire-core/src/ci/runtime.rs b/quire-core/src/ci/runtime.rs
index fed40f2..5ce6d1c 100644
--- a/quire-core/src/ci/runtime.rs
+++ b/quire-core/src/ci/runtime.rs
@@ -52,6 +52,34 @@ pub type RuntimeResult<T> = std::result::Result<T, RuntimeError>;
 /// Per-sh timing: (index, started_at, finished_at).
 pub type ShTimings = Vec<(usize, Timestamp, Timestamp)>;
 
+/// Lifecycle events fired by [`Runtime::sh`] when a callback is
+/// installed via [`Runtime::set_sh_callback`]. Carries borrowed strings
+/// so the hot path doesn't allocate on the way through.
+#[derive(Debug)]
+pub enum ShEvent<'a> {
+    Started {
+        job_id: &'a str,
+        n: usize,
+        started_at: Timestamp,
+        cmd: &'a str,
+    },
+    Finished {
+        job_id: &'a str,
+        n: usize,
+        finished_at: Timestamp,
+        exit: i32,
+    },
+}
+
+/// A callback that observes [`Runtime::sh`] lifecycle events.
+pub type ShCallback = Box<dyn FnMut(ShEvent<'_>)>;
+
+/// The default callback installed on a fresh [`Runtime`]: drops every
+/// event. Replaced via [`Runtime::set_sh_callback`].
+fn noop_sh_callback() -> ShCallback {
+    Box::new(|_| {})
+}
+
 /// Per-execution runtime: owns the Lua VM, holds the secrets exposed
 /// to the job, the per-job `(jobs name)` views, the current-job
 /// cursor, and the per-job captured `sh` outputs.
@@ -87,6 +115,10 @@ pub struct Runtime {
     pub sh_timings: RefCell<HashMap<String, ShTimings>>,
     /// Per-job sh call counter for assigning sequential indices.
     sh_counter: RefCell<HashMap<String, usize>>,
+    /// Observer notified when an sh call starts and finishes. Defaults
+    /// to a no-op; callers install a real one via
+    /// [`Runtime::set_sh_callback`].
+    sh_callback: RefCell<ShCallback>,
     /// The materialized workspace for this run. Every `(sh …)` call
     /// runs here.
     workspace: std::path::PathBuf,
@@ -150,6 +182,7 @@ impl Runtime {
             outputs: RefCell::new(HashMap::new()),
             sh_timings: RefCell::new(HashMap::new()),
             sh_counter: RefCell::new(HashMap::new()),
+            sh_callback: RefCell::new(noop_sh_callback()),
             workspace,
         }
     }
@@ -200,6 +233,20 @@ impl Runtime {
         std::mem::take(&mut *self.sh_timings.borrow_mut())
     }
 
+    /// Install an observer for sh lifecycle events. The callback fires
+    /// once before each sh process spawns ([`ShEvent::Started`]) and
+    /// once after it exits ([`ShEvent::Finished`]), but only when an
+    /// `enter_job`/`leave_job` window is open — sh calls outside a job
+    /// are not observed, mirroring the recording behavior.
+    ///
+    /// Replaces the previously installed callback (the default is a
+    /// no-op). The callback must not call back into `Runtime::sh`
+    /// (re-entrant borrow on the callback slot) or into
+    /// `take_outputs` / `take_sh_timings` mid-run.
+    pub fn set_sh_callback(&self, callback: ShCallback) {
+        *self.sh_callback.borrow_mut() = callback;
+    }
+
     /// Resolve a declared secret by name, caching it for redaction.
     /// Errors if the name isn't declared or the secret's source
     /// can't be read.
@@ -216,9 +263,36 @@ impl Runtime {
     /// Run `cmd` with `opts` and record its output against the
     /// current job (if one is active). Non-zero exits come back in
     /// `:exit`, not as `Err`.
+    ///
+    /// Fires `ShEvent::Started` before the spawn and
+    /// `ShEvent::Finished` after exit when an sh callback has been
+    /// installed via [`Self::set_sh_callback`] *and* a job is current.
+    /// Callbacks must not re-enter `sh`.
     pub fn sh(&self, cmd: Cmd, opts: ShOpts) -> RuntimeResult<ShOutput> {
         let started_at = Timestamp::now();
         let program = cmd.program().to_string();
+
+        // Reserve (job_id, n) up-front when a job is current; we need
+        // `n` to label the Started callback. Increment runs before the
+        // spawn so a panic in run() still leaves a consistent counter.
+        let job_id_n: Option<(String, usize)> = self.current_job.borrow().as_ref().map(|job| {
+            let mut counter = self.sh_counter.borrow_mut();
+            let entry = counter.entry(job.clone()).or_insert(1);
+            let n = *entry;
+            *entry += 1;
+            (job.clone(), n)
+        });
+
+        if let Some((job_id, n)) = &job_id_n {
+            let cmd_display = redact(&cmd.to_string(), &self.registry.borrow());
+            (self.sh_callback.borrow_mut())(ShEvent::Started {
+                job_id,
+                n: *n,
+                started_at,
+                cmd: &cmd_display,
+            });
+        }
+
         let output =
             cmd.run(opts, &self.workspace)
                 .map_err(|e| RuntimeError::CommandSpawnFailed {
@@ -227,17 +301,18 @@ impl Runtime {
                     source: e,
                 })?;
         let finished_at = Timestamp::now();
-        if let Some(job) = self.current_job.borrow().as_ref() {
-            let n = {
-                let mut counter = self.sh_counter.borrow_mut();
-                let entry = counter.entry(job.clone()).or_insert(1);
-                let n = *entry;
-                *entry += 1;
-                n
-            };
+
+        if let Some((job_id, n)) = &job_id_n {
+            (self.sh_callback.borrow_mut())(ShEvent::Finished {
+                job_id,
+                n: *n,
+                finished_at,
+                exit: output.exit,
+            });
+
             self.outputs
                 .borrow_mut()
-                .entry(job.clone())
+                .entry(job_id.clone())
                 .or_default()
                 .push({
                     let reg = self.registry.borrow();
@@ -250,10 +325,11 @@ impl Runtime {
                 });
             self.sh_timings
                 .borrow_mut()
-                .entry(job.clone())
+                .entry(job_id.clone())
                 .or_default()
-                .push((n, started_at, finished_at));
+                .push((*n, started_at, finished_at));
         }
+
         Ok(output)
     }
 }
@@ -272,6 +348,7 @@ impl Runtime {
             outputs: RefCell::new(HashMap::new()),
             sh_timings: RefCell::new(HashMap::new()),
             sh_counter: RefCell::new(HashMap::new()),
+            sh_callback: RefCell::new(noop_sh_callback()),
             workspace: std::env::current_dir().expect("cwd"),
         }
     }
@@ -562,6 +639,75 @@ mod tests {
         );
     }
 
+    #[test]
+    fn sh_callback_fires_started_then_finished() {
+        let received: Rc<RefCell<Vec<String>>> = Rc::new(RefCell::new(Vec::new()));
+        let received_clone = received.clone();
+
+        let (runtime, run_fn) = rt(
+            r#"(local ci (require :quire.ci))
+(ci.job :go [:quire/push] (fn [{: sh}] (sh ["echo" "hi"])))"#,
+            HashMap::new(),
+        );
+        runtime.set_sh_callback(Box::new(move |event| match event {
+            ShEvent::Started { job_id, n, cmd, .. } => {
+                received_clone
+                    .borrow_mut()
+                    .push(format!("started:{job_id}:{n}:{cmd}"));
+            }
+            ShEvent::Finished {
+                job_id, n, exit, ..
+            } => {
+                received_clone
+                    .borrow_mut()
+                    .push(format!("finished:{job_id}:{n}:{exit}"));
+            }
+        }));
+        *runtime.current_job.borrow_mut() = Some("go".to_string());
+
+        let handle = RuntimeHandle(runtime.clone())
+            .into_lua(runtime.lua())
+            .expect("install runtime");
+        let _: mlua::Value = run_fn.call(handle).expect("sh call");
+
+        let calls = received.borrow();
+        assert_eq!(calls.len(), 2, "expected 2 events, got: {calls:?}");
+        assert!(
+            calls[0].starts_with("started:go:1:"),
+            "started event shape: {}",
+            calls[0]
+        );
+        assert!(
+            calls[0].contains("echo"),
+            "started should carry cmd: {}",
+            calls[0]
+        );
+        assert_eq!(calls[1], "finished:go:1:0");
+    }
+
+    #[test]
+    fn sh_callback_not_fired_without_current_job() {
+        let count = Rc::new(RefCell::new(0u32));
+        let count_clone = count.clone();
+
+        let (runtime, run_fn) = rt(
+            r#"(local ci (require :quire.ci))
+(ci.job :go [:quire/push] (fn [{: sh}] (sh ["echo" "hi"])))"#,
+            HashMap::new(),
+        );
+        runtime.set_sh_callback(Box::new(move |_event| {
+            *count_clone.borrow_mut() += 1;
+        }));
+        // No enter_job — current_job stays None.
+
+        let handle = RuntimeHandle(runtime.clone())
+            .into_lua(runtime.lua())
+            .expect("install runtime");
+        let _: mlua::Value = run_fn.call(handle).expect("sh call");
+
+        assert_eq!(*count.borrow(), 0, "callback should not fire outside a job");
+    }
+
     /// Build a pipeline whose single job's run-fn invokes `(sh …)`,
     /// invoke it with the runtime handle, and decode the resulting Lua
     /// table as ShOutput.