Skip to content

Commit 6d03bc0

Browse files
branchseer and claude committed
feat(cache): add output globs for cache restoration
Adds an `output` field to cached tasks: archives files matching the configured globs after a successful run and restores them on cache hit. Supports glob patterns, negative patterns, and `{pattern, base}` form with explicit base directory. When `output` is omitted or empty, no output archiving happens (matches prior behavior). Schema version bumped to 12 (CacheEntryKey carries output_config, CacheEntryValue carries output_archive). Old caches are reset on upgrade. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 868fdb1 commit 6d03bc0

96 files changed

Lines changed: 1570 additions & 43 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Changelog
22

3+
- **Added** `output` field for cached tasks: archives output files matching the configured globs after a successful run and restores them on cache hit. Patterns are relative to the package directory; supports negative patterns (e.g. `"!dist/cache/**"`) and `{pattern, base}` form for explicit base. ([#321](https://github.com/voidzero-dev/vite-task/pull/321))
34
- **Fixed** Windows cached tasks can now run package shims rewritten through PowerShell; default env passthrough now preserves `PATHEXT` ([#366](https://github.com/voidzero-dev/vite-task/pull/366))
45
- **Added** Platform support for targets without `input` auto-inference (e.g. Android). Tasks still run; those relying on auto-inference run uncached, with the summary noting that `input` must be configured manually to enable caching ([#352](https://github.com/voidzero-dev/vite-task/pull/352))
56
- **Fixed** `vp run` no longer aborts with `failed to prepare the command for injection: Invalid argument` when the user environment already has `LD_PRELOAD` (Linux) or `DYLD_INSERT_LIBRARIES` (macOS) set. The tracer shim is now appended to any existing value and placed last, so user preloads keep their symbol-interposition precedence ([#340](https://github.com/voidzero-dev/vite-task/issues/340))

Cargo.lock

Lines changed: 43 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ winsafe = { version = "0.0.27", features = ["kernel"] }
163163
xxhash-rust = { version = "0.8.15", features = ["const_xxh3"] }
164164
ntest = "0.9.5"
165165
terminal_size = "0.4"
166+
zstd = "0.13"
166167

167168
[workspace.metadata.cargo-shear]
168169
ignored = [

crates/vite_task/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ rustc-hash = { workspace = true }
2929
serde = { workspace = true, features = ["derive", "rc"] }
3030
serde_json = { workspace = true }
3131
thiserror = { workspace = true }
32+
tar = { workspace = true }
3233
tokio = { workspace = true, features = [
3334
"rt-multi-thread",
3435
"io-std",
@@ -40,13 +41,15 @@ tokio = { workspace = true, features = [
4041
tokio-util = { workspace = true }
4142
tracing = { workspace = true }
4243
twox-hash = { workspace = true }
44+
uuid = { workspace = true, features = ["v4"] }
4345
vite_path = { workspace = true }
4446
vite_select = { workspace = true }
4547
vite_str = { workspace = true }
4648
vite_task_graph = { workspace = true }
4749
vite_task_plan = { workspace = true }
4850
vite_workspace = { workspace = true }
4951
wax = { workspace = true }
52+
zstd = { workspace = true }
5053

5154
[dev-dependencies]
5255
tempfile = { workspace = true }
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
//! Output archive creation and extraction using tar + zstd compression.
2+
3+
use std::fs::File;
4+
5+
use vite_path::{AbsolutePath, RelativePathBuf};
6+
7+
/// Create a tar.zst archive from workspace-relative output file paths.
8+
///
9+
/// Files that no longer exist are silently skipped (the task may delete
10+
/// temporary files during execution).
11+
///
12+
/// # Errors
13+
///
14+
/// Returns an error if creating the archive file or adding entries fails.
15+
pub fn create_output_archive(
16+
workspace_root: &AbsolutePath,
17+
output_files: &[RelativePathBuf],
18+
archive_path: &AbsolutePath,
19+
) -> anyhow::Result<()> {
20+
let file = File::create(archive_path.as_path())?;
21+
let encoder = zstd::Encoder::new(file, 0)?.auto_finish();
22+
let mut builder = tar::Builder::new(encoder);
23+
24+
for rel_path in output_files {
25+
let abs_path = workspace_root.join(rel_path);
26+
// Skip files that no longer exist (task may delete temp files)
27+
if !abs_path.as_path().exists() {
28+
continue;
29+
}
30+
let metadata = std::fs::metadata(abs_path.as_path())?;
31+
if metadata.is_file() {
32+
let mut file = File::open(abs_path.as_path())?;
33+
let mut header = tar::Header::new_gnu();
34+
header.set_metadata(&metadata);
35+
header.set_cksum();
36+
builder.append_data(&mut header, rel_path.as_str(), &mut file)?;
37+
}
38+
}
39+
40+
builder.finish()?;
41+
Ok(())
42+
}
43+
44+
/// Extract a tar.zst archive, restoring files relative to workspace root.
45+
///
46+
/// Parent directories are created automatically. Existing files are overwritten.
47+
///
48+
/// # Errors
49+
///
50+
/// Returns an error if opening the archive or extracting entries fails.
51+
pub fn extract_output_archive(
52+
workspace_root: &AbsolutePath,
53+
archive_path: &AbsolutePath,
54+
) -> anyhow::Result<()> {
55+
let file = File::open(archive_path.as_path())?;
56+
let decoder = zstd::Decoder::new(file)?;
57+
let mut archive = tar::Archive::new(decoder);
58+
59+
archive.unpack(workspace_root.as_path())?;
60+
Ok(())
61+
}

crates/vite_task/src/session/cache/display.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ pub fn format_cache_status_inline(cache_status: &CacheStatus) -> Option<Str> {
174174
}
175175
}
176176
FingerprintMismatch::InputConfig => "input configuration changed",
177+
FingerprintMismatch::OutputConfig => "output configuration changed",
177178
FingerprintMismatch::InputChanged { kind, path } => {
178179
let desc = format_input_change_str(*kind, path.as_str());
179180
return Some(vite_str::format!("○ cache miss: {desc}, executing"));

crates/vite_task/src/session/cache/mod.rs

Lines changed: 50 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
//! Execution cache for storing and retrieving cached command results.
22
3+
pub mod archive;
34
pub mod display;
45

56
use std::{collections::BTreeMap, fmt::Display, fs::File, io::Write, sync::Arc, time::Duration};
@@ -14,7 +15,8 @@ use rusqlite::{Connection, OptionalExtension as _, config::DbConfig};
1415
use serde::{Deserialize, Serialize};
1516
use tokio::sync::Mutex;
1617
use vite_path::{AbsolutePath, RelativePathBuf};
17-
use vite_task_graph::config::ResolvedInputConfig;
18+
use vite_str::Str;
19+
use vite_task_graph::config::ResolvedGlobConfig;
1820
use vite_task_plan::cache_metadata::{CacheMetadata, ExecutionCacheKey, SpawnFingerprint};
1921
use wincode::{
2022
SchemaRead, SchemaReadOwned, SchemaWrite,
@@ -43,14 +45,18 @@ pub struct CacheEntryKey {
4345
pub spawn_fingerprint: SpawnFingerprint,
4446
/// Resolved input configuration that affects cache behavior.
4547
/// Glob patterns are workspace-root-relative.
46-
pub input_config: ResolvedInputConfig,
48+
pub input_config: ResolvedGlobConfig,
49+
/// Resolved output configuration that affects cache restoration.
50+
/// Glob patterns are workspace-root-relative.
51+
pub output_config: ResolvedGlobConfig,
4752
}
4853

4954
impl CacheEntryKey {
5055
fn from_metadata(cache_metadata: &CacheMetadata) -> Self {
5156
Self {
5257
spawn_fingerprint: cache_metadata.spawn_fingerprint.clone(),
5358
input_config: cache_metadata.input_config.clone(),
59+
output_config: cache_metadata.output_config.clone(),
5460
}
5561
}
5662
}
@@ -103,6 +109,9 @@ pub struct CacheEntryValue {
103109
/// Path is relative to workspace root, value is `xxHash3_64` of file content.
104110
/// Stored in the value (not the key) so changes can be detected and reported.
105111
pub globbed_inputs: BTreeMap<RelativePathBuf, u64>,
112+
/// Filename of the output archive (e.g. `{uuid}.tar.zst`) stored alongside
113+
/// `cache.db` in the cache directory. `None` if no output files were produced.
114+
pub output_archive: Option<Str>,
106115
}
107116

108117
#[derive(Debug)]
@@ -142,6 +151,8 @@ pub enum FingerprintMismatch {
142151
},
143152
/// Found a previous cache entry key for the same task, but `input_config` differs.
144153
InputConfig,
154+
/// Found a previous cache entry key for the same task, but `output_config` differs.
155+
OutputConfig,
145156

146157
InputChanged {
147158
kind: InputChangeKind,
@@ -158,6 +169,9 @@ impl Display for FingerprintMismatch {
158169
Self::InputConfig => {
159170
write!(f, "input configuration changed")
160171
}
172+
Self::OutputConfig => {
173+
write!(f, "output configuration changed")
174+
}
161175
Self::InputChanged { kind, path } => {
162176
write!(f, "{}", display::format_input_change_str(*kind, path.as_str()))
163177
}
@@ -201,16 +215,16 @@ impl ExecutionCache {
201215
"CREATE TABLE task_fingerprints (key BLOB PRIMARY KEY, value BLOB);",
202216
(),
203217
)?;
204-
conn.execute("PRAGMA user_version = 11", ())?;
218+
conn.execute("PRAGMA user_version = 12", ())?;
205219
}
206-
1..=10 => {
220+
1..=11 => {
207221
// old internal db version. reset
208222
conn.set_db_config(DbConfig::SQLITE_DBCONFIG_RESET_DATABASE, true)?;
209223
conn.execute("VACUUM", ())?;
210224
conn.set_db_config(DbConfig::SQLITE_DBCONFIG_RESET_DATABASE, false)?;
211225
}
212-
11 => break, // current version
213-
12.. => {
226+
12 => break, // current version
227+
13.. => {
214228
return Err(anyhow::anyhow!(
215229
"Unrecognized database version: {user_version}. \
216230
The cache may have been created by a newer version of Vite Task. \
@@ -270,11 +284,20 @@ impl ExecutionCache {
270284
self.get_cache_key_by_execution_key(execution_cache_key).await?
271285
{
272286
// Destructure to ensure we handle all fields when new ones are added
273-
let CacheEntryKey { spawn_fingerprint: old_spawn_fingerprint, input_config: _ } =
274-
old_cache_key;
287+
let CacheEntryKey {
288+
spawn_fingerprint: old_spawn_fingerprint,
289+
input_config: old_input_config,
290+
output_config: old_output_config,
291+
} = old_cache_key;
275292
let mismatch = if old_spawn_fingerprint == *spawn_fingerprint {
276-
// spawn fingerprint is the same but input_config or glob_base changed
277-
FingerprintMismatch::InputConfig
293+
// spawn fingerprint is the same but input_config or output_config changed
294+
if old_input_config != cache_metadata.input_config {
295+
FingerprintMismatch::InputConfig
296+
} else if old_output_config != cache_metadata.output_config {
297+
FingerprintMismatch::OutputConfig
298+
} else {
299+
FingerprintMismatch::InputConfig
300+
}
278301
} else {
279302
FingerprintMismatch::SpawnFingerprint {
280303
old: old_spawn_fingerprint,
@@ -288,16 +311,33 @@ impl ExecutionCache {
288311
}
289312

290313
/// Update cache after successful execution.
314+
///
315+
/// If a previous entry exists for the same cache key with a different
316+
/// `output_archive`, the stale archive file in `cache_dir` is removed
317+
/// (best-effort) so it doesn't accumulate on disk.
291318
#[tracing::instrument(level = "debug", skip_all)]
292319
pub async fn update(
293320
&self,
294321
cache_metadata: &CacheMetadata,
295322
cache_value: CacheEntryValue,
323+
cache_dir: &AbsolutePath,
296324
) -> anyhow::Result<()> {
297325
let execution_cache_key = &cache_metadata.execution_cache_key;
298326

299327
let cache_key = CacheEntryKey::from_metadata(cache_metadata);
300328

329+
// If a previous entry exists with a stale output archive, delete the
330+
// old file so the cache directory doesn't accumulate orphaned archives.
331+
if let Some(old_value) = self.get_by_cache_key(&cache_key).await?
332+
&& let Some(old_archive) = old_value.output_archive
333+
&& cache_value.output_archive.as_ref() != Some(&old_archive)
334+
{
335+
let old_archive_path = cache_dir.join(old_archive.as_str());
336+
// Best-effort cleanup: a missing file (e.g. after a crash or manual
337+
// cache clear) is fine, so we ignore the error.
338+
let _ = std::fs::remove_file(old_archive_path.as_path());
339+
}
340+
301341
self.upsert_cache_entry(&cache_key, &cache_value).await?;
302342
self.upsert_task_fingerprint(execution_cache_key, &cache_key).await?;
303343
Ok(())

crates/vite_task/src/session/execute/glob_inputs.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,58 @@ pub fn compute_globbed_inputs(
109109
Ok(result)
110110
}
111111

112+
/// Collect file paths matching positive globs, filtered by negative globs.
113+
///
114+
/// Like [`compute_globbed_inputs`] but only collects paths (no hashing).
115+
/// Used for determining which output files to archive.
116+
pub fn collect_glob_paths(
117+
workspace_root: &AbsolutePath,
118+
positive_globs: &std::collections::BTreeSet<Str>,
119+
negative_globs: &std::collections::BTreeSet<Str>,
120+
) -> anyhow::Result<Vec<RelativePathBuf>> {
121+
if positive_globs.is_empty() {
122+
return Ok(Vec::new());
123+
}
124+
125+
let negatives: Vec<Glob<'static>> = negative_globs
126+
.iter()
127+
.map(|p| Ok(Glob::new(p.as_str())?.into_owned()))
128+
.collect::<anyhow::Result<_>>()?;
129+
let negation = wax::any(negatives)?;
130+
131+
let mut result = Vec::new();
132+
133+
for pattern in positive_globs {
134+
let glob = Glob::new(pattern.as_str())?.into_owned();
135+
let walk = glob.walk(workspace_root.as_path());
136+
for entry in walk.not(negation.clone())? {
137+
let entry = match entry {
138+
Ok(entry) => entry,
139+
Err(err) => {
140+
let io_err: io::Error = err.into();
141+
if io_err.kind() == io::ErrorKind::NotFound {
142+
continue;
143+
}
144+
return Err(io_err.into());
145+
}
146+
};
147+
if !entry.file_type().is_file() {
148+
continue;
149+
}
150+
let path = entry.path();
151+
let Some(stripped) = path.strip_prefix(workspace_root.as_path()).ok() else {
152+
continue;
153+
};
154+
let relative = RelativePathBuf::new(stripped)?;
155+
result.push(relative);
156+
}
157+
}
158+
159+
result.sort();
160+
result.dedup();
161+
Ok(result)
162+
}
163+
112164
#[expect(clippy::disallowed_types, reason = "receives std::path::Path from wax glob walker")]
113165
fn hash_file_content(path: &std::path::Path) -> io::Result<u64> {
114166
super::hash::hash_content(io::BufReader::new(File::open(path)?))

0 commit comments

Comments
 (0)