Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions src/runtime/database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2268,6 +2268,92 @@ void Database::start_job(long job, int64_t starttime) {
end_txn();
}

// Collects the output files (filetree access = 2) written by the jobs selected by `filters`.
//
// The job selection is delegated to build_matching_id_query(), whose SQL is embedded as the
// `filtered_jobs` CTE. When `leaves` is true, an output is dropped if, within a run that its
// producing job belongs to, some non-hidden job recorded the same path with access 0 or 1.
// Jobs tagged `inspect.visibility = hidden` never count as consumers.
//
// Rows are grouped and ordered by path; `modified` is the maximum seen for that path.
// If the statement cannot be prepared, the SQLite error is printed to stderr and an empty
// vector is returned.
std::vector<FileReflection> Database::get_job_outputs(MatchingQueryFilters filters,
                                                      bool leaves) const {
  // Shared opening of the query: wraps the job-id subquery in a CTE.
  static const char *const cte_open = R"(
WITH filtered_jobs AS (
)";

  // Per-run leaves: outputs of the selected jobs that nothing (visible) consumes in their runs.
  static const char *const leaves_tail = R"(
),
hidden_jobs AS (
SELECT job_id FROM tags
WHERE uri = 'inspect.visibility' AND content = 'hidden'
),
job_runs AS (
SELECT DISTINCT rj.run_id
FROM run_jobs rj
WHERE rj.job_id IN (SELECT job_id FROM filtered_jobs)
),
run_inputs AS (
SELECT DISTINCT f.path, rj.run_id
FROM filetree t
JOIN files f ON f.file_id = t.file_id
JOIN run_jobs rj ON rj.job_id = t.job_id
LEFT JOIN hidden_jobs hj ON hj.job_id = t.job_id
WHERE rj.run_id IN (SELECT run_id FROM job_runs)
AND t.access IN (0, 1)
AND hj.job_id IS NULL
)
SELECT f.path, f.hash, f.type, f.mode, MAX(t.modified) as modified
FROM filetree t
JOIN files f ON f.file_id = t.file_id
JOIN run_jobs rj ON rj.job_id = t.job_id
LEFT JOIN run_inputs ri ON ri.path = f.path AND ri.run_id = rj.run_id
WHERE t.job_id IN (SELECT job_id FROM filtered_jobs)
AND t.access = 2
AND ri.path IS NULL
GROUP BY f.path
ORDER BY f.path
)";

  // Every output of the selected jobs, with no consumption filtering.
  static const char *const all_outputs_tail = R"(
)
SELECT f.path, f.hash, f.type, f.mode, MAX(t.modified) as modified
FROM filetree t
JOIN files f ON f.file_id = t.file_id
WHERE t.job_id IN (SELECT job_id FROM filtered_jobs)
AND t.access = 2
GROUP BY f.path
ORDER BY f.path
)";

  // Same filter mechanism as matching(): yields the SQL that selects the job ids.
  const auto job_ids_sql = build_matching_id_query(std::move(filters));
  const std::string sql = cte_open + job_ids_sql + (leaves ? leaves_tail : all_outputs_tail);

  sqlite3_stmt *stmt = nullptr;
  const int prepare_rc = sqlite3_prepare_v2(imp->db, sql.c_str(), -1, &stmt, nullptr);
  if (prepare_rc != SQLITE_OK) {
    std::cerr << "sqlite3_prepare_v2 (get_job_outputs): " << sqlite3_errmsg(imp->db)
              << std::endl;
    return {};
  }

  std::vector<FileReflection> results;
  begin_ro_txn();
  while (sqlite3_step(stmt) == SQLITE_ROW) {
    // Column order follows the SELECT list: path, hash, type, mode, modified.
    std::string file_path = rip_column(stmt, 0);
    std::string file_hash = rip_column(stmt, 1);
    std::string file_type = rip_column(stmt, 2);
    long file_mode = sqlite3_column_int64(stmt, 3);
    long file_modified = sqlite3_column_int64(stmt, 4);
    results.emplace_back(std::move(file_path), std::move(file_type), std::move(file_hash),
                         file_mode, file_modified);
  }
  finish_stmt("Could not query job outputs", stmt, imp->debugdb);
  sqlite3_finalize(stmt);
  end_txn();

  return results;
}

std::vector<std::pair<std::string, int>> Database::get_interleaved_output(long job_id) const {
begin_ro_txn();
auto out = get_interleaved_output_impl(this, job_id);
Expand Down
7 changes: 7 additions & 0 deletions src/runtime/database.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,13 @@ struct Database {
// Additional filtering needed to determine if runs are "actually" live.
std::vector<OpenRunJobReflection> matching_open_runs(MatchingQueryFilters filters);

// Returns all output files (access=2) for jobs matching the provided filters.
// If leaves=true, only returns outputs that are not consumed as inputs within each job's run,
// where "consumed" means another filetree entry for the same path with access 0 or 1 exists
// in the same run.
// Leaves computation always excludes hidden jobs (tagged inspect.visibility=hidden) from
// input consideration.
// Returns outputs ordered by path, with duplicates removed (last modified wins).
// If the query cannot be prepared, the error is printed to stderr and an empty vector is
// returned.
std::vector<FileReflection> get_job_outputs(MatchingQueryFilters filters,
bool leaves = false) const;

std::vector<FileDependency> get_file_dependencies() const;

std::vector<std::pair<std::string, int>> get_interleaved_output(long job_id) const;
Expand Down
1 change: 1 addition & 0 deletions tests/inspection/checkout-to-leaves/.wakeroot
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
27 changes: 27 additions & 0 deletions tests/inspection/checkout-to-leaves/pass.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/sh
# Exercises --checkout-to together with --leaves: when --leaves is given, an
# output that another job in the same run consumes must not be materialized.

set -eu

# Use the wake binary inside the directory passed as $1, else the one on PATH.
if [ -n "${1:-}" ]; then
  WAKE="$1/wake"
else
  WAKE="wake"
fi

# Delete the database, logs, job outputs and both checkout directories.
remove_artifacts() {
  rm -rf wake.db* wake.log .wake .build all leaves intermediate.txt final.txt
}

# Print every entry below directory $1, sorted, relative to that directory.
list_tree() {
  (cd "$1" && find . -mindepth 1 | sort)
}

# Start from a clean slate and clean up again however the script exits.
trap remove_artifacts EXIT
remove_artifacts

# Run the pipeline: job-a writes intermediate.txt, job-b reads it and writes final.txt.
"$WAKE" -x 'build Unit' >/dev/null

# Plain checkout: intermediate.txt and final.txt should both appear.
echo "== --checkout-to (no --leaves) =="
"$WAKE" --checkout-to all
list_tree all

# Leaves-only checkout: intermediate.txt was consumed by job-b, so it is left out.
echo "== --checkout-to --leaves =="
"$WAKE" --checkout-to leaves --leaves
list_tree leaves
21 changes: 21 additions & 0 deletions tests/inspection/checkout-to-leaves/test.wake
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package test_wake

from wake import _

# First stage of a two-job pipeline: jobA produces intermediate.txt, which the
# `build` definition below feeds into a second job that writes final.txt.
# With --leaves, only final.txt is a leaf output; intermediate.txt is consumed within the run.
#
# jobA runs `sh -c "printf intermediate-data > intermediate.txt"` under the label "job-a"
# using defaultRunner, and yields the result of getJobOutput for that job.
# The trailing `Nil` passed to makeExecPlan is presumably the (empty) list of visible
# input files -- confirm against the makeExecPlan signature.
def jobA _: Result Path Error =
makeExecPlan ("sh", "-c", "printf intermediate-data > intermediate.txt", Nil) Nil
| setPlanLabel "job-a"
| runJobWith defaultRunner
| getJobOutput

# Entry point invoked by pass.sh as `build Unit`.
# Requires jobA to succeed, then runs a second job labelled "job-b" that copies
# intermediate.txt into final.txt and appends "X-final". The path produced by jobA is
# handed to makeExecPlan as its second argument, which is what makes job-b a consumer of
# intermediate.txt (presumably by declaring it a visible input -- confirm against makeExecPlan).
export def build _: Result Path Error =
require Pass intermediate = jobA Unit

makeExecPlan
("sh", "-c", "cat intermediate.txt > final.txt && printf X-final >> final.txt", Nil)
(intermediate, Nil)
| setPlanLabel "job-b"
| runJobWith defaultRunner
| getJobOutput
1 change: 1 addition & 0 deletions tests/inspection/checkout-to/.wakeroot
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
39 changes: 39 additions & 0 deletions tests/inspection/checkout-to/pass.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/sh
# Exercises --checkout-to, which materializes job outputs into a destination
# directory, against four kinds of destination path.

set -eu

# Use the wake binary inside the directory passed as $1, else the one on PATH.
if [ -n "${1:-}" ]; then
  WAKE="$1/wake"
else
  WAKE="wake"
fi

# Delete the database, logs, job output and every destination used below.
remove_artifacts() {
  rm -rf wake.db* wake.log .wake .build out out2 nonempty notadir hello.txt
}

# Print every entry below directory $1, sorted, relative to that directory.
list_tree() {
  (cd "$1" && find . -mindepth 1 | sort)
}

# Start from a clean slate and clean up again however the script exits.
trap remove_artifacts EXIT
remove_artifacts

# Produce hello.txt so there is something to check out.
"$WAKE" -x 'build Unit' >/dev/null

# Case 1: the destination does not exist yet; it is created and populated.
echo "== fresh =="
"$WAKE" --checkout-to out
list_tree out
printf 'content: %s\n' "$(cat out/hello.txt)"

# Case 2: the destination exists but is empty; this is accepted too.
echo "== empty existing =="
mkdir out2
"$WAKE" --checkout-to out2
list_tree out2

# Case 3: the destination already has content; wake refuses and leaves it alone.
# The `||` keeps `set -e` from aborting and records the exit status instead.
echo "== non-empty existing =="
mkdir nonempty
touch nonempty/preexisting
"$WAKE" --checkout-to nonempty || echo "exit=$?"
list_tree nonempty

# Case 4: the destination exists but is a regular file; wake refuses.
echo "== not-a-directory =="
touch notadir
"$WAKE" --checkout-to notadir || echo "exit=$?"
10 changes: 10 additions & 0 deletions tests/inspection/checkout-to/test.wake
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package test_wake

from wake import _

# Single-job build that produces "hello.txt" with known content.
# Invoked by pass.sh as `build Unit`: runs `sh -c "printf hello-world > hello.txt"` under the
# label "build-hello" using defaultRunner and yields the result of getJobOutput.
# pass.sh later prints the checked-out copy of hello.txt to verify the content.
export def build _: Result Path Error =
makeExecPlan ("sh", "-c", "printf hello-world > hello.txt", Nil) Nil
| setPlanLabel "build-hello"
| runJobWith defaultRunner
| getJobOutput
17 changes: 17 additions & 0 deletions tools/wake/cli_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ struct CommandLineOptions {
bool clean;
bool list_outputs;
bool include_hidden;
bool leaves;
const char *checkout_to;
std::optional<bool> log_header_align;
std::optional<bool> cache_miss_on_failure;

Expand Down Expand Up @@ -101,6 +103,7 @@ struct CommandLineOptions {
std::vector<std::vector<std::string>> output_files = {};
std::vector<std::vector<std::string>> labels = {};
std::vector<std::vector<std::string>> tags = {};
std::vector<std::vector<std::string>> run_ids = {};

int argc;
char **argv;
Expand All @@ -112,6 +115,7 @@ struct CommandLineOptions {
std::vector<char *> output_files_buffer(argc_in, nullptr);
std::vector<char *> labels_buffer(argc_in, nullptr);
std::vector<char *> tags_buffer(argc_in, nullptr);
std::vector<char *> run_ids_buffer(argc_in, nullptr);

// clang-format off
struct option options[] {
Expand All @@ -138,6 +142,7 @@ struct CommandLineOptions {
{'o', "output", GOPT_ARGUMENT_REQUIRED | GOPT_REPEATABLE_VALUE, output_files_buffer.data(), (unsigned int)argc_in},
{0, "label", GOPT_ARGUMENT_REQUIRED | GOPT_REPEATABLE_VALUE, labels_buffer.data(), (unsigned int)argc_in},
{0, "tag", GOPT_ARGUMENT_REQUIRED | GOPT_REPEATABLE_VALUE, tags_buffer.data(), (unsigned int)argc_in},
{0, "run", GOPT_ARGUMENT_REQUIRED | GOPT_REPEATABLE_VALUE, run_ids_buffer.data(), (unsigned int)argc_in},
{'l', "last", GOPT_ARGUMENT_FORBIDDEN},
{0, "last-used", GOPT_ARGUMENT_FORBIDDEN},
{0, "last-executed", GOPT_ARGUMENT_FORBIDDEN},
Expand Down Expand Up @@ -188,6 +193,8 @@ struct CommandLineOptions {
{0, "user-config", GOPT_ARGUMENT_REQUIRED},
{':', "shebang", GOPT_ARGUMENT_REQUIRED},
{0, "include-hidden", GOPT_ARGUMENT_FORBIDDEN},
{0, "leaves", GOPT_ARGUMENT_FORBIDDEN},
{0, "checkout-to", GOPT_ARGUMENT_REQUIRED},
{0, 0, GOPT_LAST}
};
// clang-format on
Expand Down Expand Up @@ -236,6 +243,9 @@ struct CommandLineOptions {
clean = arg(options, "clean")->count;
list_outputs = arg(options, "list-outputs")->count;
include_hidden = arg(options, "include-hidden")->count;
leaves = arg(options, "leaves")->count;

checkout_to = arg(options, "checkout-to")->argument;

percent_str = arg(options, "percent")->argument;
jobs_str = arg(options, "jobs")->argument;
Expand Down Expand Up @@ -313,6 +323,13 @@ struct CommandLineOptions {
tags.emplace_back(std::move(parts));
}

for (unsigned int i = 0; i < arg(options, "run")->count; i++) {
std::string line(run_ids_buffer[i]);
std::vector<std::string> parts = wcl::split_by_fn(
',', line.begin(), line.end(), [](auto a, auto b) { return std::string(a, b); });
run_ids.emplace_back(std::move(parts));
}

if (!percent_str) {
percent_str = getenv("WAKE_PERCENT");
}
Expand Down
Loading
Loading