From 9fc060adbd4b4a81a9bb3bbe11e78014f0ef7590 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Tue, 14 Jan 2025 07:22:41 -0500 Subject: [PATCH 1/3] feat(ex): Add example showing how to join tasks together. --- docs/joining-tasks.md | 81 +++++++++++++++++++++++++++ docs/quick-start.md | 5 +- examples/joining-tasks/.clang-format | 17 ++++++ examples/joining-tasks/CMakeLists.txt | 27 +++++++++ examples/joining-tasks/src/client.cpp | 75 +++++++++++++++++++++++++ examples/joining-tasks/src/tasks.cpp | 23 ++++++++ examples/joining-tasks/src/tasks.hpp | 28 +++++++++ 7 files changed, 254 insertions(+), 2 deletions(-) create mode 100644 docs/joining-tasks.md create mode 100644 examples/joining-tasks/.clang-format create mode 100644 examples/joining-tasks/CMakeLists.txt create mode 100644 examples/joining-tasks/src/client.cpp create mode 100644 examples/joining-tasks/src/tasks.cpp create mode 100644 examples/joining-tasks/src/tasks.hpp diff --git a/docs/joining-tasks.md b/docs/joining-tasks.md new file mode 100644 index 00000000..8ff0d83a --- /dev/null +++ b/docs/joining-tasks.md @@ -0,0 +1,81 @@ +# Joining tasks + +Spider provides two methods for joining tasks together: + +1. Binding the output(s) of one task to the input(s) of another. +2. Starting a task from within another task. + +(1) is useful when you simply want to assemble a directed acyclic graph (DAG) of tasks. (2) is +useful when you want a task to act like a client itself, running, monitoring, and collecting the +output of Spider jobs. + +## Binding tasks together + +The `spider::Driver::bind` function allows you to bind two or more tasks together. For example, we +can compute the hypotenuse of a right-angle triangle using the DAG of tasks shown in Figure 1 below. 
+ +```mermaid +flowchart TD + square1["square(value) -> int"] + square2["square(value) -> int"] + square_root["square_root(value) -> double"] + sum["sum(x, y) -> int"] + a((a)) + b((b)) + asquared(("a²")) + bsquared(("b²")) + asquaredplusbsquared(("a² + b²")) + h((hypotenuse)) + + a --> square1 --> asquared --> sum + b --> square2 --> bsquared --> sum + sum --> asquaredplusbsquared --> square_root --> h +``` + +*Figure 1: A DAG of tasks to calculate the hypotenuse of a right-angle triangle. Square blocks +represent tasks and circular blocks represent values.* + +This DAG of tasks is implemented in `examples/joining-tasks/src/`. + +> [!NOTE] +> To build and run the example, you can follow the steps from the quick-start guide, but from inside +> the `examples/joining-tasks` directory. + +`spider::Driver::bind` takes two or more parameters as input: + +- The first parameter is the *target* task or `TaskGraph` that will take (as inputs) the outputs of + any tasks or `TaskGraph`s bound to it. For brevity, we’ll collectively refer to tasks and + `TaskGraph`s as *runnables*. +- Each subsequent parameter is either a *source* runnable, or a value that conforms to the + `Serializable` or `Data` interfaces. +- If the parameter is a runnable, its outputs will be passed to the inputs of the target runnable + (which is why we call it a source runnable). + +In the example: + +- We first use `bind` to bind the outputs of two `square` tasks to the inputs of the `sum` task. + This invocation returns a `TaskGraph` that we store in `sum_of_squares_task_graph`. +- Next, we use `bind` again to bind the output of `sum_of_squares_task_graph` to the input of the + `square_root` task, storing the result in `hypotenuse_task_graph`. `hypotenuse_task_graph` + represents the DAG in Figure 1. +- Finally, we submit `hypotenuse_task_graph` for execution with the inputs `4` & `5`. 
+ +### Ordering of bound inputs + +Notice that the values we pass to `spider::Driver::bind` and `spider::Driver::start` are distributed +to the inputs of the target runnable, from left to right. In the example, the output of the first +`square` task is passed to the first input of `sum`, and likewise for the second `square` task and +input. Similarly, in `spider::Driver::start`, `4` is passed to the left `square` task and `5` is +passed to the right `square` task. + +> [!NOTE] +> Unlike `std::bind`, `spider::Driver::bind` doesn’t support placeholder inputs. + +> +> + +## Nesting tasks + +To run a task from within another task, you can use the task’s `TaskContext` parameter (the first +parameter) similar to how we use `spider::Driver`. Specifically, both have `bind` and `start` +methods with equivalent parameters and return values. diff --git a/docs/quick-start.md b/docs/quick-start.md index 96cc07cc..e0d63b1c 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -202,8 +202,9 @@ storage backend URL in the command. # Next steps -In future guides, we'll explain how to write more complex tasks, as well as how to leverage Spider's -support for fault tolerance. +The following guides describe how to leverage Spider to implement more advanced functionality: + +* [Joining tasks](./joining-tasks.md) [Docker]: https://docs.docker.com/engine/install/ [docker-non-root]: https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user diff --git a/examples/joining-tasks/.clang-format b/examples/joining-tasks/.clang-format new file mode 100644 index 00000000..0cde5f4f --- /dev/null +++ b/examples/joining-tasks/.clang-format @@ -0,0 +1,17 @@ +BasedOnStyle: "InheritParentConfig" + +IncludeCategories: + # NOTE: A header is grouped by first matching regex + # Project headers + - Regex: "^\"" + Priority: 4 + # Library headers. Update when adding new libraries. + # NOTE: clang-format retains leading white-space on a line in violation of the YAML spec. 
+ - Regex: "^<(spider)" + Priority: 3 + # C system headers + - Regex: "^<.+\\.h>" + Priority: 1 + # C++ standard libraries + - Regex: "^<.+>" + Priority: 2 diff --git a/examples/joining-tasks/CMakeLists.txt b/examples/joining-tasks/CMakeLists.txt new file mode 100644 index 00000000..9d6a5113 --- /dev/null +++ b/examples/joining-tasks/CMakeLists.txt @@ -0,0 +1,27 @@ +cmake_minimum_required(VERSION 3.22.1) +project(spider_joining_tasks) + +# Add the Spider library +add_subdirectory(../../ spider EXCLUDE_FROM_ALL) + +# Add the tasks library +add_library( + tasks + SHARED + src/tasks.cpp + src/tasks.hpp +) + +# Link the Spider library to the tasks library +target_link_libraries(tasks PRIVATE spider::spider) + +# Add the client +add_executable(client src/client.cpp) + +# Link the Spider and tasks library to the client +target_link_libraries( + client + PRIVATE + spider::spider + tasks +) diff --git a/examples/joining-tasks/src/client.cpp b/examples/joining-tasks/src/client.cpp new file mode 100644 index 00000000..4d931eb0 --- /dev/null +++ b/examples/joining-tasks/src/client.cpp @@ -0,0 +1,75 @@ +#include +#include +#include +#include + +#include + +#include "tasks.hpp" + +namespace { +/** + * @tparam JobOutputType + * @param job + * @param expected + * @return Whether the job was successful. + */ +template +auto validate_job_output(spider::Job& job, JobOutputType const& expected) -> bool { + switch (auto job_status = job.get_status()) { + case spider::JobStatus::Succeeded: { + auto result = job.get_result(); + if (expected == result) { + return true; + } + std::cerr << "job returned unexpected result. Expected: " << expected + << ". 
Actual: " << result << '\n'; + return false; + } + case spider::JobStatus::Failed: { + std::pair const error_and_fn_name = job.get_error(); + std::cerr << "Job failed in function " << error_and_fn_name.second << " - " + << error_and_fn_name.first << '\n'; + return false; + } + default: + std::cerr << "Job is in unexpected state - " + << static_cast>(job_status) + << '\n'; + return false; + } +} +} + +auto main(int argc, char const* argv[]) -> int { + // Parse the storage backend URL from the command line arguments + if (argc < 2) { + std::cerr << "Usage: ./client " << '\n'; + return 1; + } + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + std::string const storage_url{argv[1]}; + if (storage_url.empty()) { + std::cerr << "storage-backend-url cannot be empty." << '\n'; + return 1; + } + + // Create a driver that connects to the Spider cluster + spider::Driver driver{storage_url}; + + auto sum_of_squares_task_graph = driver.bind(&sum, &square, &square); + auto hypotenuse_task_graph = driver.bind(&square_root, &sum_of_squares_task_graph); + + // Submit the task graph for execution + constexpr int a = 4; + constexpr int b = 5; + auto job = driver.start(hypotenuse_task_graph, a, b); + + job.wait_complete(); + + if (false == validate_job_output(job, std::sqrt(a * a + b * b))) { + return 1; + } + + return 0; +} diff --git a/examples/joining-tasks/src/tasks.cpp b/examples/joining-tasks/src/tasks.cpp new file mode 100644 index 00000000..f019cba0 --- /dev/null +++ b/examples/joining-tasks/src/tasks.cpp @@ -0,0 +1,23 @@ +#include "tasks.hpp" + +#include + +#include + +auto square(spider::TaskContext&, int value) -> int { + return value * value; +} + +auto square_root(spider::TaskContext&, int value) -> double { + return std::sqrt(value); +} + +auto sum(spider::TaskContext&, int x, int y) -> int { + return x + y; +} + +// Register the tasks +// NOLINTNEXTLINE(cert-err58-cpp) +SPIDER_REGISTER_TASK(square); +SPIDER_REGISTER_TASK(square_root); 
+SPIDER_REGISTER_TASK(sum); diff --git a/examples/joining-tasks/src/tasks.hpp b/examples/joining-tasks/src/tasks.hpp new file mode 100644 index 00000000..8881fdba --- /dev/null +++ b/examples/joining-tasks/src/tasks.hpp @@ -0,0 +1,28 @@ +#ifndef TASKS_HPP +#define TASKS_HPP + +#include + +/** + * @param context + * @param value + * @return The square of the given value. + */ +auto square(spider::TaskContext& context, int value) -> int; + +/** + * @param context + * @param value + * @return The square root of the given value. + */ +auto square_root(spider::TaskContext& context, int value) -> double; + +/** + * @param context + * @param x + * @param y + * @return The sum of x and y. + */ +auto sum(spider::TaskContext& context, int x, int y) -> int; + +#endif // TASKS_HPP From 54797d9c44f7c5a685b859926fe09a863927379c Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Tue, 14 Jan 2025 07:37:34 -0500 Subject: [PATCH 2/3] clang-format. --- examples/joining-tasks/src/client.cpp | 2 +- examples/joining-tasks/src/tasks.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/joining-tasks/src/client.cpp b/examples/joining-tasks/src/client.cpp index 4d931eb0..59f65261 100644 --- a/examples/joining-tasks/src/client.cpp +++ b/examples/joining-tasks/src/client.cpp @@ -39,7 +39,7 @@ auto validate_job_output(spider::Job& job, JobOutputType const& e return false; } } -} +} // namespace auto main(int argc, char const* argv[]) -> int { // Parse the storage backend URL from the command line arguments diff --git a/examples/joining-tasks/src/tasks.cpp b/examples/joining-tasks/src/tasks.cpp index f019cba0..7fc91d64 100644 --- a/examples/joining-tasks/src/tasks.cpp +++ b/examples/joining-tasks/src/tasks.cpp @@ -5,11 +5,11 @@ #include auto square(spider::TaskContext&, int value) -> int { - return value * value; + return value * value; } auto square_root(spider::TaskContext&, int value) -> double { - return 
std::sqrt(value); + return std::sqrt(value); } auto sum(spider::TaskContext&, int x, int y) -> int { From e587188d424eb539dcfe9f661c80cc85dc25419e Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Tue, 14 Jan 2025 09:03:27 -0500 Subject: [PATCH 3/3] clang-tidy. --- examples/joining-tasks/src/client.cpp | 7 ++++--- examples/joining-tasks/src/tasks.cpp | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/examples/joining-tasks/src/client.cpp b/examples/joining-tasks/src/client.cpp index 59f65261..26432f7e 100644 --- a/examples/joining-tasks/src/client.cpp +++ b/examples/joining-tasks/src/client.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -61,13 +62,13 @@ auto main(int argc, char const* argv[]) -> int { auto hypotenuse_task_graph = driver.bind(&square_root, &sum_of_squares_task_graph); // Submit the task graph for execution - constexpr int a = 4; - constexpr int b = 5; + int const a = 4; + int const b = 5; auto job = driver.start(hypotenuse_task_graph, a, b); job.wait_complete(); - if (false == validate_job_output(job, std::sqrt(a * a + b * b))) { + if (false == validate_job_output(job, std::sqrt((a * a) + (b * b)))) { return 1; } diff --git a/examples/joining-tasks/src/tasks.cpp b/examples/joining-tasks/src/tasks.cpp index 7fc91d64..9180b49c 100644 --- a/examples/joining-tasks/src/tasks.cpp +++ b/examples/joining-tasks/src/tasks.cpp @@ -17,7 +17,9 @@ auto sum(spider::TaskContext&, int x, int y) -> int { } // Register the tasks -// NOLINTNEXTLINE(cert-err58-cpp) +// NOLINTBEGIN(cert-err58-cpp) SPIDER_REGISTER_TASK(square); SPIDER_REGISTER_TASK(square_root); SPIDER_REGISTER_TASK(sum); + +// NOLINTEND(cert-err58-cpp)