Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions c/experimental/stf/include/cccl/c/experimental/stf/stf.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,10 @@ typedef struct stf_dim4
} stf_dim4;

//! \brief Partition (mapper) function: data coordinates -> grid position.
//! Can be implemented in C or provided from Python via ctypes/cffi.
typedef stf_pos4 (*stf_get_executor_fn)(stf_pos4 data_coords, stf_dim4 data_dims, stf_dim4 grid_dims);
//! Writes the result into \p *result. The out-pointer convention is used
//! instead of return-by-value so that the signature is trivially representable
//! in FFI layers (e.g. Python ctypes/cffi callbacks, Rust bindings), where
//! returning a C struct by value from a callback is unsupported or awkward.
typedef void (*stf_get_executor_fn)(stf_pos4* result, stf_pos4 data_coords, stf_dim4 data_dims, stf_dim4 grid_dims);

//! \brief Create host execution place (CPU).
stf_exec_place_handle stf_exec_place_host(void);
Expand Down
7 changes: 4 additions & 3 deletions c/experimental/stf/src/stf.cu
Original file line number Diff line number Diff line change
Expand Up @@ -358,9 +358,10 @@ stf_data_place_handle stf_data_place_composite(stf_exec_place_handle grid, stf_g
_CCCL_ASSERT(grid != nullptr, "exec place grid handle must not be null");
_CCCL_ASSERT(mapper != nullptr, "partitioner function (mapper) must not be null");
auto* grid_ptr = from_opaque(grid);
// Distinct function pointer types (C typedef vs C++ alias); not convertible via static_cast under nvcc.
partition_fn_t cpp_mapper = reinterpret_cast<partition_fn_t>(mapper);
auto* dp = stf_try_allocate([cpp_mapper, grid_ptr] {
// Distinct function pointer types (C typedef vs C++ alias) are not
// convertible via static_cast under nvcc.
const partition_fn_t cpp_mapper = reinterpret_cast<partition_fn_t>(mapper);
auto* dp = stf_try_allocate([cpp_mapper, grid_ptr] {
return new data_place(data_place::composite(cpp_mapper, *grid_ptr));
});
return to_opaque(dp);
Expand Down
12 changes: 5 additions & 7 deletions c/experimental/stf/test/test_places.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

// Blocked partition along first dimension: maps data coordinates to grid position.
// Used to exercise composite data place with a grid of execution places.
static stf_pos4 blocked_mapper_1d(stf_pos4 data_coords, stf_dim4 data_dims, stf_dim4 grid_dims)
static void blocked_mapper_1d(stf_pos4* result, stf_pos4 data_coords, stf_dim4 data_dims, stf_dim4 grid_dims)
{
uint64_t extent = data_dims.x;
uint64_t nplaces = grid_dims.x;
Expand All @@ -34,12 +34,10 @@ static stf_pos4 blocked_mapper_1d(stf_pos4 data_coords, stf_dim4 data_dims, stf_
{
place_x = static_cast<int64_t>(nplaces) - 1;
}
stf_pos4 result = {};
result.x = place_x;
result.y = 0;
result.z = 0;
result.t = 0;
return result;
result->x = place_x;
result->y = 0;
result->z = 0;
result->t = 0;
}

C2H_TEST("empty stf tasks", "[task]")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,10 @@ using ::cuda::experimental::stf::pos4;
// Forward declarations
class exec_place;

//! Function type for computing executor placement from data coordinates
using partition_fn_t = pos4 (*)(pos4, dim4, dim4);
//! Function type for computing executor placement from data coordinates.
//! Uses an out-pointer convention so the signature is trivially representable
//! in FFI layers (e.g. Python ctypes/cffi callbacks, Rust bindings), where
//! returning a C struct by value from a callback is unsupported or awkward.
using partition_fn_t = void (*)(pos4* result, pos4 data_coords, dim4 data_dims, dim4 grid_dims);

/**
* @brief Abstract interface for data_place implementations
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,9 @@ private:
template <typename F>
pos4 index_to_grid_pos(size_t linearized_index, F&& delinearize)
{
pos4 coords = delinearize(linearized_index);
pos4 eplace_coords = mapper(coords, data_dims, grid.get_dims());
const pos4 coords = delinearize(linearized_index);
pos4 eplace_coords(0);
mapper(&eplace_coords, coords, data_dims, grid.get_dims());
return eplace_coords;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ public:
return box<dimensions>(bounds);
}

_CCCL_HOST_DEVICE static pos4 get_executor(pos4 data_coords, dim4 data_dims, dim4 grid_dims)
_CCCL_HOST_DEVICE static void get_executor(pos4* result, pos4 data_coords, dim4 data_dims, dim4 grid_dims)
{
// Find the largest dimension
size_t rank = data_dims.get_rank();
Expand All @@ -120,7 +120,7 @@ public:
// Get the coordinate in the selected dimension
size_t c = data_coords.get(target_dim);

return pos4(c / part_size);
*result = pos4(c / part_size);
}
};

Expand Down Expand Up @@ -150,9 +150,10 @@ UNITTEST("blocked partition with very large data arrays")
pos4 middle_coord(200, 150, 100, 500);
pos4 last_coord(399, 299, 199, 999);

pos4 first_pos = blocked_partition::get_executor(first_coord, massive_4d_dims, grid_dims);
pos4 middle_pos = blocked_partition::get_executor(middle_coord, massive_4d_dims, grid_dims);
pos4 last_pos = blocked_partition::get_executor(last_coord, massive_4d_dims, grid_dims);
pos4 first_pos, middle_pos, last_pos;
blocked_partition::get_executor(&first_pos, first_coord, massive_4d_dims, grid_dims);
blocked_partition::get_executor(&middle_pos, middle_coord, massive_4d_dims, grid_dims);
blocked_partition::get_executor(&last_pos, last_coord, massive_4d_dims, grid_dims);

// part_size = ceil(1000/4) = 250
// t=0 -> block 0, t=500 -> block 2, t=999 -> block 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,10 +247,10 @@ public:
return cyclic_shape<dimensions>(bounds);
}

_CCCL_HOST_DEVICE static pos4 get_executor(pos4 /*unused*/, dim4 /*unused*/, dim4 /*unused*/)
_CCCL_HOST_DEVICE static void get_executor(pos4* result, pos4 /*unused*/, dim4 /*unused*/, dim4 /*unused*/)
{
abort();
return pos4(0);
*result = pos4(0);
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,10 @@ public:
return reserved::tiled_mdspan_shape<tile_size, mdspan_shape_t>(in, place_position.x, grid_dims.x);
}

_CCCL_HOST_DEVICE static pos4 get_executor(pos4 data_coords, dim4 /*unused*/, dim4 grid_dims)
_CCCL_HOST_DEVICE static void get_executor(pos4* result, pos4 data_coords, dim4 /*unused*/, dim4 grid_dims)
{
assert(grid_dims.x > 0);
return pos4((data_coords.x / tile_size) % grid_dims.x);
*result = pos4((data_coords.x / tile_size) % grid_dims.x);
}
};

Expand Down Expand Up @@ -178,7 +178,8 @@ UNITTEST("tiled partition with large 1D data")

constexpr size_t tile_size = 1000;

pos4 tile_pos = tiled_partition<tile_size>::get_executor(large_coords, data_dims, grid_dims);
pos4 tile_pos;
tiled_partition<tile_size>::get_executor(&tile_pos, large_coords, data_dims, grid_dims);

EXPECT(tile_pos.x == (test_coord / tile_size) % grid_dims.x);
};
Expand Down
6 changes: 3 additions & 3 deletions docs/cudax/places.rst
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ over the different places of a grid.
template <typename S_out, typename S_in>
static const S_out apply(const S_in& in, pos4 position, dim4 grid_dims);

pos4 get_executor(pos4 data_coords, dim4 data_dims, dim4 grid_dims);
void get_executor(pos4* result, pos4 data_coords, dim4 data_dims, dim4 grid_dims);
};

A partitioning class must implement an ``apply`` method which takes:
Expand All @@ -504,8 +504,8 @@ method which allows localized data allocators. This
method indicates, for each entry of a shape, on which place this entry
should *preferably* be allocated.

``get_executor`` returns a ``pos4`` coordinate in the execution place
grid, and its arguments are:
``get_executor`` writes into ``*result`` a ``pos4`` coordinate in the
execution place grid; its input arguments are:

- a coordinate within the shape described as a ``pos4`` object
- the dimensions of the shape expressed as a ``dim4`` object
Expand Down