Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 52 additions & 7 deletions src/runtime/futex.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
#include "syscall/abi.h"
#include "syscall/proc.h"

#include "debug/log.h"

/* Interrupt flag: when set, futex_wait returns -EINTR. Used to simulate SIGCHLD
* delivery when all CLONE_THREAD workers exit: wakes the main thread from
* blocking futex_wait without triggering a full exit_group.
Expand Down Expand Up @@ -126,6 +128,11 @@ static inline unsigned futex_hash(uint64_t uaddr)
return (unsigned) ((uaddr >> 2) ^ (uaddr >> 14)) % FUTEX_BUCKETS;
}

/* True when uaddr is a multiple of 4, i.e. naturally aligned for a 32-bit
 * futex word.
 */
static inline bool futex_uaddr_is_aligned(uint64_t uaddr)
{
    const uint64_t misalignment = uaddr % sizeof(uint32_t);

    return misalignment == 0;
}

/* Unlink a waiter from its bucket's singly-linked list. Caller must hold
* b->lock. Silently returns if the waiter is not in the list (already
* unlinked by a wake/requeue).
Expand Down Expand Up @@ -226,6 +233,8 @@ static int64_t futex_wait(guest_t *g,
{
if (bitset == 0)
return -LINUX_EINVAL;
if (!futex_uaddr_is_aligned(uaddr))
return -LINUX_EINVAL;

unsigned idx = futex_hash(uaddr);
futex_bucket_t *b = &buckets[idx];
Expand Down Expand Up @@ -375,6 +384,8 @@ static int64_t futex_wake(uint64_t uaddr, uint32_t val, uint32_t bitset)
{
if (bitset == 0)
return -LINUX_EINVAL;
if (!futex_uaddr_is_aligned(uaddr))
return -LINUX_EINVAL;

unsigned idx = futex_hash(uaddr);
futex_bucket_t *b = &buckets[idx];
Expand Down Expand Up @@ -422,6 +433,9 @@ static int64_t futex_requeue(guest_t *g,
int do_cmp,
uint32_t expected)
{
if (!futex_uaddr_is_aligned(uaddr) || !futex_uaddr_is_aligned(uaddr2))
return -LINUX_EINVAL;

unsigned idx_src = futex_hash(uaddr);
unsigned idx_dst = futex_hash(uaddr2);
futex_bucket_t *b_src = &buckets[idx_src];
Expand Down Expand Up @@ -521,6 +535,9 @@ static int64_t futex_wake_op(guest_t *g,
uint32_t val2,
uint32_t val3)
{
if (!futex_uaddr_is_aligned(uaddr) || !futex_uaddr_is_aligned(uaddr2))
return -LINUX_EINVAL;

unsigned idx1 = futex_hash(uaddr);
unsigned idx2 = futex_hash(uaddr2);
futex_bucket_t *b1 = &buckets[idx1];
Expand Down Expand Up @@ -699,6 +716,9 @@ static int64_t futex_wake_op(guest_t *g,
*/
static int64_t futex_lock_pi(guest_t *g, uint64_t uaddr, uint64_t timeout_gva)
{
if (!futex_uaddr_is_aligned(uaddr))
return -LINUX_EINVAL;

uint32_t *word = (uint32_t *) guest_ptr_w(g, uaddr);
if (!word)
return -LINUX_EFAULT;
Expand Down Expand Up @@ -891,6 +911,9 @@ static int64_t futex_lock_pi(guest_t *g, uint64_t uaddr, uint64_t timeout_gva)
*/
static int64_t futex_trylock_pi(guest_t *g, uint64_t uaddr)
{
if (!futex_uaddr_is_aligned(uaddr))
return -LINUX_EINVAL;

uint32_t *word = (uint32_t *) guest_ptr_w(g, uaddr);
if (!word)
return -LINUX_EFAULT;
Expand All @@ -916,6 +939,9 @@ static int64_t futex_trylock_pi(guest_t *g, uint64_t uaddr)
*/
static int64_t futex_unlock_pi(guest_t *g, uint64_t uaddr)
{
if (!futex_uaddr_is_aligned(uaddr))
return -LINUX_EINVAL;

uint32_t *word = (uint32_t *) guest_ptr_w(g, uaddr);
if (!word)
return -LINUX_EFAULT;
Expand Down Expand Up @@ -1210,7 +1236,7 @@ int64_t sys_futex_waitv(guest_t *g,
* FUTEX2_SIZE_U32 that is 4-byte alignment; an unaligned futex word
* loses atomicity on aarch64 and matches no kernel-side behavior.
*/
if (elts[i].uaddr & 0x3)
if (!futex_uaddr_is_aligned(elts[i].uaddr))
return -LINUX_EINVAL;
}

Expand Down Expand Up @@ -1402,8 +1428,12 @@ void robust_list_walk(guest_t *g, thread_entry_t *t)
futex_gva = list_ptr + (uint64_t) futex_offset;
else
futex_gva = list_ptr - (uint64_t) (-futex_offset);
if (futex_gva >= g->ipa_base + g->guest_size) {
/* Address out of guest range; skip this entry */
if (futex_gva >= g->ipa_base + g->guest_size ||
!futex_uaddr_is_aligned(futex_gva)) {
/* Out of range or unaligned: skip. Linux's unaligned_p() rejects
* these; emulating the same avoids partial cross-page writes
* leaving the futex word corrupted while the wake is suppressed.
*/
uint64_t next;
if (guest_read_small(g, list_ptr, &next, sizeof(next)) < 0)
break;
Expand All @@ -1422,8 +1452,14 @@ void robust_list_walk(guest_t *g, thread_entry_t *t)
/* Set FUTEX_OWNER_DIED and clear TID */
uint32_t new_val =
(futex_val & ~FUTEX_TID_MASK) | FUTEX_OWNER_DIED;
guest_write_small(g, futex_gva, &new_val, sizeof(new_val));
futex_wake(futex_gva, 1, FUTEX_BITSET_MATCH_ANY);
if (guest_write_small(g, futex_gva, &new_val, sizeof(new_val)) <
0)
log_debug(
"futex: robust list OWNER_DIED write to 0x%llx "
"failed; waiters on this lock may hang",
(unsigned long long) futex_gva);
else
futex_wake(futex_gva, 1, FUTEX_BITSET_MATCH_ANY);
}
}

Expand All @@ -1444,15 +1480,24 @@ void robust_list_walk(guest_t *g, thread_entry_t *t)
futex_gva = pending + (uint64_t) futex_offset;
else
futex_gva = pending - (uint64_t) (-futex_offset);
if (futex_gva >= g->ipa_base + g->guest_size ||
!futex_uaddr_is_aligned(futex_gva))
return;
uint32_t futex_val;
if (guest_read_small(g, futex_gva, &futex_val, sizeof(futex_val)) ==
0) {
uint32_t owner = futex_val & FUTEX_TID_MASK;
if (owner == (uint32_t) t->guest_tid) {
uint32_t new_val =
(futex_val & ~FUTEX_TID_MASK) | FUTEX_OWNER_DIED;
guest_write_small(g, futex_gva, &new_val, sizeof(new_val));
futex_wake(futex_gva, 1, FUTEX_BITSET_MATCH_ANY);
if (guest_write_small(g, futex_gva, &new_val, sizeof(new_val)) <
0)
log_debug(
"futex: robust list pending OWNER_DIED write to "
"0x%llx failed; waiters on this lock may hang",
(unsigned long long) futex_gva);
else
futex_wake(futex_gva, 1, FUTEX_BITSET_MATCH_ANY);
}
}
}
Expand Down
50 changes: 50 additions & 0 deletions tests/test-futex-pi.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ int passes = 0, fails = 0;
/* Linux futex ops */
#define FUTEX_LOCK_PI 6
#define FUTEX_UNLOCK_PI 7
#define FUTEX_TRYLOCK_PI 8
#define FUTEX_PRIVATE 128

/* PI lock word: shared between parent and child thread.
Expand Down Expand Up @@ -234,6 +235,54 @@ static void test_futex_eintr(void)
}
}

static void test_futex_unaligned(void)
{
TEST("futex rejects unaligned uaddr");

uint32_t words[2] = {0};
int *unaligned = (int *) (void *) (((unsigned char *) words) + 1);

long r = raw_futex_wait(unaligned, 0);
if (r != -22) {
printf("FAIL: WAIT expected -EINVAL(-22) got %ld\n", r);
fails++;
return;
}

r = raw_futex_wake(unaligned, 1);
if (r != -22) {
printf("FAIL: WAKE expected -EINVAL(-22) got %ld\n", r);
fails++;
return;
}

r = raw_syscall6(__NR_futex, (long) unaligned,
FUTEX_LOCK_PI | FUTEX_PRIVATE, 0, 0, 0, 0);
if (r != -22) {
printf("FAIL: LOCK_PI expected -EINVAL(-22) got %ld\n", r);
fails++;
return;
}

r = raw_syscall6(__NR_futex, (long) unaligned,
FUTEX_TRYLOCK_PI | FUTEX_PRIVATE, 0, 0, 0, 0);
if (r != -22) {
printf("FAIL: TRYLOCK_PI expected -EINVAL(-22) got %ld\n", r);
fails++;
return;
}

r = raw_syscall6(__NR_futex, (long) unaligned,
FUTEX_UNLOCK_PI | FUTEX_PRIVATE, 0, 0, 0, 0);
if (r != -22) {
printf("FAIL: UNLOCK_PI expected -EINVAL(-22) got %ld\n", r);
fails++;
return;
}

PASS();
}

/* Main */

int main(void)
Expand All @@ -242,6 +291,7 @@ int main(void)

test_pi_lock_unlock();
test_futex_eintr();
test_futex_unaligned();
test_pi_dead_owner(); /* Last: uses CLONE_THREAD which may hang on x64 */

SUMMARY("test-futex-pi");
Expand Down
Loading