From 8dbc13ffacb13405b13813c18877a080dc0ac0e9 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Tue, 5 Dec 2023 23:42:03 +0800 Subject: [PATCH 1/4] Fix typo error Signed-off-by: kerthcet --- gpuallocator/allocator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpuallocator/allocator.go b/gpuallocator/allocator.go index b6ebb0a..88bda36 100644 --- a/gpuallocator/allocator.go +++ b/gpuallocator/allocator.go @@ -19,7 +19,7 @@ type Allocator struct { allocated DeviceSet } -// Policy defines an interface for plugagable allocation policies to be added +// Policy defines an interface for pluggable allocation policies to be added // to an Allocator. type Policy interface { // Allocate is meant to do the heavy-lifting of implementing the actual From fc4b598b1d24184d855609bcd5a4780ba937044d Mon Sep 17 00:00:00 2001 From: kerthcet Date: Wed, 6 Dec 2023 17:01:23 +0800 Subject: [PATCH 2/4] quick return for edge case Signed-off-by: kerthcet --- gpuallocator/besteffort_policy.go | 4 ++++ gpuallocator/besteffort_test.go | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/gpuallocator/besteffort_policy.go b/gpuallocator/besteffort_policy.go index 366ba6a..4c0f0bc 100644 --- a/gpuallocator/besteffort_policy.go +++ b/gpuallocator/besteffort_policy.go @@ -46,6 +46,10 @@ func (p *bestEffortPolicy) Allocate(available []*Device, required []*Device, siz return []*Device{} } + if len(required) > len(available) { + return []*Device{} + } + // Find the highest scoring GPU partition with sets of of size 'size'. // Don't consider partitions that don't have at least one set that contains // all of the GPUs 'required' by the allocation. 
diff --git a/gpuallocator/besteffort_test.go b/gpuallocator/besteffort_test.go index b36bb00..e614cef 100644 --- a/gpuallocator/besteffort_test.go +++ b/gpuallocator/besteffort_test.go @@ -110,6 +110,14 @@ func TestBestEffortAllocate(t *testing.T) { 4, []int{}, }, + { + "Required too many devices than available", + devices, + []int{0, 1, 2, 3, 4, 5}, + []int{1, 2, 3, 4, 5, 6}, + 1, + []int{}, + }, } RunPolicyAllocTests(t, policy, tests) From ade704c5a05372e40f4d8ef421ab53fb5a622006 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Wed, 6 Dec 2023 20:04:14 +0800 Subject: [PATCH 3/4] Optimize for two cases: 1. The number of required devices is equal to the size. 2. Only 1 GPU is requested; in this case, we pick the device with the minimum sum of scores with the other available devices. Signed-off-by: kerthcet --- gpuallocator/besteffort_policy.go | 44 +++++++++++++++++++++++++------ gpuallocator/besteffort_test.go | 32 ++++++++++++++++++++++ 2 files changed, 68 insertions(+), 8 deletions(-) diff --git a/gpuallocator/besteffort_policy.go b/gpuallocator/besteffort_policy.go index 4c0f0bc..000eb59 100644 --- a/gpuallocator/besteffort_policy.go +++ b/gpuallocator/besteffort_policy.go @@ -50,6 +50,29 @@ func (p *bestEffortPolicy) Allocate(available []*Device, required []*Device, siz return []*Device{} } + // Optimize for the case when we required is actually the `size`. + if size == len(required) { + if gpuPartitionContainsSetWithAll([][]*Device{available}, required) { + return required + } else { + return []*Device{} + } + } + + // Optimize for the case when size == 1. + // We'll pick the device with the minimum sum of scores with available devices. + if size == 1 { + var bestDevice *Device + var minScore int + iterateGPUSetScore(available, func(score int, index int) { + if score < minScore || bestDevice == nil { + minScore = score + bestDevice = available[index] + } + }) + return []*Device{bestDevice} + } + // Find the highest scoring GPU partition with sets of of size 'size'. 
// Don't consider partitions that don't have at least one set that contains // all of the GPUs 'required' by the allocation. @@ -221,14 +244,6 @@ func iterateGPUPartitions(devices []*Device, size int, callback func([][]*Device return } - // Optimize for the case when size == 1. - if size == 1 { - for _, device := range devices { - callback([][]*Device{{device}}) - } - return - } - // Otherwise, pad the list of available GPUs on the node such that the list // can be evenly partitioned into subsets of size 'size'. This is necessary // to ensure that the recursive solution does not exit early and actually @@ -396,3 +411,16 @@ func calculateGPUPartitionScore(gpuPartition [][]*Device) int { return score } + +func iterateGPUSetScore(gpuSet []*Device, callback func(int, int)) { + for i := range gpuSet { + score := 0 + for j := range gpuSet { + if i == j { + continue + } + score += calculateGPUPairScore(gpuSet[i], gpuSet[j]) + } + callback(score, i) + } +} diff --git a/gpuallocator/besteffort_test.go b/gpuallocator/besteffort_test.go index e614cef..5f9b322 100644 --- a/gpuallocator/besteffort_test.go +++ b/gpuallocator/besteffort_test.go @@ -118,6 +118,38 @@ func TestBestEffortAllocate(t *testing.T) { 1, []int{}, }, + { + "Required devices is equal to the size", + devices, + []int{0, 1, 2, 4, 5, 6}, + []int{0, 1, 2, 5}, + 4, + []int{0, 1, 2, 5}, + }, + { + "Required 1 device exists", + devices, + []int{0, 1, 2, 4, 5, 6}, + []int{2}, + 1, + []int{2}, + }, + { + "Required 1 device not exists", + devices, + []int{0, 1, 2, 4, 5, 6}, + []int{3}, + 1, + []int{}, + }, + { + "Required 1 best effort device", + devices, + []int{0, 1, 2}, + []int{}, + 1, + []int{0}, + }, } RunPolicyAllocTests(t, policy, tests) From bb735398018a8e1b5d2c67273f6e5d59129c5a6c Mon Sep 17 00:00:00 2001 From: kerthcet Date: Tue, 12 Dec 2023 11:31:48 +0800 Subject: [PATCH 4/4] fix comment Signed-off-by: kerthcet --- gpuallocator/besteffort_policy.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) 
diff --git a/gpuallocator/besteffort_policy.go b/gpuallocator/besteffort_policy.go index 000eb59..1ebef62 100644 --- a/gpuallocator/besteffort_policy.go +++ b/gpuallocator/besteffort_policy.go @@ -52,11 +52,10 @@ func (p *bestEffortPolicy) Allocate(available []*Device, required []*Device, siz // Optimize for the case when we required is actually the `size`. if size == len(required) { - if gpuPartitionContainsSetWithAll([][]*Device{available}, required) { + if gpuSetContainsAll(available, required) { return required - } else { - return []*Device{} } + return []*Device{} } // Optimize for the case when size == 1.