Skip to content

Commit b3bb1a5

Browse files
committed
duplicate fix
Signed-off-by: Mauritz Uphoff <mauritz.uphoff@stackit.cloud>
1 parent 2b60ff8 commit b3bb1a5

3 files changed

Lines changed: 132 additions & 0 deletions

File tree

stackit/internal/services/authorization/roleassignments/resource.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"errors"
66
"fmt"
77
"strings"
8+
"time"
89

910
"github.com/hashicorp/terraform-plugin-framework/path"
1011
"github.com/hashicorp/terraform-plugin-framework/resource"
@@ -154,6 +155,21 @@ func (r *roleAssignmentResource) Create(ctx context.Context, req resource.Create
154155
ctx = tflog.SetField(ctx, "role", model.Role.ValueString())
155156
ctx = tflog.SetField(ctx, "resource_type", r.apiName)
156157

158+
lockKey := fmt.Sprintf("%s,%s,%s",
159+
model.ResourceId.ValueString(),
160+
model.Role.ValueString(),
161+
model.Subject.ValueString(),
162+
)
163+
164+
// Terraform executes resources in parallel. If two resources define the same
165+
// role assignment, they create a "Check-Then-Act" race condition:
166+
// 1. Thread A creates -> Success
167+
// 2. Thread B creates -> Duplicate Error
168+
// This lock forces Thread B to wait until Thread A completes, allowing
169+
// the subsequent duplicate check to correctly find the existing assignment.
170+
unlock := authorizationUtils.LockAssignment(lockKey)
171+
defer unlock()
172+
157173
listMemberResp, err := r.authorizationClient.ListMembers(ctx, r.apiName, model.ResourceId.ValueString()).Subject(model.Subject.ValueString()).Execute()
158174
if err != nil {
159175
core.LogAndAddError(ctx, &resp.Diagnostics, "Error listing current resource members", fmt.Sprintf("Calling API: %v", err))
@@ -188,6 +204,10 @@ func (r *roleAssignmentResource) Create(ctx context.Context, req resource.Create
188204

189205
err = mapListMembersResponse(listMembersResponse, &model)
190206
if err != nil {
207+
if errors.Is(err, errRoleAssignmentNotFound) {
208+
resp.State.RemoveResource(ctx)
209+
return
210+
}
191211
core.LogAndAddError(ctx, &resp.Diagnostics, fmt.Sprintf("Error creating %s role assignment", r.apiName), fmt.Sprintf("Processing API payload: %v", err))
192212
return
193213
}
@@ -197,6 +217,10 @@ func (r *roleAssignmentResource) Create(ctx context.Context, req resource.Create
197217
if resp.Diagnostics.HasError() {
198218
return
199219
}
220+
221+
// safety sleep due to api cache
222+
time.Sleep(1 * time.Second)
223+
200224
tflog.Info(ctx, fmt.Sprintf("%s role assignment created", r.apiName))
201225
}
202226

stackit/internal/services/authorization/utils/util.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"encoding/json"
66
"fmt"
7+
"sync"
78

89
"github.com/hashicorp/terraform-plugin-framework/diag"
910
"github.com/stackitcloud/stackit-sdk-go/core/config"
@@ -43,3 +44,29 @@ func TypeConverter[R any](data any) (*R, error) {
4344
}
4445
return &result, err
4546
}
47+
48+
// Global map to hold locks for specific assignment IDs
49+
// This ensures that creating the same assignment in parallel waits for the first one to finish
50+
var (
51+
assignmentLocksMu sync.Mutex
52+
assignmentLocks = make(map[string]*sync.Mutex)
53+
)
54+
55+
// LockAssignment acquires a lock for a specific assignment identifier.
56+
// It returns an unlock function that must be deferred.
57+
func LockAssignment(id string) func() {
58+
assignmentLocksMu.Lock()
59+
mu, ok := assignmentLocks[id]
60+
if !ok {
61+
mu = &sync.Mutex{}
62+
assignmentLocks[id] = mu
63+
}
64+
assignmentLocksMu.Unlock()
65+
66+
mu.Lock()
67+
68+
// Return the cleanup function
69+
return func() {
70+
mu.Unlock()
71+
}
72+
}

stackit/internal/services/authorization/utils/util_test.go

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ import (
44
"context"
55
"os"
66
"reflect"
7+
"sync"
78
"testing"
9+
"time"
810

911
"github.com/hashicorp/terraform-plugin-framework/diag"
1012
sdkClients "github.com/stackitcloud/stackit-sdk-go/core/clients"
@@ -146,3 +148,82 @@ func TestTypeConverter(t *testing.T) {
146148
})
147149
}
148150
}
151+
152+
func TestLockAssignment(t *testing.T) {
153+
// Test Case 1: Serialization (Same Key)
154+
// Ensures that two processes cannot hold the lock for the same ID simultaneously.
155+
t.Run("same key serialization", func(t *testing.T) {
156+
key := "uuid,reader,project"
157+
var criticalSectionActive bool
158+
var wg sync.WaitGroup
159+
160+
// Goroutine 1
161+
wg.Add(1)
162+
go func() {
163+
defer wg.Done()
164+
unlock := LockAssignment(key)
165+
defer unlock()
166+
167+
// Mark that we are inside the critical section
168+
criticalSectionActive = true
169+
// Sleep to simulate API work and give G2 a chance to interrupt if the lock is broken
170+
time.Sleep(100 * time.Millisecond)
171+
criticalSectionActive = false
172+
}()
173+
174+
// Goroutine 2
175+
// Wait a tiny bit to ensure G1 has started and acquired the lock
176+
time.Sleep(10 * time.Millisecond)
177+
wg.Add(1)
178+
go func() {
179+
defer wg.Done()
180+
181+
// This should block until G1 releases the lock
182+
unlock := LockAssignment(key)
183+
defer unlock()
184+
185+
// If the lock works, G1 must have finished (setting criticalSectionActive = false)
186+
if criticalSectionActive {
187+
t.Error("LockAssignment failed: entered critical section while another goroutine held the lock")
188+
}
189+
}()
190+
191+
wg.Wait()
192+
})
193+
194+
// Test Case 2: Different Keys
195+
t.Run("different keys parallelism", func(t *testing.T) {
196+
key1 := "uuid_a"
197+
key2 := "uuid_b"
198+
199+
start := time.Now()
200+
var wg sync.WaitGroup
201+
202+
wg.Add(2)
203+
204+
// Worker for Key 1
205+
go func() {
206+
defer wg.Done()
207+
unlock := LockAssignment(key1)
208+
defer unlock()
209+
time.Sleep(100 * time.Millisecond)
210+
}()
211+
212+
// Worker for Key 2
213+
go func() {
214+
defer wg.Done()
215+
unlock := LockAssignment(key2)
216+
defer unlock()
217+
time.Sleep(100 * time.Millisecond)
218+
}()
219+
220+
wg.Wait()
221+
duration := time.Since(start)
222+
223+
// If they ran sequentially, it would take >200ms.
224+
// If parallel, it should take ~100ms. We use 150ms as a safe threshold.
225+
if duration > 150*time.Millisecond {
226+
t.Errorf("LockAssignment with different keys blocked execution. Duration: %v", duration)
227+
}
228+
})
229+
}

0 commit comments

Comments
 (0)