Description
What happened?
There's a fairly obvious race condition when SMP IRQ affinity is configured and multiple containers are in play: both /proc/irq/default_smp_affinity and the irqbalance config files are read and written without any mutex in place.
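To make the failure mode concrete, here is a minimal, self-contained Go sketch of the pattern (illustrative only, not CRI-O's actual code; the helper name updateMask is made up and the proc file is replaced with a temp file so the sketch is safe to run). Each per-container hook reads the current mask, flips its CPU bit, and writes the result back; with no mutex, two hooks can read the same old value and the later write drops the earlier change:

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
)

// updateMask mimics the per-container hook: read the current default SMP
// affinity mask, flip one CPU bit, write the result back. Without locking this
// is a classic read-modify-write race: two concurrent callers can read the
// same old value, and the later write silently drops the earlier change.
func updateMask(path string, cpuBit uint, set bool) error {
	data, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	cur, err := strconv.ParseUint(strings.TrimSpace(string(data)), 16, 64)
	if err != nil {
		return err
	}
	if set {
		cur |= 1 << cpuBit // give the CPU back to IRQ balancing (container deletion)
	} else {
		cur &^= 1 << cpuBit // steer IRQs away from the CPU (container creation)
	}
	return os.WriteFile(path, []byte(fmt.Sprintf("%x\n", cur)), 0o644)
}

func main() {
	// Stand-in for /proc/irq/default_smp_affinity so the sketch is safe to run.
	path := filepath.Join(os.TempDir(), "default_smp_affinity")
	if err := os.WriteFile(path, []byte("3e0f\n"), 0o644); err != nil {
		panic(err)
	}

	// Five containers torn down at the same time, as in the reproducer below:
	// each restores "its" CPU (bits 4..8 here, matching 3fff -> 3e0f). With no
	// mutex the final mask is frequently not 3fff.
	var wg sync.WaitGroup
	for cpu := uint(4); cpu <= 8; cpu++ {
		wg.Add(1)
		go func(c uint) {
			defer wg.Done()
			_ = updateMask(path, c, true)
		}(cpu)
	}
	wg.Wait()

	out, _ := os.ReadFile(path)
	fmt.Printf("final mask: %s", out)
}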
What did you expect to happen?
No race occurs; the affinity mask is set and restored consistently.
How can we reproduce it (as minimally and precisely as possible)?
#!/bin/bash
set -x

affinity_file="/proc/irq/default_smp_affinity"
expected_reset_affinity="3fff"
expected_mask="3e0f"

echo $expected_reset_affinity > $affinity_file
cat $affinity_file

for i in {0..20}; do
    echo "========"
    echo "Run ${i}"
    echo "========"
    kubectl apply -f pod.yaml
    kubectl wait --for=condition=Ready pod/qos-demo --timeout=180s
    mask=$(cat ${affinity_file} | tr -d '\n')
    echo "Got mask: $mask, expected mask: $expected_mask"
    if [ "${mask}" != "${expected_mask}" ]; then
        exit 1
    fi
    kubectl delete pod qos-demo
    kubectl wait --for=delete pod/qos-demo --timeout=180s
    mask=$(cat ${affinity_file} | tr -d '\n')
    echo "After reset --- Got mask: $mask, expected mask: $expected_reset_affinity"
    if [ "${mask}" != "${expected_reset_affinity}" ]; then
        exit 1
    fi
done
pod.yaml:

apiVersion: v1
kind: Pod
metadata:
  name: qos-demo
  annotations:
    irq-load-balancing.crio.io: "disable"
spec:
  hostNetwork: true
  runtimeClassName: performance-performance
  containers:
  - name: qos-demo-ctr-1
    image: quay.io/akaris/nice-test
    command:
    - "/bin/sleep"
    - "infinity"
    resources:
      limits:
        memory: "100Mi"
        cpu: "1"
      requests:
        memory: "100Mi"
        cpu: "1"
  - name: qos-demo-ctr-2
    image: quay.io/akaris/nice-test
    command:
    - "/bin/sleep"
    - "infinity"
    resources:
      limits:
        memory: "100Mi"
        cpu: "1"
      requests:
        memory: "100Mi"
        cpu: "1"
  - name: qos-demo-ctr-3
    image: quay.io/akaris/nice-test
    command:
    - "/bin/sleep"
    - "infinity"
    resources:
      limits:
        memory: "100Mi"
        cpu: "1"
      requests:
        memory: "100Mi"
        cpu: "1"
  - name: qos-demo-ctr-4
    image: quay.io/akaris/nice-test
    command:
    - "/bin/sleep"
    - "infinity"
    resources:
      limits:
        memory: "100Mi"
        cpu: "1"
      requests:
        memory: "100Mi"
        cpu: "1"
  - name: qos-demo-ctr-5
    image: quay.io/akaris/nice-test
    command:
    - "/bin/sleep"
    - "infinity"
    resources:
      limits:
        memory: "100Mi"
        cpu: "1"
      requests:
        memory: "100Mi"
        cpu: "1"
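For context on the expected values in the script above: expected_reset_affinity=3fff appears to be the node's full mask (CPUs 0-13), and while the pod runs each of the five guaranteed containers gets one exclusive CPU whose IRQs are steered away, which clears five bits. Assuming those CPUs are 4-8 on this node, that yields the script's expected_mask of 3e0f; a quick check of the arithmetic:

package main

import "fmt"

func main() {
	const full = 0x3fff      // expected_reset_affinity: all 14 CPUs take IRQs
	const exclusive = 0x01f0 // CPUs 4-8, one per guaranteed container (assumed placement)

	fmt.Printf("while the pod runs: %x\n", full&^exclusive) // 3e0f == expected_mask
	fmt.Printf("after pod deletion: %x\n", full)            // 3fff == expected_reset_affinity
}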
On pod deletion you'll see the containers race while resetting the SMP affinity file, leaving it in an inconsistent state (see the quick check after the trace):
+ echo ========
========
+ echo 'Run 1'
Run 1
+ echo ========
========
+ kubectl apply -f pod.yaml
pod/qos-demo created
+ kubectl wait --for=condition=Ready pod/qos-demo --timeout=180s
pod/qos-demo condition met
++ cat /proc/irq/default_smp_affinity
++ tr -d '\n'
+ mask=3e0f
+ echo 'Got mask: 3e0f, expected mask: 3e0f'
Got mask: 3e0f, expected mask: 3e0f
+ '[' 3e0f '!=' 3e0f ']'
+ kubectl delete pod qos-demo
pod "qos-demo" deleted
+ kubectl wait --for=delete pod/qos-demo --timeout=180s
++ cat /proc/irq/default_smp_affinity
++ tr -d '\n'
+ mask=3fbf
+ echo 'After reset --- Got mask: 3fbf, expected mask: 3fff'
After reset --- Got mask: 3fbf, expected mask: 3fff
+ '[' 3fbf '!=' 3fff ']'
+ exit 1
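The failing value differs from the expected one in exactly one bit, which is what a lost update looks like: one container's restore of its CPU was overwritten by a concurrent hook. A quick check, using the values from the trace above:

package main

import "fmt"

func main() {
	const expected = 0x3fff // what the mask should be after the pod is gone
	const got = 0x3fbf      // what the trace above actually shows

	// The difference is a single bit (CPU 6): one container's restore was lost.
	fmt.Printf("missing bits: %x\n", expected&^got) // 40 -> bit 6
}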
Anything else we need to know?
I'll submit a PR in a bit.
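Presumably the fix boils down to serializing the read-modify-write cycles on the shared files; a minimal sketch of that shape (illustrative only, not the actual patch; updateMaskLocked is a made-up name):

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
)

// One process-wide mutex so concurrent per-container hooks cannot interleave
// their read-modify-write cycles on the shared affinity state.
var affinityMu sync.Mutex

// updateMaskLocked is the same read-modify-write as in the sketch above, but
// the whole cycle runs under the lock, so no concurrent update can be lost.
func updateMaskLocked(path string, cpuBit uint, set bool) error {
	affinityMu.Lock()
	defer affinityMu.Unlock()

	data, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	cur, err := strconv.ParseUint(strings.TrimSpace(string(data)), 16, 64)
	if err != nil {
		return err
	}
	if set {
		cur |= 1 << cpuBit
	} else {
		cur &^= 1 << cpuBit
	}
	return os.WriteFile(path, []byte(fmt.Sprintf("%x\n", cur)), 0o644)
}

func main() {
	// Same concurrent-teardown scenario as before, now serialized: the final
	// mask is always 3fff regardless of how the goroutines interleave.
	path := filepath.Join(os.TempDir(), "default_smp_affinity")
	if err := os.WriteFile(path, []byte("3e0f\n"), 0o644); err != nil {
		panic(err)
	}
	var wg sync.WaitGroup
	for cpu := uint(4); cpu <= 8; cpu++ {
		wg.Add(1)
		go func(c uint) {
			defer wg.Done()
			_ = updateMaskLocked(path, c, true)
		}(cpu)
	}
	wg.Wait()
	out, _ := os.ReadFile(path)
	fmt.Printf("final mask: %s", out)
}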
CRI-O and Kubernetes version
CRI-O built from the upstream branch
# kubectl version --output=json
{
"clientVersion": {
"major": "1",
"minor": "33",
"gitVersion": "v1.33.1",
"gitCommit": "8adc0f041b8e7ad1d30e29cc59c6ae7a15e19828",
"gitTreeState": "clean",
"buildDate": "2025-05-15T08:27:33Z",
"goVersion": "go1.24.2",
"compiler": "gc",
"platform": "linux/amd64"
},
"kustomizeVersion": "v5.6.0",
"serverVersion": {
"major": "1",
"minor": "33",
"emulationMajor": "1",
"emulationMinor": "33",
"minCompatibilityMajor": "1",
"minCompatibilityMinor": "32",
"gitVersion": "v1.33.1",
"gitCommit": "8adc0f041b8e7ad1d30e29cc59c6ae7a15e19828",
"gitTreeState": "clean",
"buildDate": "2025-05-15T08:19:08Z",
"goVersion": "go1.24.2",
"compiler": "gc",
"platform": "linux/amd64"
}
}
OS version
# On Linux:
$ cat /etc/os-release
# paste output here
$ uname -a
# paste output here