diff --git a/internal/lib/container_server.go b/internal/lib/container_server.go index 77ca6725d85..50e6959e38f 100644 --- a/internal/lib/container_server.go +++ b/internal/lib/container_server.go @@ -397,6 +397,11 @@ func (c *ContainerServer) LoadSandbox(ctx context.Context, id string) (sb *sandb } sb.SetCreated() + + if scontainer.State().Status == oci.ContainerStateStopped { + sb.SetStopped(ctx, true) + } + selinux.ReserveLabel(processLabel) if err := c.ctrIDIndex.Add(scontainer.ID()); err != nil { diff --git a/internal/lib/sandbox/sandbox.go b/internal/lib/sandbox/sandbox.go index 7f8756d5250..7926a2a26b5 100644 --- a/internal/lib/sandbox/sandbox.go +++ b/internal/lib/sandbox/sandbox.go @@ -59,15 +59,18 @@ type Sandbox struct { nsOpts *types.NamespaceOption dnsConfig *types.DNSConfig stopMutex sync.RWMutex - created bool - stopped bool - networkStopped bool - privileged bool - hostNetwork bool - usernsMode string - containerEnvPath string - podLinuxOverhead *types.LinuxContainerResources - podLinuxResources *types.LinuxContainerResources + // stateMutex protects the use of created, stopped and networkStopped bools + // which are all fields that can change at runtime + stateMutex sync.RWMutex + created bool + stopped bool + networkStopped bool + privileged bool + hostNetwork bool + usernsMode string + containerEnvPath string + podLinuxOverhead *types.LinuxContainerResources + podLinuxResources *types.LinuxContainerResources } // DefaultShmSize is the default shm size. @@ -329,6 +332,9 @@ func (s *Sandbox) SetStopped(ctx context.Context, createFile bool) { ctx, span := log.StartSpan(ctx) defer span.End() + s.stateMutex.Lock() + defer s.stateMutex.Unlock() + if s.stopped { return } @@ -344,16 +350,24 @@ func (s *Sandbox) SetStopped(ctx context.Context, createFile bool) { // Stopped returns whether the sandbox state has been // set to stopped. func (s *Sandbox) Stopped() bool { + s.stateMutex.RLock() + defer s.stateMutex.RUnlock() + return s.stopped } // SetCreated sets the created status of sandbox to true. func (s *Sandbox) SetCreated() { + s.stateMutex.Lock() + defer s.stateMutex.Unlock() s.created = true } // NetworkStopped returns whether the network has been stopped. func (s *Sandbox) NetworkStopped() bool { + s.stateMutex.RLock() + defer s.stateMutex.RUnlock() + return s.networkStopped } @@ -368,6 +382,9 @@ func (s *Sandbox) SetNetworkStopped(ctx context.Context, createFile bool) error ctx, span := log.StartSpan(ctx) defer span.End() + s.stateMutex.Lock() + defer s.stateMutex.Unlock() + if s.networkStopped { return nil } @@ -404,6 +421,7 @@ func (s *Sandbox) SetContainerEnvFile(ctx context.Context) error { return nil } +// This function assumes the state lock has been taken for this sandbox. func (s *Sandbox) createFileInInfraDir(ctx context.Context, filename string) error { // If the sandbox is not yet created, // this function is being called when @@ -433,6 +451,9 @@ func (s *Sandbox) createFileInInfraDir(ctx context.Context, filename string) err } func (s *Sandbox) RestoreStopped() { + s.stateMutex.Lock() + defer s.stateMutex.Unlock() + if s.fileExistsInInfraDir(sbStoppedFilename) { s.stopped = true } @@ -459,11 +480,14 @@ func (s *Sandbox) fileExistsInInfraDir(filename string) bool { // Created returns the created status of sandbox. func (s *Sandbox) Created() bool { + s.stateMutex.RLock() + defer s.stateMutex.RUnlock() + return s.created } func (s *Sandbox) State() types.PodSandboxState { - if s.Ready(false) { + if s.Ready() { return types.PodSandboxState_SANDBOX_READY } @@ -475,23 +499,9 @@ func (s *Sandbox) State() types.PodSandboxState { // `takeLock` should be set if we need to take the lock to get the infra container's state. // If there is no infra container, it is never considered ready. // If the infra container is spoofed, the pod is considered ready when it has been created, but not stopped. -func (s *Sandbox) Ready(takeLock bool) bool { - podInfraContainer := s.InfraContainer() - if podInfraContainer == nil { - return false - } - - if podInfraContainer.Spoofed() { - return s.created && !s.stopped - } - // Assume the sandbox is ready, unless it has an infra container that - // isn't running - var cState *oci.ContainerState - if takeLock { - cState = podInfraContainer.State() - } else { - cState = podInfraContainer.StateNoLock() - } +func (s *Sandbox) Ready() bool { + s.stateMutex.RLock() + defer s.stateMutex.RUnlock() - return cState.Status == oci.ContainerStateRunning + return s.created && !s.stopped } diff --git a/internal/lib/sandbox/sandbox_test.go b/internal/lib/sandbox/sandbox_test.go index d1d4422803c..449296077d8 100644 --- a/internal/lib/sandbox/sandbox_test.go +++ b/internal/lib/sandbox/sandbox_test.go @@ -139,6 +139,7 @@ var _ = t.Describe("Sandbox", func() { // Then Expect(testSandbox.Stopped()).To(BeTrue()) + Expect(testSandbox.Ready()).To(BeFalse()) }) }) @@ -184,6 +185,7 @@ var _ = t.Describe("Sandbox", func() { // Then Expect(testSandbox.Created()).To(BeTrue()) + Expect(testSandbox.Ready()).To(BeTrue()) }) }) diff --git a/server/container_portforward.go b/server/container_portforward.go index 7c59c350232..6d97d17e1e3 100644 --- a/server/container_portforward.go +++ b/server/container_portforward.go @@ -51,7 +51,7 @@ func (s *StreamService) PortForward(ctx context.Context, podSandboxID string, po return fmt.Errorf("could not find sandbox %s", podSandboxID) } - if !sb.Ready(true) { + if !sb.Ready() { return fmt.Errorf("sandbox %s is not running", podSandboxID) } diff --git a/server/sandbox_list_test.go b/server/sandbox_list_test.go index c17a89fd250..1f1a1e35afa 100644 --- a/server/sandbox_list_test.go +++ b/server/sandbox_list_test.go @@ -96,8 +96,9 @@ var _ = t.Describe("ListPodSandbox", func() { // Given mockDirs(testManifest) createDummyState() - _, err := sut.LoadSandbox(context.Background(), sandboxID) + sb, err := sut.LoadSandbox(context.Background(), sandboxID) Expect(err).ToNot(HaveOccurred()) + sb.SetStopped(context.Background(), false) // When response, err := sut.ListPodSandbox(context.Background(), diff --git a/server/sandbox_status.go b/server/sandbox_status.go index aa0bec8bb0c..2ee1289ad25 100644 --- a/server/sandbox_status.go +++ b/server/sandbox_status.go @@ -25,10 +25,7 @@ func (s *Server) PodSandboxStatus(ctx context.Context, req *types.PodSandboxStat return nil, status.Errorf(codes.NotFound, "could not find pod %q: %v", req.PodSandboxId, err) } - rStatus := types.PodSandboxState_SANDBOX_NOTREADY - if sb.Ready(true) { - rStatus = types.PodSandboxState_SANDBOX_READY - } + rStatus := sb.State() var linux *types.LinuxPodSandboxStatus if sb.NamespaceOptions() != nil { diff --git a/server/server.go b/server/server.go index e195873c9e5..07d568d15ae 100644 --- a/server/server.go +++ b/server/server.go @@ -859,6 +859,12 @@ func (s *Server) handleExit(ctx context.Context, event fsnotify.Event) { c = sb.InfraContainer() resource = "sandbox infra" + // We discovered the infra container stopped (potentially unexpectedly). + // Since sandboxes status is now being judged by the sb.stopped boolean, + // rather than the infra container's status, we have to manually set stopped here. + // It's likely we're doing double the work here, but that's better than missing it + // if the infra container crashed. + sb.SetStopped(ctx, true) } else { sb = s.GetSandbox(c.Sandbox()) }