4 changes: 1 addition & 3 deletions README.md
@@ -23,8 +23,6 @@ kind: HumioCluster
metadata:
name: humiocluster-sample
spec:
image: humio/humio-core
version: "1.9.0"
image: "humio/humio-core:1.9.1"
targetReplicationFactor: 2
```

10 changes: 6 additions & 4 deletions deploy/crds/core.humio.com_humioclusters_crd.yaml
@@ -133,7 +133,7 @@ spec:
type: object
type: array
image:
description: Desired container image
description: Desired container image including the image tag
type: string
nodeCount:
description: Desired number of nodes
@@ -144,16 +144,18 @@
targetReplicationFactor:
description: Desired number of replicas of both storage and ingest partitions
type: integer
version:
description: Desired version of Humio nodes
type: string
type: object
status:
description: HumioClusterStatus defines the observed state of HumioCluster
properties:
allDataAvailable:
description: Current state set by operator.
type: string
clusterState:
description: 'ClusterState will be empty before the cluster is bootstrapped.
From there it can be "Bootstrapping" or "Operational" TODO: other
states?'
type: string
stateLastUpdated:
format: int64
type: integer
3 changes: 1 addition & 2 deletions deploy/crds/core.humio.com_v1alpha1_humiocluster_cr.yaml
@@ -3,8 +3,7 @@ kind: HumioCluster
metadata:
name: example-humiocluster
spec:
image: humio/humio-core
version: "1.9.0"
image: "humio/humio-core:1.9.1"
targetReplicationFactor: 2
storagePartitionsCount: 24
environmentVariables:
4 changes: 2 additions & 2 deletions hack/restart-k8s.sh
@@ -29,8 +29,8 @@ kind load docker-image --name kind docker.io/confluentinc/cp-zookeeper:5.4.1
kind load docker-image --name kind solsson/kafka-prometheus-jmx-exporter@sha256:6f82e2b0464f50da8104acd7363fb9b995001ddff77d248379f8788e78946143

# Pre-load humio images
docker pull humio/humio-core:1.9.0
kind load docker-image --name kind humio/humio-core:1.9.0
docker pull humio/humio-core:1.9.1
kind load docker-image --name kind humio/humio-core:1.9.1

# Use helm 3 to start up Kafka and Zookeeper
mkdir ~/git
4 changes: 1 addition & 3 deletions pkg/apis/core/v1alpha1/humiocluster_types.go
@@ -7,10 +7,8 @@ import (

// HumioClusterSpec defines the desired state of HumioCluster
type HumioClusterSpec struct {
// Desired container image
// Desired container image including the image tag
Image string `json:"image,omitempty"`
// Desired version of Humio nodes
Version string `json:"version,omitempty"`
// Desired number of replicas of both storage and ingest partitions
TargetReplicationFactor int `json:"targetReplicationFactor,omitempty"`
// Desired number of storage partitions
3 changes: 1 addition & 2 deletions pkg/controller/humiocluster/cluster_auth_test.go
@@ -30,8 +30,7 @@ func TestGetJWTForSingleUser(t *testing.T) {
Namespace: "logging",
},
Spec: corev1alpha1.HumioClusterSpec{
Image: "humio/humio-core",
Version: "1.9.0",
Image: "humio/humio-core:1.9.1",
TargetReplicationFactor: 3,
NodeCount: 3,
},
3 changes: 1 addition & 2 deletions pkg/controller/humiocluster/defaults.go
@@ -10,8 +10,7 @@ import (
const (
name = "humiocluster"
namespace = "logging"
image = "humio/humio-core"
version = "1.9.0"
image = "humio/humio-core:1.9.1"
targetReplicationFactor = 2
storagePartitionsCount = 24
digestPartitionsCount = 24
9 changes: 9 additions & 0 deletions pkg/controller/humiocluster/helpers.go
@@ -17,3 +17,12 @@ func matchingLabelsForHumio(clusterName string) client.MatchingLabels {
matchingLabels = labelsForHumio(clusterName)
return matchingLabels
}

func labelListContainsLabel(labelList map[string]string, label string) bool {
for labelName := range labelList {
if labelName == label {
return true
}
}
return false
}
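
For reference, the new labelListContainsLabel helper is a plain key lookup over a label map. A minimal test exercising it could look like the sketch below; it is hypothetical (not part of this change) and assumes it sits next to helpers.go in the humiocluster package.

package humiocluster

import "testing"

// Hypothetical sketch, not part of this change: verifies the key lookup
// performed by labelListContainsLabel, including the nil-map case.
func TestLabelListContainsLabel(t *testing.T) {
	labels := map[string]string{"app": "humio", "node_id": "3"}

	if !labelListContainsLabel(labels, "node_id") {
		t.Error("expected label list to contain node_id")
	}
	if labelListContainsLabel(labels, "storage_partition") {
		t.Error("did not expect label list to contain storage_partition")
	}
	if labelListContainsLabel(nil, "node_id") {
		t.Error("did not expect a nil label list to contain any label")
	}
}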
85 changes: 70 additions & 15 deletions pkg/controller/humiocluster/humiocluster_controller.go
@@ -128,9 +128,10 @@ func (r *ReconcileHumioCluster) Reconcile(request reconcile.Request) (reconcile.
// Set defaults
setDefaults(humioCluster)

// Set cluster status
// Assume we are bootstrapping if no cluster state is set.
// TODO: this is a workaround for the issue where humio pods cannot start up at the same time during the first boot
if humioCluster.Status.ClusterState == "" {
humioCluster.Status.ClusterState = "Bootstrapping"
r.setClusterStatus(context.TODO(), "Boostrapping", humioCluster)
}

// Ensure developer password is a k8s secret
Expand All @@ -139,13 +140,22 @@ func (r *ReconcileHumioCluster) Reconcile(request reconcile.Request) (reconcile.
return reconcile.Result{}, err
}

// Ensure pods exist. Will requeue if not all pods are created and ready
emptyResult := reconcile.Result{}
result, err := r.ensurePodsExist(context.TODO(), humioCluster)

// Ensure pods that do not run the desired version are deleted.
result, err := r.ensureMismatchedPodVersionsAreDeleted(context.TODO(), humioCluster)
if result != emptyResult || err != nil {
return result, err
}

// Ensure pods exist. Will requeue if not all pods are created and ready
result, err = r.ensurePodsExist(context.TODO(), humioCluster)
if result != emptyResult || err != nil {
return result, err
}

r.setClusterStatus(context.TODO(), "Running", humioCluster)

// Ensure service exists
err = r.ensureServiceExists(context.TODO(), humioCluster)
if err != nil {
@@ -173,6 +183,13 @@ func (r *ReconcileHumioCluster) Reconcile(request reconcile.Request) (reconcile.
return reconcile.Result{Requeue: true, RequeueAfter: time.Second * 30}, nil
}

// setClusterStatus is used to change the cluster status
// TODO: we use this to determine whether we should delay startup of humio pods during bootstrap vs. during an image update
func (r *ReconcileHumioCluster) setClusterStatus(context context.Context, clusterState string, humioCluster *corev1alpha1.HumioCluster) error {
humioCluster.Status.ClusterState = clusterState
return r.client.Update(context, humioCluster)
}

func (r *ReconcileHumioCluster) ensurePodLabels(context context.Context, hc *corev1alpha1.HumioCluster) error {
r.logger.Info("ensuring pod labels")
cluster, err := r.humioClient.GetClusters()
@@ -184,7 +201,7 @@ func (r *ReconcileHumioCluster) ensurePodLabels(context context.Context, hc *cor

for _, pod := range foundPodList {
// Skip pods that already have a label
if podHasLabel(pod.GetLabels(), "node_id") {
if labelListContainsLabel(pod.GetLabels(), "node_id") {
continue
}
// If pod does not have an IP yet it is probably pending
Expand All @@ -208,15 +225,6 @@ func (r *ReconcileHumioCluster) ensurePodLabels(context context.Context, hc *cor
return nil
}

func podHasLabel(labels map[string]string, label string) bool {
for labelName := range labels {
if labelName == label {
return true
}
}
return false
}

func (r *ReconcileHumioCluster) ensurePartitionsAreBalanced(humioClusterController humio.ClusterController, hc *corev1alpha1.HumioCluster) error {
partitionsBalanced, err := humioClusterController.AreStoragePartitionsBalanced(hc)
if err != nil {
@@ -258,6 +266,53 @@ func (r *ReconcileHumioCluster) ensureServiceExists(context context.Context, hc
return nil
}

// ensureMismatchedPodVersionsAreDeleted is used to delete pods whose container image does not match the desired image from the HumioCluster spec.
// If a pod is deleted, this will requeue immediately and rely on the next reconciliation to delete the next pod.
// The method only returns an empty result and no error if all pods are running the desired version,
// and no pod is currently being deleted.
func (r *ReconcileHumioCluster) ensureMismatchedPodVersionsAreDeleted(ctx context.Context, humioCluster *corev1alpha1.HumioCluster) (reconcile.Result, error) {
foundPodList, err := ListPods(r.client, humioCluster)
if err != nil {
return reconcile.Result{}, err
}

// if we do not have any pods running, we have nothing to clean up or wait for
if len(foundPodList) == 0 {
return reconcile.Result{}, nil
}

podBeingDeleted := false
for _, pod := range foundPodList {
// TODO: can we assume we always only have one container?
// Probably not if running in a service mesh with sidecars injected.
// Should have a container name variable and match this here.

// only consider pods not already being deleted
if pod.DeletionTimestamp == nil {

// if the pod's container image differs from the desired image, we want to delete it
if pod.Spec.Containers[0].Image != humioCluster.Spec.Image {
// TODO: figure out if we should only allow upgrades and not downgrades
r.logger.Info(fmt.Sprintf("deleting pod %s", pod.Name))
err = DeletePod(r.client, pod)
if err != nil {
return reconcile.Result{}, fmt.Errorf("could not delete pod %s, got err: %v", pod.Name, err)
}
return reconcile.Result{Requeue: true}, nil
}
} else {
podBeingDeleted = true
}

}
// if we have pods being deleted, requeue after a short delay
if podBeingDeleted {
return reconcile.Result{Requeue: true, RequeueAfter: time.Second * 10}, nil
}
// return empty result and no error indicating that everything was in the state we wanted it to be
return reconcile.Result{}, nil
}

// TODO: change to create 1 pod at a time, return Requeue=true and RequeueAfter.
// check that other pods, if they exist, are in a ready state
func (r *ReconcileHumioCluster) ensurePodsExist(conetext context.Context, humioCluster *corev1alpha1.HumioCluster) (reconcile.Result, error) {
@@ -286,7 +341,7 @@ func (r *ReconcileHumioCluster) ensurePodsExist(conetext context.Context, humioC
return reconcile.Result{}, nil
}

if podsNotReadyCount > 0 {
if podsNotReadyCount > 0 && humioCluster.Status.ClusterState == "Bootstrapping" {
r.logger.Info(fmt.Sprintf("there are %d humio pods that are not ready. all humio pods must report ready before reconciliation can continue", podsNotReadyCount))
return reconcile.Result{Requeue: true, RequeueAfter: time.Second * 5}, nil
}
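
The controller changes above implement the image update as a rolling replacement: ensureMismatchedPodVersionsAreDeleted removes at most one pod per reconciliation whose first container image differs from HumioClusterSpec.Image, and ensurePodsExist recreates it; the readiness gate now only blocks while the cluster state is "Bootstrapping". The mismatch condition itself is a plain string comparison, illustrated by the hypothetical sketch below (not part of this change; the container name is illustrative, and only the standard k8s.io/api/core/v1 types are assumed).

package humiocluster

import (
	"testing"

	corev1 "k8s.io/api/core/v1"
)

// Hypothetical sketch, not part of this change: shows the image comparison
// that ensureMismatchedPodVersionsAreDeleted relies on to pick pods to delete.
func TestPodImageMismatch(t *testing.T) {
	desiredImage := "humio/humio-core:1.9.1"

	pod := corev1.Pod{
		Spec: corev1.PodSpec{
			Containers: []corev1.Container{
				{Name: "humio-node", Image: "humio/humio-core:1.9.0"},
			},
		},
	}

	// A pod still running the old tag should be flagged for deletion.
	if pod.Spec.Containers[0].Image == desiredImage {
		t.Error("expected the running image to differ from the desired image")
	}
}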