I am using the latest versions of bundle.yaml and cr.yaml from the main branch of the GitHub repo.
GKE Kubernetes version: 1.25.13-gke.200
Problem:
The cluster is in state "initializing" forever.
Watch the logs of the operator. The following stack trace is being reported constantly with a short interval of several seconds:
2023-09-19T09:38:01.178Z ERROR failed to reconcile cluster {"controller": "psmdb-controller", "object":
{"name":"my-cluster-name","namespace":"default"}
, "namespace": "default", "name": "my-cluster-name", "reconcileID": "bdfb8873-1a9a-4699-bb53-ec1d02d29c3c", "replset": "rs0", "error": "dial: ping mongo: server selection error: context deadline exceeded, current topology: { Type: ReplicaSetNoPrimary, Servers: [
{ Addr: my-cluster-name-rs0-0.my-cluster-name-rs0.default.svc.cluster.local:27017, Type: Unknown, Last error: dial tcp: lookup my-cluster-name-rs0-0.my-cluster-name-rs0.default.svc.cluster.local on 10.32.16.10:53: no such host }
, { Addr: my-cluster-name-rs0-1.my-cluster-name-rs0.default.svc.cluster.local:27017, Type: Unknown, Last error: dial tcp: lookup my-cluster-name-rs0-1.my-cluster-name-rs0.default.svc.cluster.local on 10.32.16.10:53: no such host }, { Addr: my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017, Type: Unknown, Last error: dial tcp: lookup my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local on 10.32.16.10:53: no such host }, ] }", "errorVerbose": "server selection error: context deadline exceeded, current topology: { Type: ReplicaSetNoPrimary, Servers: [
{ Addr: my-cluster-name-rs0-0.my-cluster-name-rs0.default.svc.cluster.local:27017, Type: Unknown, Last error: dial tcp: lookup my-cluster-name-rs0-0.my-cluster-name-rs0.default.svc.cluster.local on 10.32.16.10:53: no such host }
, { Addr: my-cluster-name-rs0-1.my-cluster-name-rs0.default.svc.cluster.local:27017, Type: Unknown, Last error: dial tcp: lookup my-cluster-name-rs0-1.my-cluster-name-rs0.default.svc.cluster.local on 10.32.16.10:53: no such host }, { Addr: my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017, Type: Unknown, Last error: dial tcp: lookup my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local on 10.32.16.10:53: no such host }, ] }\nping mongo\ngithub.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo.Dial\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo/mongo.go:111\ngithub.com/percona/percona-server-mongodb-operator/pkg/psmdb.MongoClient\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/psmdb/client.go:52\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*mongoClientProvider).Mongo\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/connections.go:37\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).mongoClientWithRole\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/connections.go:59\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).reconcileCluster\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/mgo.go:87\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).Reconcile\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/psmdb_controller.go:493\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:119\nsigs.k8s.io/controller-runtime/pk
g/internal/controller.(*Controller).reconcileHandler\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:316\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:266\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:227\nruntime.goexit\n\t/usr/local/go/src/runtime/asm_amd64.s:1598\ndial\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).reconcileCluster\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/mgo.go:93\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).Reconcile\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/psmdb_controller.go:493\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:119\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:316\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:266\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:227\nruntime.goexit\n\t/usr/local/go/src/runtime/asm_amd64.s:1598"}
github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).Reconcile
/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/psmdb_controller.go:495
sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile
/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:119
sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler
/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:316
sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem
/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:266
sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2
/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:227
More Details:
k describe pod/my-cluster-name-rs0-0
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Pulling 31m (x13 over 96m) kubelet Pulling image "percona/percona-server-mongodb:6.0.4-3"
Warning Unhealthy 21m (x6 over 24m) kubelet Liveness probe errored: command "/opt/percona/mongodb-healthcheck k8s liveness --ssl --sslInsecure --sslCAFile /etc/mongodb-ssl/ca.crt --sslPEMKeyFile /tmp/tls.pem --startupDelaySeconds 7200" timed out
Warning BackOff 88s (x281 over 80m) kubelet Back-off restarting failed container
k logs -f my-cluster-name-rs0-0
luster.local:27017","error":"HostUnreachable: Error connecting to my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017 :: caused by :: Could not find address for my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017: SocketException: Host not found (authoritative)","replicaSet":"rs0","response":{}}}
{"t":{"$date":"2023-09-19T12:34:28.959+00:00"},"s":"I", "c":"NETWORK", "id":4712102, "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"Host failed in replica set","attr":{"replicaSet":"rs0","host":"my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017","error":{"code":6,"codeName":"HostUnreachable","errmsg":"Error connecting to my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017 :: caused by :: Could not find address for my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017: SocketException: Host not found (authoritative)"},"action":{"dropConnections":true,"requestImmediateCheck":false,"outcome":{"host":"my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017","success":false,"errorMessage":"HostUnreachable: Error connecting to my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017 :: caused by :: Could not find address for my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017: SocketException: Host not found (authoritative)"}}}}
{"t":{"$date":"2023-09-19T12:34:28.972+00:00"},"s":"I", "c":"REPL_HB", "id":23974, "ctx":"ReplCoord-18","msg":"Heartbeat failed after max retries","attr":{"target":"my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017","maxHeartbeatRetries":2,"error":{"code":6,"codeName":"HostUnreachable","errmsg":"Error connecting to my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017 :: caused by :: Could not find address for my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017: SocketException: Host not found (authoritative)"}}}
{"t":{"$date":"2023-09-19T12:34:29.459+00:00"},"s":"I", "c":"CONNPOOL", "id":22576, "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"Connecting","attr":{"hostAndPort":"my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017"}}
Environment
None
Activity
Pavel Tankov September 21, 2023 at 11:53 AM
This was an OOM (Out Of Memory) problem and is now fixed.
Steps:
Have some Kubernetes cluster somewhere, e.g. in GKE (Google Kubernetes Engine)
Install Percona Server for MongoDB on Google Kubernetes Engine (GKE):
kubectl apply --server-side -f deploy/bundle.yaml
kubectl apply -f deploy/cr.yaml
NOTE:
I am using the latest versions of bundle.yaml and cr.yaml from the main branch of the GitHub repo.
GKE Kubernetes version: 1.25.13-gke.200
Problem:
The cluster is in state "initializing" forever.
Watch the logs of the operator. The following stack trace is being reported constantly with a short interval of several seconds:
2023-09-19T09:38:01.178Z ERROR failed to reconcile cluster {"controller": "psmdb-controller", "object": {"name":"my-cluster-name","namespace":"default"} , "namespace": "default", "name": "my-cluster-name", "reconcileID": "bdfb8873-1a9a-4699-bb53-ec1d02d29c3c", "replset": "rs0", "error": "dial: ping mongo: server selection error: context deadline exceeded, current topology: { Type: ReplicaSetNoPrimary, Servers: [ { Addr: my-cluster-name-rs0-0.my-cluster-name-rs0.default.svc.cluster.local:27017, Type: Unknown, Last error: dial tcp: lookup my-cluster-name-rs0-0.my-cluster-name-rs0.default.svc.cluster.local on 10.32.16.10:53: no such host } , { Addr: my-cluster-name-rs0-1.my-cluster-name-rs0.default.svc.cluster.local:27017, Type: Unknown, Last error: dial tcp: lookup my-cluster-name-rs0-1.my-cluster-name-rs0.default.svc.cluster.local on 10.32.16.10:53: no such host }, { Addr: my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017, Type: Unknown, Last error: dial tcp: lookup my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local on 10.32.16.10:53: no such host }, ] }", "errorVerbose": "server selection error: context deadline exceeded, current topology: { Type: ReplicaSetNoPrimary, Servers: [ { Addr: my-cluster-name-rs0-0.my-cluster-name-rs0.default.svc.cluster.local:27017, Type: Unknown, Last error: dial tcp: lookup my-cluster-name-rs0-0.my-cluster-name-rs0.default.svc.cluster.local on 10.32.16.10:53: no such host } , { Addr: my-cluster-name-rs0-1.my-cluster-name-rs0.default.svc.cluster.local:27017, Type: Unknown, Last error: dial tcp: lookup my-cluster-name-rs0-1.my-cluster-name-rs0.default.svc.cluster.local on 10.32.16.10:53: no such host }, { Addr: my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017, Type: Unknown, Last error: dial tcp: lookup my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local on 10.32.16.10:53: no such host }, ] }\nping 
mongo\ngithub.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo.Dial\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo/mongo.go:111\ngithub.com/percona/percona-server-mongodb-operator/pkg/psmdb.MongoClient\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/psmdb/client.go:52\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*mongoClientProvider).Mongo\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/connections.go:37\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).mongoClientWithRole\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/connections.go:59\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).reconcileCluster\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/mgo.go:87\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).Reconcile\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/psmdb_controller.go:493\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:119\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:316\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:266\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1
/pkg/internal/controller/controller.go:227\nruntime.goexit\n\t/usr/local/go/src/runtime/asm_amd64.s:1598\ndial\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).reconcileCluster\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/mgo.go:93\ngithub.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).Reconcile\n\t/go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/psmdb_controller.go:493\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:119\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:316\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:266\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:227\nruntime.goexit\n\t/usr/local/go/src/runtime/asm_amd64.s:1598"} github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb.(*ReconcilePerconaServerMongoDB).Reconcile /go/src/github.com/percona/percona-server-mongodb-operator/pkg/controller/perconaservermongodb/psmdb_controller.go:495 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:119 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:316 
sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:266 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2 /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.16.1/pkg/internal/controller/controller.go:227
More Details:
k describe pod/my-cluster-name-rs0-0
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Pulling 31m (x13 over 96m) kubelet Pulling image "percona/percona-server-mongodb:6.0.4-3"
Warning Unhealthy 21m (x6 over 24m) kubelet Liveness probe errored: command "/opt/percona/mongodb-healthcheck k8s liveness --ssl --sslInsecure --sslCAFile /etc/mongodb-ssl/ca.crt --sslPEMKeyFile /tmp/tls.pem --startupDelaySeconds 7200" timed out
Warning BackOff 88s (x281 over 80m) kubelet Back-off restarting failed container
k logs -f my-cluster-name-rs0-0
luster.local:27017","error":"HostUnreachable: Error connecting to my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017 :: caused by :: Could not find address for my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017: SocketException: Host not found (authoritative)","replicaSet":"rs0","response":{}}} {"t":{"$date":"2023-09-19T12:34:28.959+00:00"},"s":"I", "c":"NETWORK", "id":4712102, "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"Host failed in replica set","attr":{"replicaSet":"rs0","host":"my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017","error":{"code":6,"codeName":"HostUnreachable","errmsg":"Error connecting to my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017 :: caused by :: Could not find address for my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017: SocketException: Host not found (authoritative)"},"action":{"dropConnections":true,"requestImmediateCheck":false,"outcome":{"host":"my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017","success":false,"errorMessage":"HostUnreachable: Error connecting to my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017 :: caused by :: Could not find address for my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017: SocketException: Host not found (authoritative)"}}}} {"t":{"$date":"2023-09-19T12:34:28.972+00:00"},"s":"I", "c":"REPL_HB", "id":23974, "ctx":"ReplCoord-18","msg":"Heartbeat failed after max retries","attr":{"target":"my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017","maxHeartbeatRetries":2,"error":{"code":6,"codeName":"HostUnreachable","errmsg":"Error connecting to my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017 :: caused by :: Could not find address for my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017: SocketException: Host not found (authoritative)"}}} 
{"t":{"$date":"2023-09-19T12:34:29.459+00:00"},"s":"I", "c":"CONNPOOL", "id":22576, "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"Connecting","attr":{"hostAndPort":"my-cluster-name-rs0-2.my-cluster-name-rs0.default.svc.cluster.local:27017"}}