Pre-upgrade-script
To improve the experience during your upcoming upgrades, we recommend running our pre-check script before initiating the process. This proactive step is designed to identify and address potential blockers that may arise during the upgrade.
The bash script has the following prerequisites:
- kubectl binaries and access
- jq binaries
#!/bin/bash
#
# This Pre-upgrade check script can be used by Swisscom CaaS customers
# before upgrading Kubernetes clusters on CaaS to see if the cluster
# has some common conditions which can lead to issues during the upgrade.
#
# Debug switch: change the value to "yes" to make debug() print its messages.
debug="no"
# Print a warning on stdout: an empty separator line, then the message
# prefixed with "Warning! ".
# Arguments: $* - words of the message (joined with spaces)
warn() {
  printf '\n%s\n' "Warning! $*"
}
# Report a fatal error on stdout and terminate the script with status 1.
# Arguments: $* - words of the error message (joined with spaces)
die() {
  printf '%s\n' "Error! $*" "Exiting."
  exit 1
}
# Print a "Debug: "-prefixed message on stdout when the global $debug is "yes".
# Globals:   debug (read)
# Arguments: $* - words of the message (joined with spaces)
# Returns:   always 0, so a disabled debug call never looks like a failure
#            to a caller that inspects $? or runs under `set -e`.
debug() {
  if [[ $debug == "yes" ]]; then
    echo "Debug: $*"
  fi
  return 0
}
# Introductory banner explaining what the script checks and how to read its output.
# A single printf prints one line per argument (empty arguments produce blank lines).
printf '%s\n' \
  "This script will check the k8s cluster before upgrade for some common conditions" \
  "which often require operator attention before proceeding with the upgrade." \
  "" \
  "If you see warnings below, it is likely that an issue is possible during the upgrade." \
  "Please take your time to understand exactly what is happening, and take appropriate action." \
  "" \
  "This script checks for some of the most common conditions experienced by Swisscom customers." \
  "It is not an exhaustive list. The operator should exercise their judgement before" \
  "proceeding with the upgrade." \
  ""
# Check pre-requisites
# `command -v` is a shell builtin, so the lookup itself cannot fail on minimal
# systems where the external `which` utility is not installed.
command -v kubectl >/dev/null 2>&1 \
  || die "This script requires kubectl to query your cluster."
command -v jq >/dev/null 2>&1 \
  || die "This script requires jq. Please install before proceeding: https://jqlang.github.io/jq/download/"
# Check kube-api connection and list namespaces
# pipefail: a pipeline reports failure when any stage fails, so a kubectl error
# is not hidden by a successful jq/grep at the end of the pipe.
set -o pipefail
context=$(kubectl config current-context) || context=""
[[ -n "$context" ]] || die "The script is unable to find a kubectl context to connect to."
echo "==== Connecting to k8s cluster context $context..."

# Get the server version (GitVersion, e.g. "v1.24.3+vmware.1").
# JSON output is used instead of `kubectl version --short`, because the --short
# flag is no longer accepted by recent kubectl releases.
server_version=$(kubectl version -o json | jq -r '.serverVersion.gitVersion // empty')
if [[ $server_version =~ ^v?([0-9]+)\.([0-9]+) ]]; then
  major_version=${BASH_REMATCH[1]}
  minor_version=${BASH_REMATCH[2]}
  echo "Detected Kubernetes cluster version: $server_version"
else
  # Leave both parts empty so later checks can tell that the version is unknown.
  major_version=""
  minor_version=""
  warn "Unable to detect the Kubernetes cluster version."
fi

# Get namespaces (one name per line)
namespaces=$(kubectl get ns -o json | jq -r ".items[].metadata.name") || namespaces=""
[[ -n "$namespaces" ]] || die "The script is unable to find any k8s namespaces."
# Show the list on a single line: newlines are replaced by spaces.
echo "Retrieved a list of cluster namespaces: ${namespaces//$'\n'/ }"
# Check for PDB config requiring manual intervention during upgrades
# For every namespace, list its PodDisruptionBudgets and warn about each one whose
# status.disruptionsAllowed is zero.
echo ""
echo "==== Checking for Pod Disruption Budget config requiring manual intervention during upgrades..."
# $namespaces is deliberately unquoted: it holds one namespace name per line.
for namespace in $namespaces ; do
  debug "Namespace: $namespace"
  # A single query per namespace yields "<name> <disruptionsAllowed>" for every
  # PDB, so no additional kubectl call per PDB is needed.
  if ! pdb_rows=$(kubectl get poddisruptionbudgets -n "$namespace" -o json \
      | jq -r '.items[] | "\(.metadata.name) \(.status.disruptionsAllowed)"') ; then
    # Do not report a failed query as "no PDBs": the operator must know the
    # namespace was not actually checked.
    warn "Unable to list PDBs in namespace '$namespace'. This namespace was not checked."
    continue
  fi
  if [[ -z "$pdb_rows" ]] ; then
    echo "No PDBs found in namespace '$namespace'"
    continue
  fi
  debug "PDBs (name disruptionsAllowed): ${pdb_rows//$'\n'/, }"
  while read -r pdb disruptions_allowed ; do
    echo -n "Checking PDB '$pdb' in namespace '$namespace': "
    if [[ $disruptions_allowed == "0" ]] ; then
      warn "Pod disruption budget '$pdb' has disruptionsAllowed at zero."
      echo " This means that it is not possible to upgrade the cluster without downtime."
      echo " Automatic draining of the node during the upgrade will not be possible."
      echo " Consult the documentation at https://kubernetes.io/docs/tasks/run-application/configure-pdb/ for details"
      echo ""
    else
      echo "OK"
    fi
  done <<< "$pdb_rows"
done
# Check for volumes pending deletion
# For every namespace, list its PersistentVolumeClaims and warn about each one
# that is being deleted (shown as "Terminating" by kubectl).
echo ""
echo "==== Checking for volumes pending deletion..."
# $namespaces is deliberately unquoted: it holds one namespace name per line.
for namespace in $namespaces ; do
  debug "Namespace: $namespace"
  # A single JSON query per namespace yields "<name> <state>" for every PVC.
  # A PVC with metadata.deletionTimestamp set is the one kubectl displays as
  # "Terminating"; reading it from JSON avoids parsing the human-readable table.
  if ! pvc_rows=$(kubectl get pvc -n "$namespace" -o json \
      | jq -r '.items[] | [.metadata.name, (if .metadata.deletionTimestamp then "Terminating" else (.status.phase // "Unknown") end)] | join(" ")') ; then
    # Do not report a failed query as "no PVCs": the operator must know the
    # namespace was not actually checked.
    warn "Unable to list PVCs in namespace '$namespace'. This namespace was not checked."
    continue
  fi
  if [[ -z "$pvc_rows" ]] ; then
    echo "No PVCs found in namespace '$namespace'"
    continue
  fi
  debug "PVCs (name state): ${pvc_rows//$'\n'/, }"
  while read -r pvc pvc_state ; do
    echo -n "Checking PVC '$pvc' in namespace '$namespace': "
    if [[ $pvc_state == "Terminating" ]] ; then
      warn "PersistentVolumeClaim '$pvc' is in Terminating state."
      echo " This means that the associated volume will likely get deleted"
      echo " during the upgrade once the node is drained."
      echo " Consult the documentation for details:"
      echo " https://kubernetes.io/docs/concepts/storage/persistent-volumes/#storage-object-in-use-protection"
      echo ""
    else
      echo "OK"
    fi
  done <<< "$pvc_rows"
done
# PodSecurityPolicy check: name lists used by the checks below.
# Each value is a '|'-separated list, so it can be dropped into an extended
# regular expression as an alternation.

# PSP names that are treated as system policies (not user-defined).
system_psp_names="kube-system-psp|pks-privileged|pks-restricted"
system_psp_names+="|a-vrops-psp|a-wavefront-psp|cert-generator|event-controller"
system_psp_names+="|fluent-bit|metric-controller|node-exporter|observability-manager"
system_psp_names+="|sink-controller|telegraf|validator|vsphere-csi-webhook"

# Subset of the system PSPs whose bindings are inspected for use by workloads.
system_psp_names_for_users="pks-privileged"
system_psp_names_for_users+="|pks-restricted"

# Namespaces considered system namespaces; bindings found there are ignored.
system_namespaces="kube-node-lease|kube-public|kube-system"
system_namespaces+="|nsx-system|pks-system|pks-system-host-monitoring|vmware-system-csi"
# Function to check PSP usage in RoleBindings
#
# Lists the RoleBindings of all namespaces, keeps the lines that contain the
# given PSP name, and reports every match located outside the system namespaces.
# Globals:   system_namespaces (read) - '|'-separated list of namespaces to skip
#            psp_binding_found (written) - "true" if a match was reported, else "false"
# Arguments: $1 - PSP name to search for (matched as a literal string)
# Outputs:   one " Found RoleBinding ..." line per reported binding on stdout
# Returns:   0 if at least one binding was reported, 1 otherwise
check_psp_usage_rolebinding() {
  local psp="$1"
  local namespace name role _rest
  # Whole-name match: the namespace must be exactly one of the listed system
  # namespaces. A substring test would also skip user namespaces such as
  # "system" or "kube".
  local system_ns_regex="^(${system_namespaces})\$"
  psp_binding_found=false
  # Columns of `kubectl get rolebinding --all-namespaces --no-headers`:
  # NAMESPACE NAME ROLE AGE ...
  while read -r namespace name role _rest; do
    [[ -n "$namespace" ]] || continue
    # Skip system namespaces
    if [[ ! $namespace =~ $system_ns_regex ]]; then
      echo " Found RoleBinding for PSP '$psp': Namespace: $namespace, Name: $name, Role/SA: $role"
      psp_binding_found=true
    fi
  done < <(kubectl get rolebinding --all-namespaces --no-headers | grep -F -- "$psp")
  [[ $psp_binding_found == true ]]
}
# Function to check PSP usage in ClusterRoleBindings
#
# Lists the ClusterRoleBindings, keeps the lines that contain the given PSP
# name, and reports every match.
# Globals:   psp_binding_found (written) - "true" if a match was reported, else "false"
# Arguments: $1 - PSP name to search for (matched as a literal string)
# Outputs:   one " Found ClusterRoleBinding ..." line per reported binding on stdout
# Returns:   0 if at least one binding was reported, 1 otherwise
check_psp_usage_clusterrolebinding() {
  local psp="$1"
  local name role _rest
  psp_binding_found=false
  # ClusterRoleBindings are cluster-scoped, so there is no namespace column:
  # `kubectl get clusterrolebinding --no-headers` prints NAME ROLE AGE ...
  while read -r name role _rest; do
    [[ -n "$name" ]] || continue
    echo " Found ClusterRoleBinding for PSP '$psp': Name: $name, Role/SA: $role"
    psp_binding_found=true
  done < <(kubectl get clusterrolebinding --no-headers | grep -F -- "$psp")
  [[ $psp_binding_found == true ]]
}
echo ""
echo "==== Checking PodSecurityPolicy..."
# List PSP names (one per line) from JSON output.
# stderr is intentionally discarded: it carries only the PSP deprecation warning,
# the "No resources found" notice or the "resource type not found" error, none of
# which may end up in the list of names. A failure is still visible through the
# exit status / empty output handled below.
psp_output="$(kubectl get podsecuritypolicies -o json 2>/dev/null | jq -r '.items[].metadata.name')"
exit_status=$?
if [[ $exit_status -ne 0 || -z "$psp_output" ]]; then
  if ! [[ $major_version =~ ^[0-9]+$ && $minor_version =~ ^[0-9]+$ ]]; then
    # Without a numeric version it is impossible to say whether the absence of
    # PSPs is expected.
    warn "No PodSecurityPolicies detected in this cluster, and the cluster version could not be determined."
    echo " Please check your cluster manually."
  elif (( 10#$major_version == 1 && 10#$minor_version <= 24 )); then
    warn "No PodSecurityPolicies detected in this cluster."
    echo " This is unusual, as a TKGi 1.15 / K8s 1.24 or earlier cluster normally should have at least system PSPs defined."
    echo " Please check your cluster manually."
  else
    echo "No PodSecurityPolicies detected in this cluster."
    echo " This is normal, as TKGi 1.16 / K8s 1.25 or later clusters do not support PSPs."
  fi
  # Nothing left to check; this is the last section of the script.
  exit 0
fi

# Convert PSP output to an array of names (one element per non-empty line)
psp_names=()
while IFS= read -r psp_line; do
  [[ -n "$psp_line" ]] && psp_names+=("$psp_line")
done <<< "$psp_output"

user_psps_found=false
pre_created_psp_use_found=false
for psp_name in "${psp_names[@]}"; do
  # Both patterns are anchored at the start only, i.e. they are prefix matches
  # against the '|'-separated name lists.
  if [[ ! $psp_name =~ ^($system_psp_names) ]]; then
    echo " Found user-defined PodSecurityPolicy '$psp_name'"
    # Called for their report output only; the return status is not needed here.
    check_psp_usage_rolebinding "$psp_name"
    check_psp_usage_clusterrolebinding "$psp_name"
    user_psps_found=true
  elif [[ $psp_name =~ ^($system_psp_names_for_users) ]]; then
    echo " Found TKGi pre-created PodSecurityPolicy '$psp_name'"
    if check_psp_usage_rolebinding "$psp_name"; then
      pre_created_psp_use_found=true
    fi
    if check_psp_usage_clusterrolebinding "$psp_name"; then
      pre_created_psp_use_found=true
    fi
  else
    debug "Found TKGi pre-created PodSecurityPolicy '$psp_name'. This is a system policy and will be safely ignored."
  fi
done

if [[ $pre_created_psp_use_found == true ]] ; then
  warn "This cluster is found to be using TKGi pre-created PodSecurityPolicies pks-privileged and/or pks-restricted"
  echo " Once upgraded to TKGi 1.16 / K8s 1.25, this binding will have no effect, which may result in disruption of the workloads."
else
  echo " This cluster has not been found to be using TKGi pre-created PodSecurityPolicies pks-privileged and pks-restricted"
fi
if [[ $user_psps_found == true ]] ; then
  warn "This cluster contains PodSecurityPolicies created by the cluster operator."
  echo " VMware does not support upgrade of clusters to TKGi 1.16 / K8s 1.25 when user-defined PSPs are still present in the cluster."
else
  echo " This cluster has not been found to contain PodSecurityPolicies created by the cluster operator."
fi
if [[ $pre_created_psp_use_found == true || $user_psps_found == true ]] ; then
  echo " PodSecurityPolicy in this cluster will need to be migrated before proceeding with upgrade to TKGi 1.16."
  echo " Consult the documentation at https://kubernetes.io/docs/tasks/configure-pod-container/migrate-from-psp/ for details"
fi