# Script to detect "orphaned" TLS secrets when cert-manager (cainjector) complains about "unable to fetch certificate that owns the secret", because deleting a Certificate does not (by default) delete the Secret that was created for it.

> Source: <https://gist.github.com/Aergonus/a0d6cdf6692ccfe4d71615acf4186a99>
> Published: 2024-11-24 19:40:18+00:00

cleanup-cert-manager-secrets.sh

      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      
Learn more about bidirectional Unicode characters

 
    Show hidden characters

#!/bin/bash

# inspired by https://gist.github.com/lisawolderiksen/8c6026ef55f04e4f5d6a288b5e53214b

# Apache 2.0 License

# Print the help text (purpose, options and examples) to stdout.
# The synopsis line lists every option accepted by the argument parser.
usage() {
    cat << EOF

This script detects TLS secrets which refer to certificates that don't exist (anymore).

This is the case when error "unable to fetch certificate that owns the secret" occurs in cert-manager (cainjector) logs.

The reason is that a certificate has been removed without the secret being deleted.

The solution is to clean up by deleting any secret which belonged to a certificate that no longer exists.

Usage: $0 [-n namespace] [-d true|false] [-v] [-h]

$0

    -n | --namespace <namespace>  (optional, default="all")

    -d | --dry-run <true/false>   (optional, default=true)

    -v | --verbose                (optional)

    -h | --help                   (displays help and exits)

Examples:

Check the TLS secrets and matching certificates in all namespaces, print only the end result

    $0

Check the TLS secrets and matching certificates in the "mynamespace" namespace, print only the end result

    $0 -n mynamespace

Check the TLS secrets and matching certificates in all namespaces, print info about all TLS secrets and certificates

    $0 -v

Check the TLS secrets and matching certificates in all namespaces, delete all "safe" secrets

    $0 -d false

EOF
}

# Defaults; overridden by the command-line options parsed below.
namespace="all"           # "all" is the sentinel for "every namespace"
dry_run=true              # only list candidates unless -d false is given
printfLog='silentPrintf'  # name of the function used for verbose-only output

# Parse the command-line arguments into the globals above.
#
# Arguments: the script's positional parameters ("$@").
# Globals:   namespace, dry_run, printfLog (written)
# Exits:     0 after -h/--help; 1 on an unknown argument, a missing option
#            value, or a --dry-run value other than true/false.
parse_args() {
    # Loop on the argument count so an empty-string argument cannot end
    # parsing early and hide the options that follow it.
    while [ "$#" -gt 0 ]; do
        case "$1" in
            -n | --namespace)
                if [ "$#" -lt 2 ] || [ -z "$2" ]; then
                    echo "Missing value for -n|--namespace." >&2
                    exit 1
                fi
                namespace=$2
                shift   # consume the value; the option itself is shifted below
                ;;
            -d | --dry-run)
                case "${2-}" in
                    true | false)
                        dry_run=$2
                        ;;
                    *)
                        echo "Invalid value for -d|--dry-run. Use 'true' or 'false'." >&2
                        exit 1
                        ;;
                esac
                shift   # consume the value
                ;;
            -v | --verbose)
                # A flag without a value: no extra shift here, otherwise the
                # argument following -v would be dropped.
                printfLog='printf'
                ;;
            -h | --help)
                usage
                exit
                ;;
            *)
                usage
                exit 1
                ;;
        esac
        shift
    done
}

parse_args "$@"

# No-op replacement for printf: accepts any arguments, prints nothing and
# reports success.
# $printfLog points here unless -v/--verbose was given.
silentPrintf() {
    true
}

# Report whether the TLS certificate stored in a secret has expired.
#
# Arguments:
#   $1 - name of the secret; the certificate is read from its data "tls.crt"
#   $2 - namespace of the secret
# Outputs:
#   A one-line explanation on stdout when the certificate is missing or
#   cannot be parsed.
# Returns:
#   0 if the certificate has expired
#   1 if it is still valid, or if it is missing/unparsable (an unreadable
#     secret is never reported as "expired")
check_tls_cert_expired() {
    local secret_name=$1
    local namespace=$2
    # Kept local so repeated calls do not leave state behind in the script.
    local cert_data

    # Extract the certificate from the secret
    cert_data=$(kubectl get secret "$secret_name" -n "$namespace" -o jsonpath='{.data.tls\.crt}' | base64 --decode)
    if [ -z "$cert_data" ]; then
        echo "Secret $secret_name in namespace $namespace does not contain a valid TLS certificate."
        return 1
    fi

    # Make sure openssl can parse the data before asking it about expiry, so
    # that a non-zero status below can only mean "expired".
    if ! printf '%s\n' "$cert_data" | openssl x509 -noout -text > /dev/null 2>&1; then
        echo "Secret $secret_name in namespace $namespace contains an invalid TLS certificate."
        return 1
    fi

    # "-checkend 0" exits 0 while the certificate is still valid and non-zero
    # once it has expired. Letting openssl do the comparison avoids parsing its
    # date text with platform- and locale-specific `date` invocations.
    if printf '%s\n' "$cert_data" | openssl x509 -noout -checkend 0 > /dev/null 2>&1; then
        return 1
    fi
    return 0
}

# Cache of the most recently fetched namespace and the JSON of its resources,
# so consecutive lookups in the same namespace need a single kubectl call.
last_namespace=""
last_namespace_resources=""

# Report whether a secret's name appears anywhere in the namespace's resources.
#
# This is a plain text search over `kubectl get ... -o json`, so any occurrence
# of the name (including as part of a longer string) counts as a reference.
# That errs on the side of keeping a secret.
#
# Arguments:
#   $1 - secret name to look for
#   $2 - namespace to search
# Globals:
#   last_namespace, last_namespace_resources (read and written: cache)
# Outputs:
#   Progress lines on stdout; a warning on stderr if the fetch fails.
# Returns:
#   0 if the name was found, or if the resources could not be fetched (a
#     secret is only reported unused when that was actually verified)
#   1 if the name does not occur in the fetched resources
check_tls_cert_referenced() {
    local secret_name=$1
    local namespace=$2
    local fetched

    # Check if the resources for the current namespace are already cached
    if [[ "$namespace" != "$last_namespace" ]]; then
        printf "Fetching resources for namespace %s ...\n" "$namespace"
        # The "all" category does not include Ingress objects, which commonly
        # reference TLS secrets via spec.tls[].secretName, so request them
        # explicitly.
        if ! fetched=$(kubectl get all,ingress -n "$namespace" -o json); then
            # Do not cache a failed fetch: empty output would make every
            # secret in this namespace look unused.
            printf 'Could not fetch resources for namespace %s; treating secret %s as in use.\n' \
                "$namespace" "$secret_name" >&2
            return 0
        fi
        last_namespace_resources=$fetched
        last_namespace="$namespace"
    fi

    printf "Searching for secret %s in namespace %s ...\n" "$secret_name" "$namespace"

    # -F: secret names may contain dots, which must not act as regex wildcards.
    if grep -qF -- "$secret_name" <<< "$last_namespace_resources"; then
        printf "Secret %s is used in the namespace %s\n" "$secret_name" "$namespace"
        return 0
    else
        printf "Secret %s is not used in the namespace %s\n" "$secret_name" "$namespace"
        return 1
    fi
}

# Print the given secrets, or delete them when dry-run mode is switched off.
#
# Arguments: $1 - namespace; remaining arguments - secret names
# Globals:   dry_run (read)
_list_or_delete_secrets() {
    local namespace=$1
    shift
    local secret
    for secret in "$@"; do
        if [ "$dry_run" = true ]; then
            printf "%s \n" "$secret"
        else
            kubectl delete secret -n "$namespace" "$secret"
        fi
    done
}

# Check the TLS secrets of one namespace against its Certificate resources.
#
# Every secret of type kubernetes.io/tls whose cert-manager certificate-name
# annotation does not name an existing Certificate (this includes secrets that
# carry no such annotation at all) is classified as:
#   - expired            -> listed, or deleted when dry_run is not "true"
#   - seemingly unused   -> listed, or deleted when dry_run is not "true"
#   - still in use       -> listed only, never deleted
#
# Arguments:
#   $1 - namespace to process
# Globals:
#   printfLog (read: function used for verbose output), dry_run (read)
# Outputs:
#   Report on stdout; a message on stderr if the namespace has to be skipped.
# Returns:
#   1 if the secrets or certificates could not be listed (nothing is reported
#   or deleted in that case), otherwise the status of the last command run.
check_tls_secrets_and_certs() {
    local namespace=$1
    local secrets_with_certificate_name certificate_names
    local entry secret_name cert secret
    local -a secrets_without_certs=()
    local -a expired_secrets_without_certs=()
    local -a secrets_seemingly_unused=()
    local -a secrets_in_use=()

    printf "\nProcessing namespace: %s\n" "$namespace"

    # Space-separated "secret,certificate-name" pairs; the certificate name is
    # empty for secrets without the cert-manager annotation.
    if ! secrets_with_certificate_name=$(kubectl get secret -n "$namespace" -o jsonpath="{range .items[?(.type=='kubernetes.io/tls')]}{.metadata.name},{.metadata.annotations.cert-manager\.io/certificate-name} {end}"); then
        printf 'Could not list secrets in namespace %s; skipping it.\n' "$namespace" >&2
        return 1
    fi

    # Fetch the certificate names once instead of once per secret. If this
    # fails, stop: an empty list would make every secret look orphaned.
    if ! certificate_names=$(kubectl get certificate -n "$namespace" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}'); then
        printf 'Could not list certificates in namespace %s; skipping it.\n' "$namespace" >&2
        return 1
    fi

    $printfLog "Listing secrets (of type kubernetes.io/tls) with their annotated certificate:\n"
    # shellcheck disable=SC2086 # splitting on whitespace is intended here
    for entry in $secrets_with_certificate_name; do
        $printfLog '%s \n' "$entry"
    done

    $printfLog "\nListing certificates:\n"
    $printfLog '%s\n' "$certificate_names"

    $printfLog "\nLooking for certificates matching the secrets:\n"
    # shellcheck disable=SC2086 # splitting on whitespace is intended here
    for entry in $secrets_with_certificate_name; do
        IFS="," read -r secret_name cert <<< "$entry"
        $printfLog 'Secret %s is made using cert %s. Looking for that cert...\n' "$secret_name" "$cert"
        # -x: the whole line must equal the name, so "foo" does not match
        # "foo-bar". An empty name is never looked up, because it would match
        # the empty line produced by an empty certificate list.
        if [[ -n "$cert" ]] && grep -qxF -- "$cert" <<< "$certificate_names"; then
            $printfLog "Certificate found.\n"
        else
            secrets_without_certs+=("$secret_name")
            $printfLog '\n##### WARNING: Secret %s has no matching certificate (%s). #####\n\n' "$secret_name" "$cert"
        fi
    done

    # Just because there is no associated Certificate doesn't mean it's not in use.
    # Other services could create secrets without associated Certificates that are
    # not managed by cert-manager; sources could be helm installs/nginx ingress.
    $printfLog '\nChecking secrets in namespace %s that have no matching certificates:\n' "$namespace"
    for secret in "${secrets_without_certs[@]}"; do
        # A simple safety check to see if the TLS cert is expired
        if check_tls_cert_expired "$secret" "$namespace"; then
            expired_secrets_without_certs+=("$secret")
        # We can also check if the secret is used/referenced anywhere, like the k9s functionality
        elif ! check_tls_cert_referenced "$secret" "$namespace"; then
            secrets_seemingly_unused+=("$secret")
        else
            secrets_in_use+=("$secret")
        fi
    done

    if [ ${#secrets_in_use[@]} -ne 0 ]; then
        printf "\nSecrets still in use with no matching certificates (should not be deleted):\n"
        # printf repeats the format for every argument: one secret per line.
        printf "%s \n" "${secrets_in_use[@]}"
    fi

    if [ ${#secrets_seemingly_unused[@]} -ne 0 ]; then
        printf "\nSecrets probably not in use with no matching certificates (possibly could be deleted):\n"
        _list_or_delete_secrets "$namespace" "${secrets_seemingly_unused[@]}"
    fi

    if [ ${#expired_secrets_without_certs[@]} -ne 0 ]; then
        printf "\nExpired Secrets with no matching certificates (probably can be deleted):\n"
        _list_or_delete_secrets "$namespace" "${expired_secrets_without_certs[@]}"
    fi
}

# Run the TLS secret check for one namespace, or for every namespace in the
# cluster when the target is the sentinel value "all".
#
# Arguments:
#   $1 - namespace name, or "all"
# Returns:
#   1 if the namespace list could not be fetched (nothing is checked then),
#   otherwise the status of the last check_tls_secrets_and_certs call.
run_namespace_checks() {
    local target=$1
    local namespaces ns

    if [[ "$target" != "all" ]]; then
        check_tls_secrets_and_certs "$target"
        return
    fi

    # Fail loudly: an empty list would otherwise look like a clean run.
    if ! namespaces=$(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}'); then
        printf 'Could not list namespaces; nothing was checked.\n' >&2
        return 1
    fi

    # shellcheck disable=SC2086 # splitting the space-separated list is intended
    for ns in $namespaces; do
        check_tls_secrets_and_certs "$ns"
    done
}

run_namespace_checks "$namespace"
