From 1b1877f025c4f7a9cfb2a1d289a33d1a06409dbe Mon Sep 17 00:00:00 2001 From: Brennan Kinney <5098581+polarathene@users.noreply.github.com> Date: Mon, 18 Apr 2022 22:52:50 +1200 Subject: [PATCH] refactor: letsencrypt implicit location discovery (#2525) * chore: Extract letsencrypt logic into methods This allows other scripts to share the functionality to discover the correct letsencrypt folder from the 3 possible locations (where specific order is important). As these methods should now return a string value, the `return 1` after a panic is now dropped. * chore: Update comments The todo is resolved with this PR, `_setup_ssl` will be called by both cert conditional statements with purpose for each better documented to maintainers at the start of the logic block. * refactor: Defer most logic to helper/ssl.sh The loop is no longer required, extraction is delegated to `_setup_ssl` now. For the change event prevention, we retrieve the relevant FQDN via the new helper method, beyond that it's just indentation diff. `check-for-changes.sh` adjusted to allow locally scoped var declarations by wrapping a function. Presently no loop control flow is needed so this seems fine. Made it clear that `CHANGED` is local and `CHKSUM_FILE` is not. Panic scope doesn't require `SSL_TYPE` for context, it's clearly`letsencrypt`. * fix: Correctly match wildcard results Now that the service configs are properly updated, when the services restart they will return a cert with the SAN `DNS:*.example.test`, which is valid for `mail.example.test`, however the test function did not properly account for this in the regexp query. Resolved by truncating the left-most DNS label from FQDN and adding a third check to match a returned wildcard DNS result. Extracted out the common logic to create the regexp query and renamed the methods to communicate more clearly that they check the FQDN is supported, not necessarily explicitly listed by the cert. * tests(letsencrypt): Enable remaining tests These will now pass. Adjusted comments accordingly. Added an additional test on a fake FQDN that should still be valid to a wildcard cert (SNI validation in a proper setup would reject the connection afterwards). Co-authored-by: Georg Lauterbach <44545919+georglauterbach@users.noreply.github.com> --- target/scripts/check-for-changes.sh | 47 +++++++--------- target/scripts/helpers/ssl.sh | 84 ++++++++++++++++++----------- test/mail_ssl_letsencrypt.bats | 28 +++++----- test/test_helper/tls.bash | 22 ++++---- 4 files changed, 97 insertions(+), 84 deletions(-) diff --git a/target/scripts/check-for-changes.sh b/target/scripts/check-for-changes.sh index 11157c08..4516529d 100755 --- a/target/scripts/check-for-changes.sh +++ b/target/scripts/check-for-changes.sh @@ -3,8 +3,10 @@ # TODO: Adapt for compatibility with LDAP # Only the cert renewal change detection may be relevant for LDAP? +# CHKSUM_FILE global is imported from this file: # shellcheck source=./helpers/index.sh source /usr/local/bin/helpers/index.sh + # This script requires some environment variables to be properly set. This # includes POSTMASTER_ADDRESS (for alias (re-)generation), HOSTNAME and # DOMAINNAME (in ssl.sh). @@ -46,8 +48,8 @@ sleep 10 _log_with_date 'debug' "Chagedetector is ready" -while true -do +function _check_for_changes +{ # get chksum and check it, no need to lock config yet _monitored_files_checksums >"${CHKSUM_FILE}.new" cmp --silent -- "${CHKSUM_FILE}" "${CHKSUM_FILE}.new" @@ -60,12 +62,17 @@ do then _log_with_date 'info' 'Change detected' _create_lock # Shared config safety lock + local CHANGED CHANGED=$(grep -Fxvf "${CHKSUM_FILE}" "${CHKSUM_FILE}.new" | sed 's/^[^ ]\+ //') # TODO Perform updates below conditionally too # Also note that changes are performed in place and are not atomic # We should fix that and write to temporary files, stop, swap and start + # _setup_ssl is required for: + # manual - copy to internal DMS_TLS_PATH (/etc/dms/tls) that Postfix and Dovecot are configured to use. + # acme.json - presently uses /etc/letsencrypt/live/ instead of DMS_TLS_PATH, + # path may change requiring Postfix/Dovecot config update. if [[ ${SSL_TYPE} == 'manual' ]] then # only run the SSL setup again if certificates have really changed. @@ -75,9 +82,6 @@ do || [[ ${CHANGED} =~ ${SSL_ALT_KEY_PATH:-${REGEX_NEVER_MATCH}} ]] then _log_with_date 'debug' 'Manual certificates have changed - extracting certificates' - # we need to run the SSL setup again, because the - # certificates DMS is working with are copies of - # the (now changed) files _setup_ssl fi # `acme.json` is only relevant to Traefik, and is where it stores the certificates it manages. @@ -86,34 +90,19 @@ do elif [[ ${CHANGED} =~ /etc/letsencrypt/acme.json ]] then _log_with_date 'debug' "'/etc/letsencrypt/acme.json' has changed - extracting certificates" + _setup_ssl - # This breaks early as we only need the first successful extraction. - # For more details see the `SSL_TYPE=letsencrypt` case handling in `setup-stack.sh`. - # - # NOTE: HOSTNAME is set via `helpers/dns.sh`, it is not the original system HOSTNAME ENV anymore. - # TODO: SSL_DOMAIN is Traefik specific, it no longer seems relevant and should be considered for removal. - FQDN_LIST=("${SSL_DOMAIN}" "${HOSTNAME}" "${DOMAINNAME}") - for CERT_DOMAIN in "${FQDN_LIST[@]}" - do - _log_with_date 'trace' "Attempting to extract for '${CERT_DOMAIN}'" + # Prevent an unnecessary change detection from the newly extracted cert files by updating their hashes in advance: + local CERT_DOMAIN + CERT_DOMAIN="$(_find_letsencrypt_domain)" + ACME_CERT_DIR="/etc/letsencrypt/live/${CERT_DOMAIN}" - if _extract_certs_from_acme "${CERT_DOMAIN}" - then - # Prevent an unnecessary change detection from the newly extracted cert files by updating their hashes in advance: - CERT_DOMAIN=$(_strip_wildcard_prefix "${CERT_DOMAIN}") - ACME_CERT_DIR="/etc/letsencrypt/live/${CERT_DOMAIN}" - - sed -i "\|${ACME_CERT_DIR}|d" "${CHKSUM_FILE}.new" - sha512sum "${ACME_CERT_DIR}"/*.pem >> "${CHKSUM_FILE}.new" - - break - fi - done + sed -i "\|${ACME_CERT_DIR}|d" "${CHKSUM_FILE}.new" + sha512sum "${ACME_CERT_DIR}"/*.pem >> "${CHKSUM_FILE}.new" fi # If monitored certificate files in /etc/letsencrypt/live have changed and no `acme.json` is in use, # They presently have no special handling other than to trigger a change that will restart Postfix/Dovecot. - # TODO: That should be all that's required, unless the cert file paths have also changed (Postfix/Dovecot configs then need to be updated). # regenerate postfix accounts [[ ${SMTP_ONLY} -ne 1 ]] && _create_accounts @@ -146,7 +135,11 @@ do # mark changes as applied mv "${CHKSUM_FILE}.new" "${CHKSUM_FILE}" +} +while true +do + _check_for_changes sleep 2 done diff --git a/target/scripts/helpers/ssl.sh b/target/scripts/helpers/ssl.sh index 172dcad9..daca4b6d 100644 --- a/target/scripts/helpers/ssl.sh +++ b/target/scripts/helpers/ssl.sh @@ -185,38 +185,10 @@ function _setup_ssl _traefik_support - # letsencrypt folders and files mounted in /etc/letsencrypt - local LETSENCRYPT_DOMAIN - local LETSENCRYPT_KEY - - # Identify a valid letsencrypt FQDN folder to use. - if [[ -n ${SSL_DOMAIN} ]] && [[ -e /etc/letsencrypt/live/$(_strip_wildcard_prefix "${SSL_DOMAIN}")/fullchain.pem ]] - then - LETSENCRYPT_DOMAIN=$(_strip_wildcard_prefix "${SSL_DOMAIN}") - elif [[ -e /etc/letsencrypt/live/${HOSTNAME}/fullchain.pem ]] - then - LETSENCRYPT_DOMAIN=${HOSTNAME} - elif [[ -e /etc/letsencrypt/live/${DOMAINNAME}/fullchain.pem ]] - then - LETSENCRYPT_DOMAIN=${DOMAINNAME} - else - _log 'warn' "Cannot find a valid DOMAIN for '/etc/letsencrypt/live//', tried: '${SSL_DOMAIN}', '${HOSTNAME}', '${DOMAINNAME}'" - dms_panic__misconfigured 'LETSENCRYPT_DOMAIN' "${SCOPE_SSL_TYPE}" - return 1 - fi - - # Verify the FQDN folder also includes a valid private key (`privkey.pem` for Certbot, `key.pem` for extraction by Traefik) - if [[ -e /etc/letsencrypt/live/${LETSENCRYPT_DOMAIN}/privkey.pem ]] - then - LETSENCRYPT_KEY='privkey' - elif [[ -e /etc/letsencrypt/live/${LETSENCRYPT_DOMAIN}/key.pem ]] - then - LETSENCRYPT_KEY='key' - else - _log 'warn' "Cannot find key file ('privkey.pem' or 'key.pem') in '/etc/letsencrypt/live/${LETSENCRYPT_DOMAIN}/'" - dms_panic__misconfigured 'LETSENCRYPT_KEY' "${SCOPE_SSL_TYPE}" - return 1 - fi + # checks folders in /etc/letsencrypt/live to identify which one to implicitly use: + local LETSENCRYPT_DOMAIN LETSENCRYPT_KEY + LETSENCRYPT_DOMAIN="$(_find_letsencrypt_domain)" + LETSENCRYPT_KEY="$(_find_letsencrypt_key "${LETSENCRYPT_DOMAIN}")" # Update relevant config for Postfix and Dovecot _log 'trace' "Adding ${LETSENCRYPT_DOMAIN} SSL certificate to the postfix and dovecot configuration" @@ -408,6 +380,54 @@ function _setup_ssl esac } + +# Identify a valid letsencrypt FQDN folder to use. +function _find_letsencrypt_domain +{ + local LETSENCRYPT_DOMAIN + + if [[ -n ${SSL_DOMAIN} ]] && [[ -e /etc/letsencrypt/live/$(_strip_wildcard_prefix "${SSL_DOMAIN}")/fullchain.pem ]] + then + LETSENCRYPT_DOMAIN=$(_strip_wildcard_prefix "${SSL_DOMAIN}") + elif [[ -e /etc/letsencrypt/live/${HOSTNAME}/fullchain.pem ]] + then + LETSENCRYPT_DOMAIN=${HOSTNAME} + elif [[ -e /etc/letsencrypt/live/${DOMAINNAME}/fullchain.pem ]] + then + LETSENCRYPT_DOMAIN=${DOMAINNAME} + else + _log 'error' "Cannot find a valid DOMAIN for '/etc/letsencrypt/live//', tried: '${SSL_DOMAIN}', '${HOSTNAME}', '${DOMAINNAME}'" + dms_panic__misconfigured 'LETSENCRYPT_DOMAIN' '_find_letsencrypt_domain' + fi + + echo "${LETSENCRYPT_DOMAIN}" +} + +# Verify the FQDN folder also includes a valid private key (`privkey.pem` for Certbot, `key.pem` for extraction by Traefik) +function _find_letsencrypt_key +{ + local LETSENCRYPT_KEY + + local LETSENCRYPT_DOMAIN=${1} + if [[ -z ${LETSENCRYPT_DOMAIN} ]] + then + dms_panic__misconfigured 'LETSENCRYPT_DOMAIN' '_find_letsencrypt_key' + fi + + if [[ -e /etc/letsencrypt/live/${LETSENCRYPT_DOMAIN}/privkey.pem ]] + then + LETSENCRYPT_KEY='privkey' + elif [[ -e /etc/letsencrypt/live/${LETSENCRYPT_DOMAIN}/key.pem ]] + then + LETSENCRYPT_KEY='key' + else + _log 'error' "Cannot find key file ('privkey.pem' or 'key.pem') in '/etc/letsencrypt/live/${LETSENCRYPT_DOMAIN}/'" + dms_panic__misconfigured 'LETSENCRYPT_KEY' '_find_letsencrypt_key' + fi + + echo "${LETSENCRYPT_KEY}" +} + function _extract_certs_from_acme { local CERT_DOMAIN=${1} diff --git a/test/mail_ssl_letsencrypt.bats b/test/mail_ssl_letsencrypt.bats index 87e2e113..594fad96 100644 --- a/test/mail_ssl_letsencrypt.bats +++ b/test/mail_ssl_letsencrypt.bats @@ -59,7 +59,7 @@ function teardown() { #test hostname has certificate files _should_have_valid_config "${TARGET_DOMAIN}" 'privkey.pem' 'fullchain.pem' _should_succesfully_negotiate_tls "${TARGET_DOMAIN}" - _should_not_have_fqdn_in_cert 'example.test' + _should_not_support_fqdn_in_cert 'example.test' } @@ -79,12 +79,11 @@ function teardown() { #test domain has certificate files _should_have_valid_config "${TARGET_DOMAIN}" 'privkey.pem' 'fullchain.pem' _should_succesfully_negotiate_tls "${TARGET_DOMAIN}" - _should_not_have_fqdn_in_cert 'mail.example.test' + _should_not_support_fqdn_in_cert 'mail.example.test' } # When using `acme.json` (Traefik) - a wildcard cert `*.example.test` (SSL_DOMAIN) # should be extracted and be chosen over an existing FQDN `mail.example.test` (HOSTNAME): -# _acme_wildcard should verify the FQDN `mail.example.test` is negotiated, not `example.test`. # # NOTE: Currently all of the `acme.json` configs have the FQDN match a SAN value, # all Subject CN (`main` in acme.json) are `Smallstep Leaf` which is not an FQDN. @@ -92,7 +91,7 @@ function teardown() { @test "ssl(letsencrypt): Traefik 'acme.json' (*.example.test)" { # This test group changes to certs signed with an RSA Root CA key, # These certs all support both FQDNs: `mail.example.test` and `example.test`, - # Except for the wildcard cert `*.example.test`, which should not support `example.test`. + # Except for the wildcard cert `*.example.test`, which intentionally excluded `example.test` when created. # We want to maintain the same FQDN (mail.example.test) between the _acme_ecdsa and _acme_rsa tests. local LOCAL_BASE_PATH="${PWD}/test/test-files/ssl/example.test/with_ca/rsa" @@ -154,28 +153,27 @@ function teardown() { _should_extract_on_changes 'example.test' "${LOCAL_BASE_PATH}/wildcard/rsa.acme.json" _should_have_service_restart_count '2' - # note: https://github.com/docker-mailserver/docker-mailserver/pull/2404 solves this - # TODO: Make this pass. - # As the FQDN has changed since startup, the configs need to be updated accordingly. - # This requires the `changedetector` service event to invoke the same function for TLS configuration - # that is used during container startup to work correctly. A follow up PR will refactor `setup-stack.sh` for supporting this. - # _should_have_valid_config 'example.test' 'key.pem' 'fullchain.pem' + # As the FQDN has changed since startup, the Postfix + Dovecot configs should be updated: + _should_have_valid_config 'example.test' 'key.pem' 'fullchain.pem' local WILDCARD_KEY_PATH="${LOCAL_BASE_PATH}/wildcard/key.rsa.pem" local WILDCARD_CERT_PATH="${LOCAL_BASE_PATH}/wildcard/cert.rsa.pem" _should_have_expected_files 'example.test' "${WILDCARD_KEY_PATH}" "${WILDCARD_CERT_PATH}" - # Verify this works for wildcard certs, it should use `*.example.test` for `mail.example.test` (NOT `example.test`): + # These two tests will confirm wildcard support is working, the supported SANs changed: + # Before (_acme_rsa cert): `DNS:example.test, DNS:mail.example.test` + # After (_acme_wildcard cert): `DNS:*.example.test` + # The difference in support is: + # - `example.test` should no longer be valid. + # - `mail.example.test` should remain valid, but also allow any other subdomain/hostname. _should_succesfully_negotiate_tls 'mail.example.test' - # WARNING: This should fail...but requires resolving the above TODO. - # _should_not_have_fqdn_in_cert 'example.test' + _should_support_fqdn_in_cert 'fake.example.test' + _should_not_support_fqdn_in_cert 'example.test' } _prepare # Unleash the `acme.json` tests! - # NOTE: Test failures aren't as helpful here as bats will only identify function calls at this top-level, - # rather than the actual failing nested function call.. # TODO: Extract methods to separate test cases. _acme_ecdsa _acme_rsa diff --git a/test/test_helper/tls.bash b/test/test_helper/tls.bash index da8b0942..ae4978c0 100644 --- a/test/test_helper/tls.bash +++ b/test/test_helper/tls.bash @@ -52,7 +52,7 @@ function _negotiate_tls() { run docker exec "${CONTAINER_NAME}" sh -c "${CMD_OPENSSL_VERIFY}" assert_output --partial 'Verification: OK' - _should_have_fqdn_in_cert "${FQDN}" "${PORT}" + _should_support_fqdn_in_cert "${FQDN}" "${PORT}" } function _generate_openssl_cmd() { @@ -86,21 +86,23 @@ function _generate_openssl_cmd() { # ? --------------------------------------------- Verify FQDN - -function _should_have_fqdn_in_cert() { +function _get_fqdn_match_query() { local FQDN FQDN=$(escape_fqdn "${1}") - _get_fqdns_for_cert "$@" - assert_output --regexp "Subject: CN = ${FQDN}|DNS:${FQDN}" + # 3rd check is for wildcard support by replacing the 1st DNS label of the FQDN with a `*`, + # eg: `mail.example.test` will become `*.example.test` matching `DNS:*.example.test`. + echo "Subject: CN = ${FQDN}|DNS:${FQDN}|DNS:\*\.${FQDN#*.}" } -function _should_not_have_fqdn_in_cert() { - local FQDN - FQDN=$(escape_fqdn "${1}") - +function _should_support_fqdn_in_cert() { _get_fqdns_for_cert "$@" - refute_output --regexp "Subject: CN = ${FQDN}|DNS:${FQDN}" + assert_output --regexp "$(_get_fqdn_match_query "${1}")" +} + +function _should_not_support_fqdn_in_cert() { + _get_fqdns_for_cert "$@" + refute_output --regexp "$(_get_fqdn_match_query "${1}")" } # Escapes `*` and `.` so the FQDN literal can be used in regex queries