Skip to content

production-heartbeat #4611

production-heartbeat

production-heartbeat #4611

name: production-heartbeat
on:
workflow_dispatch: # As per documentation, the colon is necessary even though no config is required.
schedule:
# Cron syntax is "minute[0-59] hour[0-23] date[1-31] month[1-12] day[0-6]". '*' is 'any value', and multiple values
# can be specified with comma-separated lists. All times are UTC.
# So this expression means "run at 45 minutes past 1, 5, and 9 AM/PM UTC". The hours were chosen so that
# the jobs run only close to business hours of Central Time.
# Days were chosen to run only from Monday through Friday.
- cron: '45 13,17,21 * * 1,2,3,4,5'
jobs:
production-heartbeat:
strategy:
# By default, if any job in a matrix fails, all other jobs are immediately cancelled. This makes the jobs run to completion instead.
fail-fast: false
matrix:
os: [{vm: ubuntu-latest, exe: .sh}, {vm: windows-2019, exe: .cmd}]
node: ['lts/*']
runs-on: ${{ matrix.os.vm }}
timeout-minutes: 60
steps:
# === Setup. We need to get the code, set up nodejs, and create the results directory. ===
- uses: actions/checkout@v4
with:
ref: 'release'
- uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node }}
- run: mkdir smoke-test-results
# === Set our environment variables, either using default values or the repo's secrets ===
- name: Set environment variables
id: env_var_setup
# We'll want to use bash for this, to avoid any cross-platform shenanigans
shell: bash
run: |
# In the following script, the use of the `echo "name=value" >> $GITHUB_ENV` structure is used to set/update
# environment variables. Such updates are visible to all subsequent steps.
#
# If the CLI_VERSION repo secret is set, we want to install that version ofsf-cli, so we set an environment
# variable. Otherwise, we leave the environment variable unset, so it implicitly defaults to `latest`.
# Note: This can be used to intentionally fail the GHA by providing an invalid version number.
if [[ -n "${{ secrets.CLI_VERSION }}" ]]; then
echo "CLI_VERSION=@${{ secrets.CLI_VERSION}}" >> $GITHUB_ENV
fi
# If the SCANNER_VERSION repo secret is set, we want to install that version of sfdx-scanner, so we set an
# environment variable. Otherwise, we leave the environment variable unset, so it implicitly defaults to `latest`.
# Note: This can be used to intentionally fail the GHA by providing an invalid version number.
if [[ -n "${{ secrets.SCANNER_VERSION }}" ]]; then
echo "SCANNER_VERSION=@${{ secrets.SCANNER_VERSION }}" >> $GITHUB_ENV
fi
# If the FAIL_SMOKE_TESTS repo secret is set to ANY value, we should respond by deleting the `test/test-jars`
# folder. The smoke tests expect this folder's contents to exist, so an invocation of `scanner:rule:add` should
# fail, thereby failing the smoke tests as a whole.
# Note: This serves no purpose aside from providing a way to simulate a smoke test failure.
if [[ -n "${{ secrets.FAIL_SMOKE_TESTS }}" ]]; then
rm -rf ./test/test-jars
fi
# === Make three attempts to install SF through npm ===
- name: Install SF
id: sf_install
# If the first attempt fails, wait a minute and try again. After a second failure, wait 5 minutes then try again. Then give up.
# Set an output parameter, `retry_count`, indicating the number of retry attempts that were made.
run: |
(echo "::set-output name=retry_count::0" && npm install -g @salesforce/cli${{ env.CLI_VERSION }}) ||
(echo "::set-output name=retry_count::1" && sleep 60 && npm install -g @salesforce/cli${{ env.CLI_VERSION }}) ||
(echo "::set-output name=retry_count::2" && sleep 300 && npm install -g @salesforce/cli${{ env.CLI_VERSION }})
# === Make three attempts to install the scanner plugin through sf ===
- name: Install Scanner Plugin
id: scanner_install
# If the first attempt fails, wait a minute and try again. After a second failure, wait 5 minutes then try again. Then give up.
# Set an output parameter, `retry_count`, indicating the number of retry attempts that were made.
run: |
(echo "::set-output name=retry_count::0" && sf plugins install @salesforce/sfdx-scanner${{ env.SCANNER_VERSION }}) ||
(echo "::set-output name=retry_count::1" && sleep 60 && sf plugins install @salesforce/sfdx-scanner${{ env.SCANNER_VERSION }}) ||
(echo "::set-output name=retry_count::2" && sleep 300 && sf plugins install @salesforce/sfdx-scanner${{ env.SCANNER_VERSION }})
# === Log the installed plugins for easier debugging ===
- name: Log plugins
run: sf plugins
# === Attempt to execute the smoke tests ===
- name: Run smoke tests
id: smoke_tests
run: smoke-tests/smoke-test${{ matrix.os.exe }} sf
# === Upload the smoke-test-results folder as an artifact ===
- name: Upload smoke-test-results folder as artifact
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: smoke-test-results-${{ runner.os }}
path: smoke-test-results
# === Report any problems ===
- name: Report problems
# There are problems if any step failed or was skipped.
# Note that the `join()` call omits null values, so if any steps were skipped, they won't have a corresponding
# value in the string.
if: ${{ failure() || cancelled() }}
shell: bash
env:
# If we're here because steps failed or were skipped, then that's a critical problem. Otherwise it's a normal one.
# We can't use the `failure()` or `cancelled()` convenience methods outside of the `if` condition, hence the
# `contains()` calls.
IS_CRITICAL: ${{ contains(join(steps.*.outcome), 'failure') || contains(join(steps.*.outcome), 'skipped') }}
# Build the status strings for each step as environment variables to save space later. Null retry_count values
# will be replaced with `n/a` to maintain readability in the alert.
CLI_INSTALL_STATUS: ${{ steps.sf_install.outcome }} after ${{ steps.sf_install.outputs.retry_count || 'n/a' }} retries
SCANNER_INSTALL_STATUS: ${{ steps.scanner_install.outcome }} after ${{ steps.scanner_install.outputs.retry_count || 'n/a' }} retries
SMOKE_TESTS_STATUS: ${{ steps.smoke_tests.outcome }}
# A link to this run, so the PagerDuty assignee can quickly get here.
RUN_LINK: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
run: |
# GHA env-vars don't have robust conditional logic, so we'll use this if-else branch to define some bash env-vars.
if [[ ${{ env.IS_CRITICAL }} == true ]]; then
ALERT_SEV="critical"
ALERT_SUMMARY="Production heartbeat script failed on ${{ runner.os }}"
else
ALERT_SEV="info"
ALERT_SUMMARY="Production heartbeat script succeeded with retries on ${{ runner.os }}"
fi
# Define a helper function to create our POST request's data, to sidestep issues with nested quotations.
generate_post_data() {
# This is known as a HereDoc, and it lets us declare multi-line input ending when the specified limit string,
# in this case EOF, is encountered.
cat <<EOF
{"payload": {
"summary": "${ALERT_SUMMARY}",
"source": "Github Actions",
"severity": "${ALERT_SEV}",
"custom_details": "SF install: ${{ env.CLI_INSTALL_STATUS }}. Scanner install: ${{ env.SCANNER_INSTALL_STATUS }}. Smoke tests: ${{ env.SMOKE_TESTS_STATUS }}."
},
"links": [{
"href": "${{ env.RUN_LINK }}",
"text": "Link to action execution"
}],
"event_action": "trigger",
"dedup_key": "GH-HB-${{ matrix.os.vm }}-${{ matrix.node }}",
"routing_key": "${{ secrets.PAGERDUTY_HEARTBEAT_KEY }}"
}
EOF
}
# Make our POST request
curl --request POST --data "$(generate_post_data)" https://events.pagerduty.com/v2/enqueue