add xms, xmx and log-level args
vemonet committed Jul 23, 2023
1 parent 7de3e9d commit e651200
Showing 4 changed files with 65 additions and 47 deletions.
action.yml: 21 additions & 9 deletions
@@ -1,14 +1,14 @@
 ---
-name: 'Setup Apache Spark'
-description: 'Set up Apache Spark and add the command-line tools to the PATH.'
-author: 'vemonet'
+name: "Setup Apache Spark"
+description: "Set up Apache Spark and add the command-line tools to the PATH."
+author: "vemonet"
 inputs:
   spark-version:
     description: "Apache Spark version to install, see https://spark.apache.org/downloads.html"
     required: true
   hadoop-version:
     description: "Hadoop version for the Spark binaries"
-    default: '3'
+    default: "3"
     required: false
   scala-version:
     description: "Scala version for the Spark binaries (optional)."
@@ -18,14 +18,26 @@ inputs:
     required: false
   py4j-version:
     description: "Py4J version used for the PYTHONPATH."
-    default: '0.10.9'
+    default: "0.10.9"
     required: false
+  xmx:
+    description: "Max RAM passed to Spark options (-Xmx)."
+    default: "2048M"
+    required: false
+  xms:
+    description: "Min RAM passed to Spark options (-Xms)."
+    default: "1024M"
+    required: false
+  log-level:
+    description: "Log level passed to Spark options (info/debug/warn)."
+    default: "info"
+    required: false
 outputs:
   spark-version:
     description: "The installed Apache Spark version."
 runs:
-  using: 'node16'
-  main: 'dist/index.js'
+  using: "node16"
+  main: "dist/index.js"
 branding:
-  icon: 'star'
-  color: 'orange'
+  icon: "star"
+  color: "orange"
dist/index.js: 17 additions & 13 deletions
@@ -6606,7 +6606,7 @@ Object.defineProperty(exports, "__esModule", ({ value: true }));
 const core = __importStar(__nccwpck_require__(2186));
 const tc = __importStar(__nccwpck_require__(7784));
 const fs = __importStar(__nccwpck_require__(7147));
-// See docs to create JS action: https://docs.github.com/en/actions/creating-actions/creating-a-javascript-action
+// See docs to create gh action: https://docs.github.com/en/actions/creating-actions/creating-a-javascript-action
 const log = (msg) => {
     core.info(`${new Date().toLocaleTimeString('fr-FR')} - ${msg}`);
 };
@@ -6617,14 +6617,17 @@ function run() {
         const hadoopVersion = core.getInput('hadoop-version');
         const scalaVersion = core.getInput('scala-version');
         const py4jVersion = core.getInput('py4j-version');
+        const xmx = core.getInput('xmx');
+        const xms = core.getInput('xms');
+        const logLevel = core.getInput('log-level');
         let sparkUrl = core.getInput('spark-url');
         // Try to write to the parent folder of the workflow workspace
         const workspaceFolder = process.env.GITHUB_WORKSPACE || '/home/runner/work';
         let installFolder = workspaceFolder.split('/').slice(0, -1).join('/');
         try {
            fs.accessSync(installFolder, fs.constants.R_OK);
         }
-        catch (err) {
+        catch (error) {
            log(`Using $GITHUB_WORKSPACE to store Spark (${installFolder} not writable)`);
            installFolder = workspaceFolder;
         }
@@ -6636,7 +6639,11 @@ function run() {
            log(`Using Spark from cache ${cachedSpark}`);
            sparkHome = cachedSpark;
         }
-        else if (!sparkUrl) {
+        else if (sparkUrl) {
+            // URL provided directly by user
+            yield download(sparkUrl, installFolder);
+        }
+        else {
            // If URL not provided directly, we try to download from official recommended https://spark.apache.org/downloads.html
            sparkUrl = `https://dlcdn.apache.org/spark/spark-${sparkVersion}/spark-${sparkVersion}-bin-hadoop${hadoopVersion}${scalaBit}.tgz`;
            try {
@@ -6648,15 +6655,11 @@ function run() {
                yield download(sparkUrl, installFolder);
            }
         }
-        else {
-            // URL provided directly by user
-            yield download(sparkUrl, installFolder);
-        }
         if (!fs.existsSync(`${sparkHome}/bin/spark-submit`)) {
-            throw new Error(`The Spark binary was not properly downloaded from ${sparkUrl}`);
+            throw new Error(`The Spark binary downloaded from ${sparkUrl} could not be found in ${sparkHome}`);
         }
-        log(`Binary downloaded, setting up environment variables`);
-        const SPARK_OPTS = `--driver-java-options=-Xms1024M --driver-java-options=-Xmx2048M --driver-java-options=-Dlog4j.logLevel=info`;
+        log(`Spark binary downloaded, setting up environment variables and cache`);
+        const SPARK_OPTS = `--driver-java-options=-Xms${xms} --driver-java-options=-Xmx${xmx} --driver-java-options=-Dlog4j.logLevel=${logLevel}`;
         const PYTHONPATH = `${sparkHome}/python:${sparkHome}/python/lib/py4j-${py4jVersion}-src.zip`;
         const PYSPARK_PYTHON = 'python';
         // Set environment variables in the workflow
@@ -6667,15 +6670,16 @@ function run() {
         core.exportVariable('PYSPARK_DRIVER_PYTHON', PYSPARK_PYTHON);
         core.exportVariable('PYTHONPATH', PYTHONPATH);
         core.exportVariable('SPARK_OPTS', SPARK_OPTS);
-        // Add Spark to path
+        // Add Spark to path and cache it
         core.addPath(`${sparkHome}/bin`);
         yield tc.cacheDir(sparkHome, 'spark', sparkVersion);
         core.setOutput('spark-version', sparkVersion);
     }
     catch (error) {
         log(`Issue installing Spark: check if the Spark version and Hadoop versions you are using are part of the ones proposed on the Spark download page at https://spark.apache.org/downloads.html`);
-        core.error(error);
-        core.setFailed(error.message);
+        const err = error;
+        core.error(err);
+        core.setFailed(err.message);
     }
     });
}
package.json: 6 additions & 7 deletions
@@ -6,9 +6,9 @@
   "private": true,
   "scripts": {
     "fmt": "prettier --write **/*.ts",
-    "build": "npm run fmt && ncc build src/setup-spark.ts",
-    "dev": "npm run build && act -j test-setup-spark-scala",
     "test": "prettier --check **/*.ts && eslint ./src/**.ts --ignore-path .gitignore",
+    "build": "npm run fmt && npm run test && ncc build src/setup-spark.ts",
+    "dev": "npm run build && act -j test-setup-spark-scala",
     "release": "git tag -fa v1 -m 'v1' && git push -f origin v1",
     "install-licensed": "curl -Lfs -o licensed.tar.gz https://github.com/github/licensed/releases/download/3.1.0/licensed-3.1.0-linux-x64.tar.gz && tar -xzf licensed.tar.gz",
     "licensed": "./licensed cache"
@@ -60,13 +60,12 @@
     "parser": "typescript"
   },
   "eslintConfig": {
-    "root": true,
-    "plugins": [
-      "@typescript-eslint"
-    ],
     "extends": [
       "plugin:github/recommended"
     ],
+    "plugins": [
+      "@typescript-eslint"
+    ],
     "parser": "@typescript-eslint/parser",
     "parserOptions": {
       "ecmaVersion": 9,
@@ -78,7 +77,7 @@
       "es6": true
     },
     "rules": {
-      "@typescript-eslint/no-explicit-any": "off",
+      "@typescript-eslint/no-explicit-any": "warn",
       "import/no-namespace": "off",
       "i18n-text/no-en": "off"
     }
src/setup-spark.ts: 21 additions & 18 deletions
@@ -2,25 +2,28 @@ import * as core from '@actions/core';
 import * as tc from '@actions/tool-cache';
 import * as fs from 'fs';
 
-// See docs to create JS action: https://docs.github.com/en/actions/creating-actions/creating-a-javascript-action
+// See docs to create gh action: https://docs.github.com/en/actions/creating-actions/creating-a-javascript-action
 const log = (msg: string) => {
   core.info(`${new Date().toLocaleTimeString('fr-FR')} - ${msg}`);
 };
 
-async function run(): Promise<void> {
+async function run() {
   try {
     const sparkVersion = core.getInput('spark-version');
     const hadoopVersion = core.getInput('hadoop-version');
     const scalaVersion = core.getInput('scala-version');
     const py4jVersion = core.getInput('py4j-version');
+    const xmx = core.getInput('xmx');
+    const xms = core.getInput('xms');
+    const logLevel = core.getInput('log-level');
     let sparkUrl = core.getInput('spark-url');
 
     // Try to write to the parent folder of the workflow workspace
-    const workspaceFolder: string = process.env.GITHUB_WORKSPACE || '/home/runner/work';
-    let installFolder: any = workspaceFolder.split('/').slice(0, -1).join('/');
+    const workspaceFolder = process.env.GITHUB_WORKSPACE || '/home/runner/work';
+    let installFolder = workspaceFolder.split('/').slice(0, -1).join('/');
     try {
       fs.accessSync(installFolder, fs.constants.R_OK);
-    } catch (err) {
+    } catch (error) {
       log(`Using $GITHUB_WORKSPACE to store Spark (${installFolder} not writable)`);
       installFolder = workspaceFolder;
     }
@@ -33,27 +36,27 @@ async function run(): Promise<void> {
     if (cachedSpark) {
       log(`Using Spark from cache ${cachedSpark}`);
       sparkHome = cachedSpark;
-    } else if (!sparkUrl) {
+    } else if (sparkUrl) {
+      // URL provided directly by user
+      await download(sparkUrl, installFolder);
+    } else {
       // If URL not provided directly, we try to download from official recommended https://spark.apache.org/downloads.html
       sparkUrl = `https://dlcdn.apache.org/spark/spark-${sparkVersion}/spark-${sparkVersion}-bin-hadoop${hadoopVersion}${scalaBit}.tgz`;
       try {
         await download(sparkUrl, installFolder);
-      } catch (error: any) {
+      } catch (error) {
         log(`Faster recommended download URL not available, downloading from Apache Archives.`);
         sparkUrl = `https://archive.apache.org/dist/spark/spark-${sparkVersion}/spark-${sparkVersion}-bin-hadoop${hadoopVersion}${scalaBit}.tgz`;
         await download(sparkUrl, installFolder);
       }
-    } else {
-      // URL provided directly by user
-      await download(sparkUrl, installFolder);
     }
 
     if (!fs.existsSync(`${sparkHome}/bin/spark-submit`)) {
-      throw new Error(`The Spark binary was not properly downloaded from ${sparkUrl}`);
+      throw new Error(`The Spark binary downloaded from ${sparkUrl} could not be found in ${sparkHome}`);
     }
 
-    log(`Binary downloaded, setting up environment variables`);
-    const SPARK_OPTS = `--driver-java-options=-Xms1024M --driver-java-options=-Xmx2048M --driver-java-options=-Dlog4j.logLevel=info`;
+    log(`Spark binary downloaded, setting up environment variables and cache`);
+    const SPARK_OPTS = `--driver-java-options=-Xms${xms} --driver-java-options=-Xmx${xmx} --driver-java-options=-Dlog4j.logLevel=${logLevel}`;
     const PYTHONPATH = `${sparkHome}/python:${sparkHome}/python/lib/py4j-${py4jVersion}-src.zip`;
     const PYSPARK_PYTHON = 'python';
 
@@ -66,18 +69,18 @@ async function run(): Promise<void> {
     core.exportVariable('PYTHONPATH', PYTHONPATH);
     core.exportVariable('SPARK_OPTS', SPARK_OPTS);
 
-    // Add Spark to path
+    // Add Spark to path and cache it
     core.addPath(`${sparkHome}/bin`);
 
     await tc.cacheDir(sparkHome, 'spark', sparkVersion);
 
     core.setOutput('spark-version', sparkVersion);
-  } catch (error: any) {
+  } catch (error) {
     log(
       `Issue installing Spark: check if the Spark version and Hadoop versions you are using are part of the ones proposed on the Spark download page at https://spark.apache.org/downloads.html`
     );
-    core.error(error);
-    core.setFailed(error.message);
+    const err = <Error>error;
+    core.error(err);
+    core.setFailed(err.message);
   }
 }
 

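Note: with the defaults declared in action.yml (xms: 1024M, xmx: 2048M, log-level: info), the interpolated SPARK_OPTS string is identical to the previously hardcoded one, so workflows that do not set the new inputs keep exactly the same driver options. The action only exports SPARK_OPTS as an environment variable for later steps; the sketch below passes it to spark-submit explicitly. The step is illustrative, not part of this commit.

- name: Inspect the exported Spark options
  run: |
    # With the default inputs this prints:
    # --driver-java-options=-Xms1024M --driver-java-options=-Xmx2048M --driver-java-options=-Dlog4j.logLevel=info
    echo "$SPARK_OPTS"
    # The unquoted expansion splits into the three --driver-java-options flags
    spark-submit $SPARK_OPTS --version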