- Run the installer.sh file
sh installer.sh
- Now extract the spark tar file
tar -xvf spark-3.3.0-bin-hadoop3.tgz
- Move the spark directory to /usr/bin/
sudo mv spark-3.3.0-bin-hadoop3 /usr/bin/
- Set up the environment variables for Spark in the .bashrc file
vi ~/.bashrc
- Add the following lines to the .bashrc file
export SPARK_HOME=/usr/bin/spark-3.3.0-bin-hadoop3
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
- Use this command to apply the changes made to the .bashrc file
source ~/.bashrc
- Use the following command to check which Python version is being used by pyspark
pyspark
- If the version is not 3.8, open the .bashrc file with an editor
vi ~/.bashrc
- Add the following lines to set up the environment variables for the Python version used by pyspark
export PYSPARK_PYTHON=/usr/bin/python3.8
export PYSPARK_DRIVER_PYTHON=/usr/bin/python3.8
- Use the following command to apply the changes made to the .bashrc file
source ~/.bashrc
- Use the following command to start the airflow server
airflow standalone
- Copy the password that is generated in the terminal
- Open a browser and navigate to the following URL (replace YourAirflowHostName with your host name, e.g. localhost)
http://YourAirflowHostName:8080
- Login to Airflow by entering the Username and Password which was generated in the terminal