From dd94f3348f47ab051c66b1534aa393f45ca04952 Mon Sep 17 00:00:00 2001 From: Jed Cunningham <66968678+jedcunningham@users.noreply.github.com> Date: Wed, 19 Oct 2022 09:13:06 -0700 Subject: [PATCH] Simplify origin string cleaning (#27143) (cherry picked from commit 68cb2daa410a72bcfb548587747afc9c5b946d11) --- airflow/www/views.py | 20 +++++++------------- tests/www/views/test_views.py | 6 ++++++ tests/www/views/test_views_trigger_dag.py | 6 +++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/airflow/www/views.py b/airflow/www/views.py index 3d2be590c76fa..d8d6a24be0dec 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -30,7 +30,7 @@ from json import JSONDecodeError from operator import itemgetter from typing import Callable, Iterable, List, Optional, Set, Tuple, Union -from urllib.parse import parse_qsl, unquote, urlencode, urlparse +from urllib.parse import unquote, urljoin, urlsplit import lazy_object_proxy import nvd3 @@ -140,27 +140,21 @@ def truncate_task_duration(task_duration): def get_safe_url(url): """Given a user-supplied URL, ensure it points to our web server""" - valid_schemes = ['http', 'https', ''] - valid_netlocs = [request.host, ''] - if not url: return url_for('Airflow.index') - parsed = urlparse(url) - # If the url contains semicolon, redirect it to homepage to avoid # potential XSS. (Similar to https://github.com/python/cpython/pull/24297/files (bpo-42967)) if ';' in unquote(url): return url_for('Airflow.index') - query = parse_qsl(parsed.query, keep_blank_values=True) - - url = parsed._replace(query=urlencode(query)).geturl() - - if parsed.scheme in valid_schemes and parsed.netloc in valid_netlocs: - return url + host_url = urlsplit(request.host_url) + redirect_url = urlsplit(urljoin(request.host_url, url)) + if not (redirect_url.scheme in ("http", "https") and host_url.netloc == redirect_url.netloc): + return url_for('Airflow.index') - return url_for('Airflow.index') + # This will ensure we only redirect to the right scheme/netloc + return redirect_url.geturl() def get_date_time_num_runs_dag_runs_form_data(www_request, session, dag): diff --git a/tests/www/views/test_views.py b/tests/www/views/test_views.py index eac1e5e0eda09..9aa39b04ad634 100644 --- a/tests/www/views/test_views.py +++ b/tests/www/views/test_views.py @@ -116,7 +116,13 @@ def test_task_start_date_filter(admin_client, url, content): "test_url, expected_url", [ ("", "/home"), + ("javascript:alert(1)", "/home"), + (" javascript:alert(1)", "http://localhost:8080/ javascript:alert(1)"), ("http://google.com", "/home"), + ("google.com", "http://localhost:8080/google.com"), + ("\\/google.com", "http://localhost:8080/\\/google.com"), + ("//google.com", "/home"), + ("\\/\\/google.com", "http://localhost:8080/\\/\\/google.com"), ("36539'%3balert(1)%2f%2f166", "/home"), ( "http://localhost:8080/trigger?dag_id=test&origin=36539%27%3balert(1)%2f%2f166&abc=2", diff --git a/tests/www/views/test_views_trigger_dag.py b/tests/www/views/test_views_trigger_dag.py index 8647872f4e134..6179d1934ca57 100644 --- a/tests/www/views/test_views_trigger_dag.py +++ b/tests/www/views/test_views_trigger_dag.py @@ -103,14 +103,14 @@ def test_trigger_dag_form(admin_client): ("36539'%3balert(1)%2f%2f166", "/home"), ( '">