From 0ff449c7565fac1954c3d651f2e282936802bc77 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Sat, 24 Sep 2022 07:32:25 +0100 Subject: [PATCH 1/4] Add a note against use of top level code in timetable Accessing Variables, Connections at top level of code is bad practice --- airflow/serialization/serialized_objects.py | 6 +++++- docs/apache-airflow/concepts/timetable.rst | 6 ++++++ tests/serialization/test_dag_serialization.py | 8 ++++++-- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/airflow/serialization/serialized_objects.py b/airflow/serialization/serialized_objects.py index 87fd56882a578..7dc61cbd67e92 100644 --- a/airflow/serialization/serialized_objects.py +++ b/airflow/serialization/serialized_objects.py @@ -165,7 +165,11 @@ def __init__(self, type_string: str) -> None: self.type_string = type_string def __str__(self) -> str: - return f"Timetable class {self.type_string!r} is not registered" + return ( + f"Timetable class {self.type_string!r} is not registered or " + "you have a top level database access that disrupted the session. " + "Please check the airflow best practices documentation." + ) def _encode_timetable(var: Timetable) -> dict[str, Any]: diff --git a/docs/apache-airflow/concepts/timetable.rst b/docs/apache-airflow/concepts/timetable.rst index 9e1a9e0bf3355..3a0443534ebcb 100644 --- a/docs/apache-airflow/concepts/timetable.rst +++ b/docs/apache-airflow/concepts/timetable.rst @@ -51,6 +51,12 @@ As such, Airflow allows for custom timetables to be written in plugins and used DAGs. An example demonstrating a custom timetable can be found in the :doc:`/howto/timetable` how-to guide. +.. note:: + + As a general rule, always access Variables, Connections etc or anything that would access + the database as late as possible in your code. See :ref:`best_practices/top_level_code` + for more best practices to follow. + Built-in Timetables ------------------- diff --git a/tests/serialization/test_dag_serialization.py b/tests/serialization/test_dag_serialization.py index 44a218290ea53..bb428d7190204 100644 --- a/tests/serialization/test_dag_serialization.py +++ b/tests/serialization/test_dag_serialization.py @@ -406,7 +406,9 @@ def test_dag_serialization_unregistered_custom_timetable(self): message = ( "Failed to serialize DAG 'simple_dag': Timetable class " "'tests.test_utils.timetables.CustomSerializationTimetable' " - "is not registered" + "is not registered or " + "you have a top level database access that disrupted the session. " + "Please check the airflow best practices documentation." ) assert str(ctx.value) == message @@ -712,7 +714,9 @@ def test_deserialization_timetable_unregistered(self): message = ( "Timetable class " "'tests.test_utils.timetables.CustomSerializationTimetable' " - "is not registered" + "is not registered or " + "you have a top level database access that disrupted the session. " + "Please check the airflow best practices documentation." ) assert str(ctx.value) == message From 0d21ab3c56ba2e38ead99c77bd20e42794663670 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 26 Sep 2022 07:32:28 +0100 Subject: [PATCH 2/4] Add a section in best practices ref --- docs/apache-airflow/best-practices.rst | 33 ++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/apache-airflow/best-practices.rst b/docs/apache-airflow/best-practices.rst index 33f31654cc900..6c4a5ef73e782 100644 --- a/docs/apache-airflow/best-practices.rst +++ b/docs/apache-airflow/best-practices.rst @@ -216,6 +216,39 @@ or if you need to deserialize a json object from the variable : For security purpose, you're recommended to use the :ref:`Secrets Backend` for any variable that contains sensitive data. +Timetables +---------- +Avoid using Airflow Variables/Connections or accessing airflow database at the top level of your timetable code. +Database access should be delayed until the execution time of the DAG. This means that you should not have variables/connections retrieval +as argument to your timetable class initialization or have Variable/connection at the top level of your custom timetable module. + +Bad example: + +.. code-block: python + + from airflow.models.variable import Variable + from airflow.timetables.interval import CronDataIntervalTimetable + + + class CustomTimetable(CronDataIntervalTimetable): + def __init__(self, *args, something=Variable.get('something'), **kwargs): + self._something = something + super().__init__(*args, **kwargs) + +Good example: + +.. code-block: python + + from airflow.models.variable import Variable + from airflow.timetables.interval import CronDataIntervalTimetable + + + class CustomTimetable(CronDataIntervalTimetable): + def __init__(self, *args, something='something', **kwargs): + self._something = Variable.get(something) + super().__init__(*args, **kwargs) + + Triggering DAGs after changes ----------------------------- From 566b5ed278fac8c821a223e4195babcf95268c49 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Mon, 26 Sep 2022 14:37:02 +0800 Subject: [PATCH 3/4] Fix ReST syntax --- docs/apache-airflow/best-practices.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/apache-airflow/best-practices.rst b/docs/apache-airflow/best-practices.rst index 6c4a5ef73e782..e2dadf4e92e7e 100644 --- a/docs/apache-airflow/best-practices.rst +++ b/docs/apache-airflow/best-practices.rst @@ -224,7 +224,7 @@ as argument to your timetable class initialization or have Variable/connection a Bad example: -.. code-block: python +.. code-block:: python from airflow.models.variable import Variable from airflow.timetables.interval import CronDataIntervalTimetable @@ -237,7 +237,7 @@ Bad example: Good example: -.. code-block: python +.. code-block:: python from airflow.models.variable import Variable from airflow.timetables.interval import CronDataIntervalTimetable From 530f07627fcc3c442621b7ae8b8d1433d8744676 Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 26 Sep 2022 07:41:21 +0100 Subject: [PATCH 4/4] reference the timetable best practice directly --- docs/apache-airflow/best-practices.rst | 2 ++ docs/apache-airflow/concepts/timetable.rst | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/apache-airflow/best-practices.rst b/docs/apache-airflow/best-practices.rst index e2dadf4e92e7e..2beb777ecab9f 100644 --- a/docs/apache-airflow/best-practices.rst +++ b/docs/apache-airflow/best-practices.rst @@ -216,6 +216,8 @@ or if you need to deserialize a json object from the variable : For security purpose, you're recommended to use the :ref:`Secrets Backend` for any variable that contains sensitive data. +.. _best_practices/timetables: + Timetables ---------- Avoid using Airflow Variables/Connections or accessing airflow database at the top level of your timetable code. diff --git a/docs/apache-airflow/concepts/timetable.rst b/docs/apache-airflow/concepts/timetable.rst index 3a0443534ebcb..c76d63ea48bdc 100644 --- a/docs/apache-airflow/concepts/timetable.rst +++ b/docs/apache-airflow/concepts/timetable.rst @@ -54,7 +54,7 @@ DAGs. An example demonstrating a custom timetable can be found in the .. note:: As a general rule, always access Variables, Connections etc or anything that would access - the database as late as possible in your code. See :ref:`best_practices/top_level_code` + the database as late as possible in your code. See :ref:`best_practices/timetables` for more best practices to follow. Built-in Timetables