forked from apache/airflow
-
Notifications
You must be signed in to change notification settings - Fork 15
/
utils.py
444 lines (344 loc) · 15.6 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import json
import textwrap
import time
from urllib.parse import urlencode
import markdown
import sqlalchemy as sqla
from flask import Markup, Response, request, url_for
from flask_appbuilder.forms import FieldConverter
from flask_appbuilder.models.sqla import filters as fab_sqlafilters
from flask_appbuilder.models.sqla.interface import SQLAInterface
from pygments import highlight, lexers
from pygments.formatters import HtmlFormatter # noqa pylint: disable=no-name-in-module
from airflow.utils import timezone
from airflow.utils.code_utils import get_python_source
from airflow.utils.json import AirflowJsonEncoder
from airflow.utils.state import State
from airflow.www.forms import DateTimeWithTimezoneField
from airflow.www.widgets import AirflowDateTimePickerWidget
def get_sensitive_variables_fields():  # noqa: D103
    """Deprecated shim around ``airflow.utils.log.secrets_masker.get_sensitive_variables_fields``."""
    import warnings

    from airflow.utils.log.secrets_masker import get_sensitive_variables_fields as _current_impl

    deprecation_message = (
        "This function is deprecated. Please use "
        "`airflow.utils.log.secrets_masker.get_sensitive_variables_fields`"
    )
    # stacklevel=2 attributes the warning to the caller rather than to this shim.
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    return _current_impl()
def should_hide_value_for_key(key_name):  # noqa: D103
    """Deprecated shim around ``airflow.utils.log.secrets_masker.should_hide_value_for_key``."""
    import warnings

    from airflow.utils.log.secrets_masker import should_hide_value_for_key as _current_impl

    deprecation_message = (
        "This function is deprecated. Please use "
        "`airflow.utils.log.secrets_masker.should_hide_value_for_key`"
    )
    # stacklevel=2 attributes the warning to the caller rather than to this shim.
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    return _current_impl(key_name)
def get_params(**kwargs):
    """Return the keyword arguments as a URL-encoded query string, skipping ``None`` values."""
    present_pairs = [(name, value) for name, value in kwargs.items() if value is not None]
    return urlencode(present_pairs)
def generate_pages(current_page, num_of_pages, search=None, status=None, window=7):
    """
    Build the HTML for the pager component.

    The pager shows ``window`` page buttons at a time. Near the start the
    buttons are the first ``window`` pages; in the middle the current page is
    kept centred; near the end the buttons are the last ``window`` pages. The
    page buttons are surrounded by <first, previous, ..., next, last> buttons.

    ``search`` and ``status`` are appended to every generated link so the
    filter state survives page changes and can be bookmarked.

    :param current_page: the current page number, 0-indexed
    :param num_of_pages: the total number of pages
    :param search: the search query string, if any
    :param status: 'all', 'active', or 'paused'
    :param window: the number of pages to be shown in the paging component (7 default)
    :return: the HTML string of the paging component
    """

    def query_link(page_index):
        # Relative link carrying the page number together with the active filters.
        return '?{}'.format(get_params(page=page_index, search=search, status=status))

    void_link = 'javascript:void(0)'

    first_tpl = Markup(
        """<li class="paginate_button {disabled}" id="dags_first">
<a href="{href_link}" aria-controls="dags" data-dt-idx="0" tabindex="0">«</a>
</li>"""
    )
    previous_tpl = Markup(
        """<li class="paginate_button previous {disabled}" id="dags_previous">
<a href="{href_link}" aria-controls="dags" data-dt-idx="0" tabindex="0">‹</a>
</li>"""
    )
    next_tpl = Markup(
        """<li class="paginate_button next {disabled}" id="dags_next">
<a href="{href_link}" aria-controls="dags" data-dt-idx="3" tabindex="0">›</a>
</li>"""
    )
    last_tpl = Markup(
        """<li class="paginate_button {disabled}" id="dags_last">
<a href="{href_link}" aria-controls="dags" data-dt-idx="3" tabindex="0">»</a>
</li>"""
    )
    page_tpl = Markup(
        """<li class="paginate_button {is_active}">
<a href="{href_link}" aria-controls="dags" data-dt-idx="2" tabindex="0">{page_num}</a>
</li>"""
    )

    half_window = int(window / 2)
    last_page = num_of_pages - 1
    on_first_page = current_page <= 0
    on_last_page = current_page >= last_page

    # "first"/"previous" share one disabled state, "next"/"last" share the other.
    backward_state = 'disabled' if on_first_page else ''
    forward_state = 'disabled' if on_last_page else ''

    # Pick which page numbers are visible in the pager.
    if current_page <= half_window or num_of_pages < window:
        visible_pages = range(0, min(num_of_pages, window))
    elif half_window < current_page < last_page - half_window:
        visible_pages = range(current_page - half_window, current_page + half_window + 1)
    else:
        visible_pages = range(num_of_pages - window, last_page + 1)

    output = [
        Markup('<ul class="pagination" style="margin-top:0;">'),
        first_tpl.format(href_link=query_link(0), disabled=backward_state),  # noqa
        previous_tpl.format(  # noqa
            href_link=void_link if on_first_page else query_link(current_page - 1),
            disabled=backward_state,
        ),
    ]

    for page in visible_pages:
        if page == current_page:
            # The current page is highlighted and its link does nothing.
            active_class, href = 'active', void_link
        else:
            active_class, href = '', query_link(page)
        # Page numbers are displayed 1-indexed.
        output.append(page_tpl.format(is_active=active_class, href_link=href, page_num=page + 1))  # noqa

    output.append(
        next_tpl.format(  # noqa
            href_link=void_link if on_last_page else query_link(current_page + 1),
            disabled=forward_state,
        )
    )
    output.append(last_tpl.format(href_link=query_link(last_page), disabled=forward_state))  # noqa
    output.append(Markup('</ul>'))

    return Markup('\n'.join(output))
def epoch(dttm):
    """Return a one-element tuple holding ``dttm`` as epoch milliseconds (via ``time.mktime``)."""
    whole_seconds = int(time.mktime(dttm.timetuple()))
    return (whole_seconds * 1000,)
def json_response(obj):
    """Serialize ``obj`` with ``AirflowJsonEncoder`` and wrap it in a 200 ``application/json`` response."""
    payload = json.dumps(obj, indent=4, cls=AirflowJsonEncoder)
    return Response(response=payload, status=200, mimetype="application/json")
def make_cache_key(*args, **kwargs):
    """
    Used by cache to get a unique key per URL.

    The key is the request path followed by a SHA-256 hex digest of the
    sorted query arguments. A cryptographic digest is used instead of the
    built-in ``hash()`` because ``hash()`` of ``str`` is salted per process:
    two workers would compute different keys for the same URL, and a hash
    collision could return the cached response of a different URL.

    :param args: ignored; accepted so the cache can call this with view arguments
    :param kwargs: ignored; accepted so the cache can call this with view arguments
    :return: the cache key as ASCII bytes
    """
    import hashlib

    # Sorting makes the key independent of the order of the query parameters.
    canonical_query = urlencode(sorted(request.args.items()))
    digest = hashlib.sha256(canonical_query.encode('utf-8')).hexdigest()
    return (request.path + digest).encode('ascii', 'ignore')
def task_instance_link(attr):
    """
    Build the HTML links for a TaskInstance.

    Produces a link to the task page labelled with the task id, followed by an
    icon link to the Graph View filtered on that task (``root=task_id``).
    """
    dag_id = attr.get('dag_id')
    task_id = attr.get('task_id')
    iso_date = attr.get('execution_date').isoformat()

    task_url = url_for('Airflow.task', dag_id=dag_id, task_id=task_id, execution_date=iso_date)
    filtered_graph_url = url_for('Airflow.graph', dag_id=dag_id, root=task_id, execution_date=iso_date)

    template = Markup(  # noqa
        """
<span style="white-space: nowrap;">
<a href="{url}">{task_id}</a>
<a href="{url_root}" title="Filter on this task and upstream">
<span class="material-icons" style="margin-left:0;"
aria-hidden="true">filter_alt</span>
</a>
</span>
"""
    )
    return template.format(url=task_url, task_id=task_id, url_root=filtered_graph_url)
def state_token(state):
    """Return an HTML label for ``state`` coloured with ``State.color`` / ``State.color_fg``."""
    template = Markup(  # noqa
        """
<span class="label" style="color:{fg_color}; background-color:{color};"
title="Current State: {state}">{state}</span>
"""
    )
    background = State.color(state)
    foreground = State.color_fg(state)
    return template.format(color=background, state=state, fg_color=foreground)
def state_f(attr):
    """Look up ``'state'`` in ``attr`` and return the HTML label produced by ``state_token``."""
    return state_token(attr.get('state'))
def nobr_f(attr_name):
    """Return a formatter that wraps the ``attr_name`` value of its argument in a ``<nobr>`` element."""

    def nobr(attr):
        return Markup("<nobr>{}</nobr>").format(attr.get(attr_name))  # noqa

    return nobr
def datetime_f(attr_name):
    """Return a formatter that renders the ``attr_name`` datetime as a ``<time>`` element."""

    def dt(attr):  # pylint: disable=invalid-name
        value = attr.get(attr_name)
        as_iso = value.isoformat() if value else ''
        if not as_iso:
            return Markup('')

        # Drop the leading "YYYY-" when the value falls in the current (UTC) year.
        current_year = timezone.utcnow().isoformat()[:4]
        label = as_iso[5:] if current_year == as_iso[:4] else as_iso

        # The empty title will be replaced in JS code when non-UTC dates are displayed
        return Markup('<nobr><time title="" datetime="{}">{}</time></nobr>').format(as_iso, label)  # noqa

    return dt
# pylint: enable=invalid-name
def json_f(attr_name):
    """Return a formatter that renders the ``attr_name`` value as JSON inside a ``<nobr>`` element."""

    def json_(attr):
        as_json = json.dumps(attr.get(attr_name))
        return Markup('<nobr>{}</nobr>').format(as_json)  # noqa

    return json_
def dag_link(attr):
    """
    Build an HTML link to the Graph View for a Dag.

    :param attr: mapping that is read for ``'dag_id'`` and ``'execution_date'``
    :return: an ``<a>`` element labelled with the dag id, or the markup ``None``
        when ``attr`` has no (truthy) ``'dag_id'``
    """
    dag_id = attr.get('dag_id')
    if not dag_id:
        # Nothing to link to: return early instead of building a URL that
        # would be thrown away.
        return Markup('None')
    execution_date = attr.get('execution_date')
    url = url_for('Airflow.graph', dag_id=dag_id, execution_date=execution_date)
    return Markup('<a href="{}">{}</a>').format(url, dag_id)  # noqa
def dag_run_link(attr):
    """Build an HTML link, labelled with the run id, to the Graph View for a DagRun."""
    run_id = attr.get('run_id')
    graph_url = url_for(
        'Airflow.graph',
        dag_id=attr.get('dag_id'),
        run_id=run_id,
        execution_date=attr.get('execution_date'),
    )
    return Markup('<a href="{url}">{run_id}</a>').format(url=graph_url, run_id=run_id)  # noqa
def pygment_html_render(s, lexer=lexers.TextLexer):  # noqa pylint: disable=no-member
    """Highlight ``s`` as HTML (with line numbers) using an instance of the given lexer class."""
    lexer_instance = lexer()
    html_formatter = HtmlFormatter(linenos=True)
    return highlight(s, lexer_instance, html_formatter)
def render(obj, lexer):
    """
    Render a Python object as highlighted HTML using the given Pygments lexer.

    A string is highlighted directly. For a tuple/list or a dict, each element
    is highlighted separately and preceded by a heading naming its index or
    key. Any other type yields an empty string.
    """
    if isinstance(obj, str):
        return Markup(pygment_html_render(obj, lexer))

    # Pair each element with its heading; generators keep the evaluation lazy
    # so a heading is formatted right before its element is highlighted.
    if isinstance(obj, (tuple, list)):
        sections = (
            (Markup("<div>List item #{}</div>").format(index), item)  # noqa
            for index, item in enumerate(obj)
        )
    elif isinstance(obj, dict):
        sections = (
            (Markup('<div>Dict item "{}"</div>').format(key), value)  # noqa
            for key, value in obj.items()
        )
    else:
        sections = ()

    rendered = ""
    for heading, content in sections:
        rendered += heading
        rendered += Markup("<div>" + pygment_html_render(content, lexer) + "</div>")
    return rendered
def wrapped_markdown(s, css_class='rich_doc'):
    """Convert a Markdown string to HTML wrapped in a ``<div>`` with the given CSS class.

    Returns ``None`` when ``s`` is ``None``.
    """
    if s is None:
        return None

    dedented = textwrap.dedent(s)
    opening_tag = f'<div class="{css_class}" >'
    html_body = markdown.markdown(dedented, extensions=['tables'])
    return Markup(opening_tag + html_body + "</div>")
# pylint: disable=no-member
def get_attr_renderer():
    """Return a dict mapping attribute names to functions that render a value as highlighted HTML."""

    def highlight_with(lexer):
        """Build a renderer that highlights its argument with ``lexer``."""
        return lambda x: render(x, lexer)

    def highlight_python_source(x):
        """Highlight the Python source obtained from ``get_python_source``."""
        return render(get_python_source(x), lexers.PythonLexer)

    return {
        'bash': highlight_with(lexers.BashLexer),
        'bash_command': highlight_with(lexers.BashLexer),
        'hql': highlight_with(lexers.SqlLexer),
        'html': highlight_with(lexers.HtmlLexer),
        'sql': highlight_with(lexers.SqlLexer),
        'doc': highlight_with(lexers.TextLexer),
        'doc_json': highlight_with(lexers.JsonLexer),
        'doc_rst': highlight_with(lexers.RstLexer),
        'doc_yaml': highlight_with(lexers.YamlLexer),
        'doc_md': wrapped_markdown,
        'json': highlight_with(lexers.JsonLexer),
        'md': wrapped_markdown,
        'py': highlight_python_source,
        'python_callable': highlight_python_source,
        'rst': highlight_with(lexers.RstLexer),
        'yaml': highlight_with(lexers.YamlLexer),
    }
# pylint: enable=no-member
def get_chart_height(dag):
    """
    Approximate a chart height from the number of tasks in the DAG.

    Without this heuristic the charts are tiny and unreadable when a DAG has a
    large number of tasks. Ideally nvd3 should allow for dynamic-height charts,
    that is charts that take up space based on the size of the components within.
    TODO(aoen): See [AIRFLOW-1263]
    """
    base_height = 600
    height_per_task = 10
    return base_height + len(dag.tasks) * height_per_task
class UtcAwareFilterMixin:  # noqa: D101
    """Mixin that runs the filter value through ``timezone.parse`` (with UTC) before filtering."""

    def apply(self, query, value):
        """Parse ``value`` with ``timezone.utc`` and delegate to the next ``apply`` in the MRO."""
        utc_value = timezone.parse(value, timezone=timezone.utc)
        return super().apply(query, utc_value)  # noqa
class UtcAwareFilterEqual(UtcAwareFilterMixin, fab_sqlafilters.FilterEqual): # noqa: D101
    """Equality filter for UTC time; ``UtcAwareFilterMixin.apply`` parses the value first."""
class UtcAwareFilterGreater(UtcAwareFilterMixin, fab_sqlafilters.FilterGreater): # noqa: D101
    """Greater Than filter for UTC time; ``UtcAwareFilterMixin.apply`` parses the value first."""
class UtcAwareFilterSmaller(UtcAwareFilterMixin, fab_sqlafilters.FilterSmaller): # noqa: D101
    """Smaller Than filter for UTC time; ``UtcAwareFilterMixin.apply`` parses the value first."""
class UtcAwareFilterNotEqual(UtcAwareFilterMixin, fab_sqlafilters.FilterNotEqual): # noqa: D101
    """Not Equal To filter for UTC time; ``UtcAwareFilterMixin.apply`` parses the value first."""
class UtcAwareFilterConverter(fab_sqlafilters.SQLAFilterConverter):  # noqa: D101
    """Filter converter whose conversion table starts with the UTC-aware datetime filters."""

    # The UTC-aware entry is placed ahead of the inherited entries.
    conversion_table = (
        (
            'is_utcdatetime',
            [UtcAwareFilterEqual, UtcAwareFilterGreater, UtcAwareFilterSmaller, UtcAwareFilterNotEqual],
        ),
        *fab_sqlafilters.SQLAFilterConverter.conversion_table,
    )
class CustomSQLAInterface(SQLAInterface):
    """
    SQLAInterface that tolerates column names with leading underscores.

    FAB does not know how to handle columns with leading underscores because
    they are not supported by WTForm. This hack removes the leading '_' from
    the keys used to look up the column names.
    """

    filter_converter_class = UtcAwareFilterConverter

    def __init__(self, obj, session=None):
        super().__init__(obj, session=session)

        # Re-key both lookup tables without the leading underscores; a falsy
        # table is left exactly as it was.
        for table_name in ('list_properties', 'list_columns'):
            table = getattr(self, table_name)
            if table:
                setattr(self, table_name, {key.lstrip('_'): entry for key, entry in table.items()})

    def is_utcdatetime(self, col_name):
        """Check if the datetime is a UTC one."""
        from airflow.utils.sqlalchemy import UtcDateTime

        if col_name not in self.list_columns:
            return False

        column_type = self.list_columns[col_name].type
        if isinstance(column_type, UtcDateTime):
            return True
        # Also accept a TypeDecorator whose ``impl`` is a UtcDateTime.
        return isinstance(column_type, sqla.types.TypeDecorator) and isinstance(column_type.impl, UtcDateTime)
# FAB uses ``FieldConverter`` directly (i.e. we can't tell FAB to use a
# different subclass), so the only option is to edit its conversion table in
# place, putting the UTC datetime entry ahead of the existing entries.
FieldConverter.conversion_table = (
    ('is_utcdatetime', DateTimeWithTimezoneField, AirflowDateTimePickerWidget),
    *FieldConverter.conversion_table,
)