Skip to content

Commit

Permalink
Add Python 3.9 support to Hive
Browse files Browse the repository at this point in the history
Hive support for Python 3.9 was removed in apache#15515, but Cloudera has
since released version 0.3.1 of the sasl library, which supports Python 3.9,
so we should be able to get the Hive provider working on Python 3.9 too.

Fixes: apache#21891
  • Loading branch information
potiuk committed Mar 1, 2022
1 parent 1691968 commit 4e2c954
Show file tree
Hide file tree
Showing 7 changed files with 6 additions and 67 deletions.
3 changes: 0 additions & 3 deletions airflow/providers/apache/hive/provider.yaml
Expand Up @@ -36,9 +36,6 @@ versions:
additional-dependencies:
- apache-airflow>=2.1.0

excluded-python-versions:
- "3.9"

integrations:
- integration-name: Apache Hive
external-doc-url: https://hive.apache.org/
Expand Down
6 changes: 5 additions & 1 deletion setup.py
Expand Up @@ -380,7 +380,11 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
]
hive = [
'hmsclient>=0.1.0',
'pyhive[hive]>=0.6.0;python_version<"3.9"',
'pyhive[hive]>=0.6.0',
# On Python 3.9 the sasl library must be installed at version 0.3.1 or higher, because earlier
# versions do not support Python 3.9. On other Python versions pyhive[hive] pulls in the sasl
# library anyway (and there the sasl library version is not relevant).
'sasl>=0.3.1; python_version>="3.9"',
'thrift>=0.9.2',
pandas_requirement,
]
Expand Down
31 changes: 0 additions & 31 deletions tests/providers/apache/hive/hooks/test_hive.py
Expand Up @@ -28,7 +28,6 @@
import pytest
from hmsclient import HMSClient

from airflow import PY39
from airflow.exceptions import AirflowException
from airflow.models.connection import Connection
from airflow.models.dag import DAG
Expand All @@ -55,12 +54,6 @@ def __init__(self):
self.iterable = []


@pytest.mark.skipif(
PY39,
reason="Hive does not run on Python 3.9 because it brings SASL via thrift-sasl."
" This could be removed when https://github.com/dropbox/PyHive/issues/380"
" is solved",
)
class TestHiveEnvironment(unittest.TestCase):
def setUp(self):
self.next_day = (DEFAULT_DATE + datetime.timedelta(days=1)).isoformat()[:10]
Expand All @@ -77,12 +70,6 @@ def setUp(self):
self.hook = HiveMetastoreHook()


@pytest.mark.skipif(
PY39,
reason="Hive does not run on Python 3.9 because it brings SASL via thrift-sasl."
" This could be removed when https://github.com/dropbox/PyHive/issues/380"
" is solved",
)
class TestHiveCliHook(unittest.TestCase):
@mock.patch('tempfile.tempdir', '/tmp/')
@mock.patch('tempfile._RandomNameSequence.__next__')
Expand Down Expand Up @@ -347,12 +334,6 @@ def test_load_df_with_data_types(self, mock_run_cli):
assert_equal_ignore_multiple_spaces(self, mock_run_cli.call_args_list[0][0][0], query)


@pytest.mark.skipif(
PY39,
reason="Hive does not run on Python 3.9 because it brings SASL via thrift-sasl."
" This could be removed when https://github.com/dropbox/PyHive/issues/380"
" is solved",
)
class TestHiveMetastoreHook(TestHiveEnvironment):
VALID_FILTER_MAP = {'key2': 'value2'}

Expand Down Expand Up @@ -596,12 +577,6 @@ def test_drop_partition(self, get_metastore_client_mock, table_exist_mock):
assert metastore_mock.drop_partition(self.table, db=self.database, part_vals=[DEFAULT_DATE_DS]), ret


@pytest.mark.skipif(
PY39,
reason="Hive does not run on Python 3.9 because it brings SASL via thrift-sasl."
" This could be removed when https://github.com/dropbox/PyHive/issues/380"
" is solved",
)
class TestHiveServer2Hook(unittest.TestCase):
def _upload_dataframe(self):
df = pd.DataFrame({'a': [1, 2], 'b': [1, 2]})
Expand Down Expand Up @@ -857,12 +832,6 @@ def test_get_results_with_hive_conf(self):
assert 'test_dag_run_id' in output


@pytest.mark.skipif(
PY39,
reason="Hive does not run on Python 3.9 because it brings SASL via thrift-sasl."
" This could be removed when https://github.com/dropbox/PyHive/issues/380"
" is solved",
)
class TestHiveCli(unittest.TestCase):
def setUp(self):
self.nondefault_schema = "nondefault"
Expand Down
9 changes: 0 additions & 9 deletions tests/providers/apache/hive/transfers/test_hive_to_mysql.py
Expand Up @@ -20,9 +20,6 @@
import unittest
from unittest.mock import MagicMock, patch

import pytest

from airflow import PY39
from airflow.providers.apache.hive.transfers.hive_to_mysql import HiveToMySqlOperator
from airflow.utils import timezone
from airflow.utils.operator_helpers import context_to_airflow_vars
Expand All @@ -31,12 +28,6 @@
DEFAULT_DATE = timezone.datetime(2015, 1, 1)


@pytest.mark.skipif(
PY39,
reason="Hive does not run on Python 3.9 because it brings SASL via thrift-sasl."
" This could be removed when https://github.com/dropbox/PyHive/issues/380"
" is solved",
)
class TestHiveToMySqlTransfer(TestHiveEnvironment):
def setUp(self):
self.kwargs = dict(
Expand Down
9 changes: 0 additions & 9 deletions tests/providers/apache/hive/transfers/test_hive_to_samba.py
Expand Up @@ -19,9 +19,6 @@
import unittest
from unittest.mock import MagicMock, Mock, PropertyMock, patch

import pytest

from airflow import PY39
from airflow.providers.apache.hive.transfers.hive_to_samba import HiveToSambaOperator
from airflow.providers.samba.hooks.samba import SambaHook
from airflow.utils.operator_helpers import context_to_airflow_vars
Expand All @@ -44,12 +41,6 @@ def get_connection(self, *args):
return self.conn


@pytest.mark.skipif(
PY39,
reason="Hive does not run on Python 3.9 because it brings SASL via thrift-sasl."
" This could be removed when https://github.com/dropbox/PyHive/issues/380"
" is solved",
)
class TestHive2SambaOperator(TestHiveEnvironment):
def setUp(self):
self.kwargs = dict(
Expand Down
8 changes: 1 addition & 7 deletions tests/providers/apache/hive/transfers/test_mssql_to_hive.py
Expand Up @@ -23,7 +23,7 @@

import pytest

from airflow import PY38, PY39
from airflow import PY38

if PY38:
MsSqlToHiveTransferOperator: None = None
Expand All @@ -36,12 +36,6 @@
pymssql = None


@pytest.mark.skipif(
PY39,
reason="Hive does not run on Python 3.9 because it brings SASL via thrift-sasl."
" This could be removed when https://github.com/dropbox/PyHive/issues/380"
" is solved",
)
@pytest.mark.skipif(PY38, reason="Mssql package not available when Python >= 3.8.")
@pytest.mark.skipif(pymssql is None, reason='pymssql package not present')
class TestMsSqlToHiveTransfer(unittest.TestCase):
Expand Down
7 changes: 0 additions & 7 deletions tests/providers/apache/hive/transfers/test_mysql_to_hive.py
Expand Up @@ -23,7 +23,6 @@

import pytest

from airflow import PY39
from airflow.providers.apache.hive.hooks.hive import HiveCliHook
from airflow.providers.apache.hive.transfers.mysql_to_hive import MySqlToHiveOperator
from airflow.providers.mysql.hooks.mysql import MySqlHook
Expand All @@ -34,12 +33,6 @@
DEFAULT_DATE_DS = DEFAULT_DATE_ISO[:10]


@pytest.mark.skipif(
PY39,
reason="Hive does not run on Python 3.9 because it brings SASL via thrift-sasl."
" This could be removed when https://github.com/dropbox/PyHive/issues/380"
" is solved",
)
@pytest.mark.backend("mysql")
class TestTransfer:
env_vars = {
Expand Down

0 comments on commit 4e2c954

Please sign in to comment.