Standardize code style using Black
This PR proposes to add [Black](https://github.com/psf/black) to Koalas.

- `dev/linter-python` detects whether the code is formatted by Black.
- A new `dev/reformat` script reformats the code with Black.
- The 100-character line length is kept, and `E231` and `E203` are ignored in pycodestyle, since Black's automatic formatting violates those rules (see also psf/black#429 and psf/black#1202). A rough sketch of the corresponding commands is shown after this list.
- The contribution guide is updated.
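
Roughly speaking, the checks above boil down to invocations like the ones below. This is only a sketch: the target paths and exact flags are assumptions for illustration, not the literal contents of `dev/reformat` or `dev/linter-python`.

```bash
# Reformat all Python sources in place, keeping the 100-character line length
# (paths are illustrative).
black --line-length 100 databricks dev

# Check-only mode, as a linter script would use it: exits non-zero if any file
# would need reformatting.
black --check --line-length 100 databricks dev

# Run pycodestyle with E203 (whitespace before ':') and E231 ignored, since
# Black's output can violate these rules (see psf/black#429 and psf/black#1202).
pycodestyle --max-line-length=100 --ignore=E203,E231 databricks dev
```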

Resolves #755

Co-authored-by: Deepyaman Datta <deepyaman.datta@utexas.edu>
rising-star92 and deepyaman committed Feb 22, 2020
1 parent 7dc2b63 commit fd99c98
Showing 69 changed files with 9,988 additions and 7,185 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/master.yml
@@ -52,6 +52,10 @@ jobs:
./dev/download_travis_dependencies.sh
sudo apt-get install xclip
pip install setuptools
# For Python 3.5, Black is removed from the requirements-dev.txt file below,
# as Black only works with Python 3.6+. This is hacky, but we will drop
# Python 3.5 soon, so it's fine.
sed -i '/black/d' requirements-dev.txt
pip install -r requirements-dev.txt
pip install pandas==$PANDAS_VERSION pyarrow==$PYARROW_VERSION
pip list
4 changes: 4 additions & 0 deletions .travis.yml
@@ -67,8 +67,12 @@ install:

# Test PyPI installation at Python 3.5. This is also because
# one of the dependencies requires Python 3.6 Conda specifically.
# For Python 3.5, Black is removed from the requirements-dev.txt file below,
# as Black only works with Python 3.6+. This is hacky, but we will drop
# Python 3.5 soon, so it's fine.
- |
if [[ $TRAVIS_PYTHON_VERSION == "3.5" ]]; then
sed -i '/black/d' requirements-dev.txt && \
pip install -r requirements-dev.txt && \
pip install pandas==$PANDAS_VERSION pyarrow==$PYARROW_VERSION && \
pip list;
2 changes: 1 addition & 1 deletion databricks/__init__.py
@@ -15,4 +15,4 @@
#

# https://packaging.python.org/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages
__path__ = __import__('pkgutil').extend_path(__path__, __name__) # type: ignore
__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore
20 changes: 9 additions & 11 deletions databricks/conftest.py
@@ -32,9 +32,7 @@
from databricks.koalas import utils


shared_conf = {
"spark.sql.shuffle.partitions": "4"
}
shared_conf = {"spark.sql.shuffle.partitions": "4"}
# Initialize Spark session that should be used in doctests or unittests.
# Delta requires Spark 2.4.2+. See
# https://github.com/delta-io/delta#compatibility-with-apache-spark-versions.
@@ -48,7 +46,7 @@
session = utils.default_session(shared_conf)


@pytest.fixture(scope='session', autouse=True)
@pytest.fixture(scope="session", autouse=True)
def session_termination():
yield
# Share one session across all the tests. Repeating starting and stopping sessions and contexts
@@ -58,46 +56,46 @@ def session_termination():

@pytest.fixture(autouse=True)
def add_ks(doctest_namespace):
doctest_namespace['ks'] = koalas
doctest_namespace["ks"] = koalas


@pytest.fixture(autouse=True)
def add_pd(doctest_namespace):
if os.getenv("PANDAS_VERSION", None) is not None:
assert pd.__version__ == os.getenv("PANDAS_VERSION")
doctest_namespace['pd'] = pd
doctest_namespace["pd"] = pd


@pytest.fixture(autouse=True)
def add_pa(doctest_namespace):
if os.getenv("PYARROW_VERSION", None) is not None:
assert pa.__version__ == os.getenv("PYARROW_VERSION")
doctest_namespace['pa'] = pa
doctest_namespace["pa"] = pa


@pytest.fixture(autouse=True)
def add_np(doctest_namespace):
doctest_namespace['np'] = numpy
doctest_namespace["np"] = numpy


@pytest.fixture(autouse=True)
def add_path(doctest_namespace):
path = tempfile.mkdtemp()
atexit.register(lambda: shutil.rmtree(path, ignore_errors=True))
doctest_namespace['path'] = path
doctest_namespace["path"] = path


@pytest.fixture(autouse=True)
def add_db(doctest_namespace):
db_name = "db%s" % str(uuid.uuid4()).replace("-", "")
session.sql("CREATE DATABASE %s" % db_name)
atexit.register(lambda: session.sql("DROP DATABASE IF EXISTS %s CASCADE" % db_name))
doctest_namespace['db'] = db_name
doctest_namespace["db"] = db_name


@pytest.fixture(autouse=os.getenv("KOALAS_USAGE_LOGGER", None) is not None)
def add_caplog(caplog):
with caplog.at_level(logging.INFO, logger='databricks.koalas.usage_logger'):
with caplog.at_level(logging.INFO, logger="databricks.koalas.usage_logger"):
yield


71 changes: 53 additions & 18 deletions databricks/koalas/__init__.py
@@ -21,27 +21,33 @@

def assert_pyspark_version():
import logging

pyspark_ver = None
try:
import pyspark
except ImportError:
raise ImportError('Unable to import pyspark - consider doing a pip install with [spark] '
'extra to install pyspark with pip')
raise ImportError(
"Unable to import pyspark - consider doing a pip install with [spark] "
"extra to install pyspark with pip"
)
else:
pyspark_ver = getattr(pyspark, '__version__')
if pyspark_ver is None or pyspark_ver < '2.4':
pyspark_ver = getattr(pyspark, "__version__")
if pyspark_ver is None or pyspark_ver < "2.4":
logging.warning(
'Found pyspark version "{}" installed. pyspark>=2.4.0 is recommended.'
.format(pyspark_ver if pyspark_ver is not None else '<unknown version>'))
'Found pyspark version "{}" installed. pyspark>=2.4.0 is recommended.'.format(
pyspark_ver if pyspark_ver is not None else "<unknown version>"
)
)


assert_pyspark_version()

import pyspark
import pyarrow

if LooseVersion(pyarrow.__version__) >= LooseVersion("0.15") and \
LooseVersion(pyspark.__version__) < LooseVersion("3.0"):
if LooseVersion(pyarrow.__version__) >= LooseVersion("0.15") and LooseVersion(
pyspark.__version__
) < LooseVersion("3.0"):
# This is required to support PyArrow 0.15 in PySpark versions lower than 3.0.
# See SPARK-29367.
os.environ["ARROW_PRE_0_15_IPC_FORMAT"] = "1"
@@ -53,10 +59,31 @@ def assert_pyspark_version():
from databricks.koalas.config import get_option, set_option, reset_option, options
from databricks.koalas.groupby import NamedAgg

__all__ = ['read_csv', 'read_parquet', 'to_datetime', 'from_pandas',
'get_dummies', 'DataFrame', 'Series', 'Index', 'MultiIndex', 'pandas_wraps',
'sql', 'range', 'concat', 'melt', 'get_option', 'set_option', 'reset_option',
'read_sql_table', 'read_sql_query', 'read_sql', 'options', 'option_context', 'NamedAgg']
__all__ = [
"read_csv",
"read_parquet",
"to_datetime",
"from_pandas",
"get_dummies",
"DataFrame",
"Series",
"Index",
"MultiIndex",
"pandas_wraps",
"sql",
"range",
"concat",
"melt",
"get_option",
"set_option",
"reset_option",
"read_sql_table",
"read_sql_query",
"read_sql",
"options",
"option_context",
"NamedAgg",
]


def _auto_patch():
@@ -68,21 +95,29 @@ def _auto_patch():
if logger_module is not None:
try:
from databricks.koalas import usage_logging

usage_logging.attach(logger_module)
except Exception as e:
from pyspark.util import _exception_message
logger = logging.getLogger('databricks.koalas.usage_logger')
logger.warning('Tried to attach usage logger `{}`, but an exception was raised: {}'
.format(logger_module, _exception_message(e)))

logger = logging.getLogger("databricks.koalas.usage_logger")
logger.warning(
"Tried to attach usage logger `{}`, but an exception was raised: {}".format(
logger_module, _exception_message(e)
)
)

# Autopatching is on by default.
x = os.getenv("SPARK_KOALAS_AUTOPATCH", "true")
if x.lower() in ("true", "1", "enabled"):
logger = logging.getLogger('spark')
logger.info("Patching spark automatically. You can disable it by setting "
"SPARK_KOALAS_AUTOPATCH=false in your environment")
logger = logging.getLogger("spark")
logger.info(
"Patching spark automatically. You can disable it by setting "
"SPARK_KOALAS_AUTOPATCH=false in your environment"
)

from pyspark.sql import dataframe as df

df.DataFrame.to_koalas = DataFrame.to_koalas


