-
Notifications
You must be signed in to change notification settings - Fork 28k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-39881][PYTHON] Fix erroneous check for black and reenable black validation. #37305
Changes from 2 commits
d29d43a
a62ac64
a626a13
8691d39
5a89f6d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -968,7 +968,7 @@ class _CountVectorizerParams(JavaParams, HasInputCol, HasOutputCol): | |
|
||
def __init__(self, *args: Any): | ||
super(_CountVectorizerParams, self).__init__(*args) | ||
self._setDefault(minTF=1.0, minDF=1.0, maxDF=2 ** 63 - 1, vocabSize=1 << 18, binary=False) | ||
self._setDefault(minTF=1.0, minDF=1.0, maxDF=2**63 - 1, vocabSize=1 << 18, binary=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FWIW, I think this is false positive from Black presumably (per https://peps.python.org/pep-0008/#other-recommendations). I believe we use one space around these operators. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doesn't this fall into this rule?
The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this style change because we are upgrading There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Apparently, yes. I just tested. In the old version it would not touch this change, the new version does. Playing with it actually shows that the behaviour is specifc to the power operator. I'll check if I find some more details in the black releases. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Found the doc that mentions about this:
And the commit. The style change is from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ueshin Please see - psf/black#538 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like |
||
|
||
@since("1.6.0") | ||
def getMinTF(self) -> float: | ||
|
@@ -1077,7 +1077,7 @@ def __init__( | |
*, | ||
minTF: float = 1.0, | ||
minDF: float = 1.0, | ||
maxDF: float = 2 ** 63 - 1, | ||
maxDF: float = 2**63 - 1, | ||
vocabSize: int = 1 << 18, | ||
binary: bool = False, | ||
inputCol: Optional[str] = None, | ||
|
@@ -1099,7 +1099,7 @@ def setParams( | |
*, | ||
minTF: float = 1.0, | ||
minDF: float = 1.0, | ||
maxDF: float = 2 ** 63 - 1, | ||
maxDF: float = 2**63 - 1, | ||
vocabSize: int = 1 << 18, | ||
binary: bool = False, | ||
inputCol: Optional[str] = None, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -155,11 +155,11 @@ def test_pow(self): | |
|
||
b_pser, b_psser = pdf["bool"], psdf["bool"] | ||
# float is always returned in pandas-on-Spark | ||
self.assert_eq((b_pser ** 1).astype("float"), b_psser ** 1) | ||
self.assert_eq(b_pser ** 0.1, b_psser ** 0.1) | ||
self.assert_eq((b_pser**1).astype("float"), b_psser**1) | ||
self.assert_eq(b_pser**0.1, b_psser**0.1) | ||
self.assert_eq(b_pser ** b_pser.astype(float), b_psser ** b_psser.astype(float)) | ||
self.assertRaises(TypeError, lambda: b_psser ** b_psser) | ||
self.assertRaises(TypeError, lambda: b_psser ** True) | ||
self.assertRaises(TypeError, lambda: b_psser**b_psser) | ||
self.assertRaises(TypeError, lambda: b_psser**True) | ||
|
||
self.assert_eq(b_pser % pdf["float"], b_psser % psdf["float"]) | ||
for col in self.non_numeric_df_cols: | ||
|
@@ -226,10 +226,10 @@ def test_rpow(self): | |
|
||
b_pser, b_psser = pdf["bool"], psdf["bool"] | ||
# float is returned always in pandas-on-Spark | ||
self.assert_eq((1 ** b_pser).astype(float), 1 ** b_psser) | ||
self.assert_eq(0.1 ** b_pser, 0.1 ** b_psser) | ||
self.assert_eq((1**b_pser).astype(float), 1**b_psser) | ||
self.assert_eq(0.1**b_pser, 0.1**b_psser) | ||
self.assertRaises(TypeError, lambda: "x" ** b_psser) | ||
self.assertRaises(TypeError, lambda: True ** b_psser) | ||
self.assertRaises(TypeError, lambda: True**b_psser) | ||
self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) ** b_psser) | ||
self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) ** b_psser) | ||
Comment on lines
233
to
234
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure why Black doesn't reformat this, too... Shouldn't it be reformatted like: - self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) ** b_psser)
- self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) ** b_psser)
+ self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1)**b_psser)
+ self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1)**b_psser) ??? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm no black expert, but from the discussion I had with @HyukjinKwon above, it looks like it's a operator precedence behavior thing.
in
The binary operator There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I understand about the priority but just wonder why Black did: - self.assertRaises(TypeError, lambda: True ** b_psser)
+ self.assertRaises(TypeError, lambda: True**b_psser) but didn't - self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) ** b_psser)
- self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) ** b_psser)
+ self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1)**b_psser)
+ self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1)**b_psser) It seems like just inconsistence behavior from Black, so maybe we can just keep is as is for now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reason is explained here #37305 (comment) - in short the operators are not simple ( |
||
|
||
|
@@ -547,19 +547,19 @@ def test_pow(self): | |
pser, psser = pdf["this"], psdf["this"] | ||
# float is always returned in pandas-on-Spark | ||
if extension_float_dtypes_available: | ||
self.check_extension((pser ** 1).astype("Float64"), psser ** 1) | ||
self.check_extension((pser ** 0.1).astype("Float64"), psser ** 0.1) | ||
self.check_extension((pser**1).astype("Float64"), psser**1) | ||
self.check_extension((pser**0.1).astype("Float64"), psser**0.1) | ||
self.check_extension( | ||
(pser ** pser.astype(float)).astype("Float64"), psser ** psser.astype(float) | ||
) | ||
else: | ||
self.assert_eq((pser ** 1).astype("float"), psser ** 1) | ||
self.assert_eq((pser ** 0.1).astype("float"), psser ** 0.1) | ||
self.assert_eq((pser**1).astype("float"), psser**1) | ||
self.assert_eq((pser**0.1).astype("float"), psser**0.1) | ||
self.assert_eq( | ||
(pser ** pser.astype(float)).astype("float"), psser ** psser.astype(float) | ||
) | ||
self.assertRaises(TypeError, lambda: psser ** psser) | ||
self.assertRaises(TypeError, lambda: psser ** True) | ||
self.assertRaises(TypeError, lambda: psser**psser) | ||
self.assertRaises(TypeError, lambda: psser**True) | ||
|
||
self.assert_eq( | ||
pser ** pdf["float"], | ||
|
@@ -648,13 +648,13 @@ def test_rfloordiv(self): | |
def test_rpow(self): | ||
pser, psser = self.boolean_pdf["this"], self.boolean_psdf["this"] | ||
if extension_float_dtypes_available: | ||
self.check_extension(pd.Series([1, 1, 1], dtype="Float64", name=psser.name), 1 ** psser) | ||
self.check_extension((0.1 ** pser).astype("Float64"), 0.1 ** psser) | ||
self.check_extension(pd.Series([1, 1, 1], dtype="Float64", name=psser.name), 1**psser) | ||
self.check_extension((0.1**pser).astype("Float64"), 0.1**psser) | ||
else: | ||
self.assert_eq(pd.Series([1, 1, 1], dtype="float", name=psser.name), 1 ** psser) | ||
self.assert_eq((0.1 ** pser).astype("float"), 0.1 ** psser) | ||
self.assert_eq(pd.Series([1, 1, 1], dtype="float", name=psser.name), 1**psser) | ||
self.assert_eq((0.1**pser).astype("float"), 0.1**psser) | ||
self.assertRaises(TypeError, lambda: "x" ** psser) | ||
self.assertRaises(TypeError, lambda: True ** psser) | ||
self.assertRaises(TypeError, lambda: True**psser) | ||
self.assertRaises(TypeError, lambda: datetime.date(1994, 1, 1) ** psser) | ||
self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) ** psser) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you explain why this should be changed ??
The existing code
"$PYTHON_EXECUTABLE -m black"
isn't enough to check for Black installation ??There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The reason is very simple: