From 29d66f470d764bea9cb5271ad4bd44db0df73d76 Mon Sep 17 00:00:00 2001
From: Zander Hill <zander@xargs.io>
Date: Thu, 3 May 2018 11:46:07 +0100
Subject: [PATCH 1/2] Add tests for _filter_by_types

Adds a test for python and bash `types` using identify per interpreter
interpretation.

The python test succeeds with a false positive. The bash test identifies
the bug, where a comparison happens between `tags >= types`.

Tags is the tag list from identify, types is the value passed in from
configuration file as `acceptable types`.

The `tags(List) >= types(FrozenSet)` does a sort comparison of the two sequences
as evidenced by this code sample (which should return True):

```
>>> types = frozenset(['a'])
>>> tags = ['bash']
>>> tags >= types
True
```

Note: In 2.7, I cannot get this to return anything other than True.
Including the empty case:
```
>>> [] > frozenset()
True
```

Which means to me that a list is always >= a frozenset in Python 2.7.

In Python 3.5, I see errors when comparing a List to a FrozenSet. So
thankfully the interpreter is stricter in this case.

```
>>> [] >= frozenset()
TypeError: unorderable types: list() >= frozenset()
```
---
 pre_commit/commands/run.py |  7 +++++--
 tests/commands/run_test.py | 15 +++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/pre_commit/commands/run.py b/pre_commit/commands/run.py
index 98ae25dc8..600b7e2fa 100644
--- a/pre_commit/commands/run.py
+++ b/pre_commit/commands/run.py
@@ -48,11 +48,14 @@ def _filter_by_include_exclude(filenames, include, exclude):
     }
 
 
-def _filter_by_types(filenames, types, exclude_types):
+def _filter_by_types(filenames,
+                     types,
+                     exclude_types,
+                     get_tags=tags_from_path):
     types, exclude_types = frozenset(types), frozenset(exclude_types)
     ret = []
     for filename in filenames:
-        tags = tags_from_path(filename)
+        tags = get_tags(filename)
         if tags >= types and not tags & exclude_types:
             ret.append(filename)
     return tuple(ret)
diff --git a/tests/commands/run_test.py b/tests/commands/run_test.py
index d664e8013..f458e10b7 100644
--- a/tests/commands/run_test.py
+++ b/tests/commands/run_test.py
@@ -9,12 +9,14 @@
 
 import pytest
 
+from identify.identify import tags_from_interpreter
 import pre_commit.constants as C
 from pre_commit.commands.install_uninstall import install
 from pre_commit.commands.run import _compute_cols
 from pre_commit.commands.run import _filter_by_include_exclude
 from pre_commit.commands.run import _get_skips
 from pre_commit.commands.run import _has_unmerged_paths
+from pre_commit.commands.run import _filter_by_types
 from pre_commit.commands.run import run
 from pre_commit.runner import Runner
 from pre_commit.util import cmd_output
@@ -831,3 +833,16 @@ def test_include_exclude_does_search_instead_of_match(some_filenames):
 def test_include_exclude_exclude_removes_files(some_filenames):
     ret = _filter_by_include_exclude(some_filenames, '', r'\.py$')
     assert ret == {'.pre-commit-hooks.yaml'}
+
+def get_tags_stub(interpreter):
+    return lambda x: tags_from_interpreter(interpreter)
+
+@pytest.mark.current
+def test_filter_by_types_for_bash_by_interpreter():
+    ret = _filter_by_types(['bash_script'], ['shell', 'sh', 'bash'], [], get_tags=get_tags_stub('bash'))
+    assert ret == ('bash_script',)
+
+@pytest.mark.current
+def test_filter_by_types_for_python_by_interpreter():
+    ret = _filter_by_types(['script.py'], ['python'], [], get_tags=get_tags_stub('python'))
+    assert ret == ('script.py',)

From ff66c20c6e8c5c7ad5be5e6ebe4d1b9990510803 Mon Sep 17 00:00:00 2001
From: Zander Hill <zander@xargs.io>
Date: Thu, 3 May 2018 12:08:34 +0100
Subject: [PATCH 2/2] Fix `types` bug by using FrozenSet#intersection

Fixes a bug where files are not being recognized based on their `types`
using configuration file.

In the case of using `types: ['bash', 'sh', 'shell']` in
pre-commit-config, those filetypes will not be found, resulting in "no
files found" being reported with pre-commit. This persists even when
using --all-files flag.

Intersection produces predictable results in Python 2.7 and 3.x.
---
 pre_commit/commands/run.py | 6 ++++--
 tests/commands/run_test.py | 5 +++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/pre_commit/commands/run.py b/pre_commit/commands/run.py
index 600b7e2fa..a62d04749 100644
--- a/pre_commit/commands/run.py
+++ b/pre_commit/commands/run.py
@@ -53,10 +53,12 @@ def _filter_by_types(filenames,
                      exclude_types,
                      get_tags=tags_from_path):
     types, exclude_types = frozenset(types), frozenset(exclude_types)
+    valid_types = types - exclude_types
+
     ret = []
     for filename in filenames:
-        tags = get_tags(filename)
-        if tags >= types and not tags & exclude_types:
+        tags = frozenset(get_tags(filename))
+        if len(valid_types.intersection(tags)) > 0:
             ret.append(filename)
     return tuple(ret)
 
diff --git a/tests/commands/run_test.py b/tests/commands/run_test.py
index f458e10b7..e9e665343 100644
--- a/tests/commands/run_test.py
+++ b/tests/commands/run_test.py
@@ -834,15 +834,16 @@ def test_include_exclude_exclude_removes_files(some_filenames):
     ret = _filter_by_include_exclude(some_filenames, '', r'\.py$')
     assert ret == {'.pre-commit-hooks.yaml'}
 
+
 def get_tags_stub(interpreter):
     return lambda x: tags_from_interpreter(interpreter)
 
-@pytest.mark.current
+
 def test_filter_by_types_for_bash_by_interpreter():
     ret = _filter_by_types(['bash_script'], ['shell', 'sh', 'bash'], [], get_tags=get_tags_stub('bash'))
     assert ret == ('bash_script',)
 
-@pytest.mark.current
+
 def test_filter_by_types_for_python_by_interpreter():
     ret = _filter_by_types(['script.py'], ['python'], [], get_tags=get_tags_stub('python'))
     assert ret == ('script.py',)