From 957770759ca5bf1f867bff677b6f133e9b35627d Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 4 Jul 2022 14:39:15 -0400 Subject: [PATCH 01/10] Inline constants into instructions --- .github/workflows/test.yml | 3 +- code_data/__init__.py | 65 +++++++++++++++------ code_data/blocks.py | 114 ++++++++++++++++++++++++++++++++----- code_data/test.py | 32 ++++++++--- 4 files changed, 174 insertions(+), 40 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a3fbfe6..07726ca 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,8 +21,9 @@ jobs: with: python-version: ${{ matrix.py }} - uses: actions/checkout@v2 - # TODO: #48 Only install test requirements instead of all pinned in CI - run: pip install -e . -r requirements.test.txt + # TODO: Enable dev mode for Python + # https://docs.python.org/3/library/devmode.html#devmode - run: pytest -v code_data - name: Commit and push failing example if: failure() diff --git a/code_data/__init__.py b/code_data/__init__.py index 85b3d9e..88d375a 100644 --- a/code_data/__init__.py +++ b/code_data/__init__.py @@ -52,6 +52,10 @@ class CodeData(DataclassHideDefault): # Mapping of index in the names list to the name additional_names: dict[int, str] = field(default_factory=dict) + # Additional constants to include, which do not appear in any instructions, + # Mapping of index in the constants list to the constant + additional_constants: dict[int, ConstantDataType] = field(default_factory=dict) + # number of arguments (not including keyword only arguments, * or ** args) argcount: int = field(default=0) @@ -70,9 +74,6 @@ class CodeData(DataclassHideDefault): # code flags flags: FlagsData = field(default_factory=set) - # All code objects are recursively transformed to CodeData objects - consts: Tuple["ConstantDataType", ...] = field(default=(None,)) - # tuple of names of arguments and local variables varnames: Tuple[str, ...] 
= field(default=tuple()) @@ -104,22 +105,24 @@ def to_code_data(code: CodeType) -> CodeData: line_mapping = to_line_mapping(code) first_line_number_override = line_mapping.set_first_line(code.co_firstlineno) + + constants = tuple(map(to_code_constant, code.co_consts)) # retrieve the blocks and pop off used line mapping - blocks, additional_names = bytes_to_blocks( - code.co_code, line_mapping, code.co_names + blocks, additional_names, additional_constants = bytes_to_blocks( + code.co_code, line_mapping, code.co_names, constants ) return CodeData( blocks, line_mapping, first_line_number_override, additional_names, + additional_constants, code.co_argcount, posonlyargcount, code.co_kwonlyargcount, code.co_nlocals, code.co_stacksize, to_flags_data(code.co_flags), - tuple(map(to_code_constant, code.co_consts)), code.co_varnames, code.co_filename, code.co_name, @@ -134,12 +137,13 @@ def from_code_data(code_data: CodeData) -> CodeType: :rtype: types.CodeType """ - consts = tuple(map(from_code_constant, code_data.consts)) flags = from_flags_data(code_data.flags) - code, line_mapping, names = blocks_to_bytes( - code_data.blocks, code_data.additional_names + code, line_mapping, names, constants = blocks_to_bytes( + code_data.blocks, code_data.additional_names, code_data.additional_constants ) + consts = tuple(map(from_code_constant, constants)) + line_mapping.update(code_data.additional_line_mapping) first_line_no = line_mapping.trim_first_line(code_data.first_line_number_override) @@ -185,15 +189,12 @@ def from_code_data(code_data: CodeData) -> CodeType: ) -# We need to wrap the data structures in dataclasses to be able to represent -# them with MyPy, since it doesn't support recursive types -# https://github.com/python/mypy/issues/731 ConstantDataType = Union[ - int, + "ConstantInt", str, - float, + "ConstantFloat", None, - bool, + "ConstantBool", bytes, "EllipsisType", CodeData, @@ -206,10 +207,14 @@ def from_code_data(code_data: CodeData) -> CodeType: def 
to_code_constant(value: object) -> ConstantDataType: if isinstance(value, CodeType): return to_code_data(value) - if isinstance( - value, (int, str, float, type(None), bool, bytes, type(...), complex) - ): + if isinstance(value, (str, type(None), bytes, type(...), complex)): return value + if isinstance(value, bool): + return ConstantBool(value) + if isinstance(value, int): + return ConstantInt(value) + if isinstance(value, float): + return ConstantFloat(value) if isinstance(value, tuple): return ConstantTuple(tuple(map(to_code_constant, value))) if isinstance(value, frozenset): @@ -224,9 +229,33 @@ def from_code_constant(value: ConstantDataType) -> object: return tuple(map(from_code_constant, value.tuple)) if isinstance(value, ConstantSet): return frozenset(map(from_code_constant, value.frozenset)) + if isinstance(value, (ConstantBool, ConstantInt, ConstantFloat)): + return value.value return value +# Wrap these in types, so that, say, bytecode with constants of 1 +# are not equal to bytecodes of constants of True. + + +@dataclass(frozen=True) +class ConstantBool: + value: bool = field(metadata={"positional": True}) + + +@dataclass(frozen=True) +class ConstantInt: + value: int = field(metadata={"positional": True}) + + +@dataclass(frozen=True) +class ConstantFloat: + value: float = field(metadata={"positional": True}) + + +# We need to wrap the data structures in dataclasses to be able to represent +# them with MyPy, since it doesn't support recursive types +# https://github.com/python/mypy/issues/731 @dataclass(frozen=True) class ConstantTuple(DataclassHideDefault): tuple: Tuple[ConstantDataType, ...] 
= field(metadata={"positional": True}) diff --git a/code_data/blocks.py b/code_data/blocks.py index 31b3575..b3f7c39 100644 --- a/code_data/blocks.py +++ b/code_data/blocks.py @@ -9,16 +9,22 @@ import dis import sys from dataclasses import dataclass, field -from typing import Dict, Iterable, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union from code_data.line_mapping import LineMapping from .dataclass_hide_default import DataclassHideDefault +if TYPE_CHECKING: + from . import ConstantDataType + def bytes_to_blocks( - b: bytes, line_mapping: LineMapping, names: tuple[str, ...] -) -> tuple[Blocks, dict[int, str]]: + b: bytes, + line_mapping: LineMapping, + names: tuple[str, ...], + constants: tuple[ConstantDataType, ...], +) -> tuple[Blocks, dict[int, str], dict[int, ConstantDataType]]: """ Parse a sequence of bytes as a sequence of blocks of instructions. """ @@ -33,11 +39,17 @@ def bytes_to_blocks( # For recording what names we have found to understand the order of the names found_names: list[str] = [] + # For recording what constants we have found to understand the order of the + # constants + found_constants: list[ConstantDataType] = [] + for opcode, arg, n_args, offset, next_offset in _parse_bytes(b): # Compute the jump targets, initially with just the byte offset # Once we know all the block targets, we will transform to be block offsets - processed_arg = to_arg(opcode, arg, next_offset, names, found_names) + processed_arg = to_arg( + opcode, arg, next_offset, names, found_names, constants, found_constants + ) if isinstance(processed_arg, Jump): targets_set.add(processed_arg.target) # Store the number of args if this is a jump instruction @@ -81,12 +93,23 @@ def bytes_to_blocks( additional_names = { i: name for i, name in enumerate(names) if name not in found_names } - return {i: block for i, block in enumerate(blocks)}, additional_names + additional_constants = { + i: constant + for i, constant in 
enumerate(constants) + if constant not in found_constants + } + return ( + {i: block for i, block in enumerate(blocks)}, + additional_names, + additional_constants, + ) def blocks_to_bytes( - blocks: Blocks, additional_names: dict[int, str] -) -> Tuple[bytes, LineMapping, tuple[str, ...]]: + blocks: Blocks, + additional_names: dict[int, str], + additional_consts: dict[int, ConstantDataType], +) -> Tuple[bytes, LineMapping, tuple[str, ...], tuple[ConstantDataType, ...]]: # First compute mapping from block to offset changed_instruction_lengths = True # So that we know the bytecode offsets for jumps when iterating though instructions @@ -100,6 +123,11 @@ def blocks_to_bytes( # Mapping of name index to final name positions name_final_positions: dict[int, int] = {} + # List of constants we have collected from the instructions + constants: list[ConstantDataType] = [] + # Mapping of constant index to final constant positions + constant_final_positions: dict[int, int] = {} + # Iterate through all blocks and change jump instructions to offsets while changed_instruction_lengths: @@ -111,7 +139,13 @@ def blocks_to_bytes( if (block_index, instruction_index) in args: arg_value = args[block_index, instruction_index] else: - arg_value = from_arg(instruction.arg, names, name_final_positions) + arg_value = from_arg( + instruction.arg, + names, + name_final_positions, + constants, + constant_final_positions, + ) args[block_index, instruction_index] = arg_value n_instructions = instruction.n_args_override or _instrsize(arg_value) current_instruction_offset += n_instructions @@ -156,6 +190,17 @@ def blocks_to_bytes( names[i] for _, i in sorted(name_final_positions.items(), key=lambda x: x[0]) ] + # Add additional consts to the constants and add final positions + for i, constant in additional_consts.items(): + constants.append(constant) + constant_final_positions[i] = len(constants) - 1 + + # Sort positions by final position + constants = [ + constants[i] + for _, i in 
sorted(constant_final_positions.items(), key=lambda x: x[0]) + ] + # Finally go assemble the bytes and the line mapping bytes_: list[int] = [] line_mapping = LineMapping() @@ -180,7 +225,7 @@ def blocks_to_bytes( ) bytes_.append((arg_value >> (8 * i)) & 0xFF) - return bytes(bytes_), line_mapping, tuple(names) + return bytes(bytes_), line_mapping, tuple(names), tuple(constants) def to_arg( @@ -189,6 +234,8 @@ def to_arg( next_offset: int, names: tuple[str, ...], found_names: list[str], + consts: tuple[ConstantDataType, ...], + found_constants: list[ConstantDataType], ) -> Arg: if opcode in dis.hasjabs: return Jump((2 if _ATLEAST_310 else 1) * arg, False) @@ -201,10 +248,22 @@ def to_arg( found_names.append(name) wrong_position = found_names.index(name) != arg return Name(name, arg if wrong_position else None) + elif opcode in dis.hasconst: + constant = consts[arg] + if constant not in found_constants: + found_constants.append(constant) + wrong_position = found_constants.index(constant) != arg + return Constant(constant, arg if wrong_position else None) return arg -def from_arg(arg: Arg, names: list[str], name_final_positions: dict[int, int]) -> int: +def from_arg( + arg: Arg, + names: list[str], + name_final_positions: dict[int, int], + constants: list[ConstantDataType], + constants_final_positions: dict[int, int], +) -> int: # Use 1 as the arg_value, which will be update later if isinstance(arg, Jump): return 1 @@ -215,6 +274,13 @@ def from_arg(arg: Arg, names: list[str], name_final_positions: dict[int, int]) - final_index = index if arg.index_override is None else arg.index_override name_final_positions[final_index] = index return final_index + if isinstance(arg, Constant): + if arg.value not in constants: + constants.append(arg.value) + index = constants.index(arg.value) + final_index = index if arg.index_override is None else arg.index_override + constants_final_positions[final_index] = index + return final_index return arg @@ -277,16 +343,25 @@ class 
Name(DataclassHideDefault): index_override: Optional[int] = field(default=None) +@dataclass +class Constant(DataclassHideDefault): + """ + A constant argument. + """ + + value: ConstantDataType = field(metadata={"positional": True}) + # Optional override for the position if it is not ordered by occurrence in the code. + index_override: Optional[int] = field(default=None) + + # TODO: Add: -# 1. constant lookup -# 2. a name lookup # 3. a local lookup # 5. An unused value # 6. Comparison lookup # 7. format value # 8. Generator kind -Arg = Union[int, Jump, Name] +Arg = Union[int, Jump, Name, Constant] # dict mapping block offset to list of instructions in the block @@ -344,3 +419,16 @@ def _instrsize(arg: int) -> int: _c_int_upper_limit = (2 ** (_c_int_bit_size - 1)) - 1 # The number of values that can be stored in a signed int _c_int_length = 2**_c_int_bit_size + + +# _PyCode_ConstantKey = ctypes.pythonapi._PyCode_ConstantKey +# _PyCode_ConstantKey.restype = ctypes.py_object + + +# def code_constant_key(value: object) -> object: +# """ +# Transforms a value with the _ConstantKey function used in the Code type +# to compare equality of constants. It transforms objects so that the constant `1` +# is not equal to `True` for example, by adding their types. 
+# """ +# return _PyCode_ConstantKey(ctypes.py_object(value)) diff --git a/code_data/test.py b/code_data/test.py index 5999455..963f3c5 100644 --- a/code_data/test.py +++ b/code_data/test.py @@ -1,5 +1,6 @@ from __future__ import annotations +import ctypes import dis import pathlib import sys @@ -202,14 +203,7 @@ def code_to_primitives(code: CodeType, verify_line_mappings: bool) -> dict[str, verify_line_mapping(code) return { name: ( - # Recursively transform constants - tuple( - code_to_primitives(a, verify_line_mappings) - if isinstance(a, CodeType) - else a - for a in getattr(code, name) - ) - # Compare code with instructions for easier diff + consts_to_primitives(code.co_consts, verify_line_mappings) if name == "co_consts" else [(i.opname, i.argval) for i in _get_instructions_bytes(code.co_code)] if name == "co_code" @@ -219,6 +213,28 @@ def code_to_primitives(code: CodeType, verify_line_mappings: bool) -> dict[str, } +_PyCode_ConstantKey = ctypes.pythonapi._PyCode_ConstantKey +_PyCode_ConstantKey.restype = ctypes.py_object + + +def consts_to_primitives( + consts: tuple[object, ...], verify_line_mappings: bool +) -> tuple[object, ...]: + """ + Transforms code constants into primitives which are easier to see the diff + of for comparison. + """ + + consts = tuple( + code_to_primitives(a, verify_line_mappings) if isinstance(a, CodeType) + # If we have some other constant, use the same function the code object uses + # to differentiate based on the type. 
+ else _PyCode_ConstantKey(ctypes.py_object(a)) + for a in consts + ) + return consts + + def code_to_dict(code: CodeType) -> dict[str, object]: """ Converts a code object to a dict for testing From d92e2b06e5fe1d7188da788553e87c78b4e3d613 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 4 Jul 2022 14:48:26 -0400 Subject: [PATCH 02/10] Try alllowing push --- .github/workflows/test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 07726ca..79cfd4f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,6 +21,9 @@ jobs: with: python-version: ${{ matrix.py }} - uses: actions/checkout@v2 + # https://github.com/actions/checkout/issues/124#issuecomment-586664611 + with: + ref: ${{ github.event.pull_request.head.ref }} - run: pip install -e . -r requirements.test.txt # TODO: Enable dev mode for Python # https://docs.python.org/3/library/devmode.html#devmode From a23c1a7587e7a91e4170b524f91bef865fb868bf Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 4 Jul 2022 15:01:38 -0400 Subject: [PATCH 03/10] Fix negative zero --- code_data/__init__.py | 12 +++++++----- .../test_minimized/hypothesis.internal.floats.py | 9 +++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) create mode 100644 code_data/test_minimized/hypothesis.internal.floats.py diff --git a/code_data/__init__.py b/code_data/__init__.py index 88d375a..b1e0f02 100644 --- a/code_data/__init__.py +++ b/code_data/__init__.py @@ -214,7 +214,7 @@ def to_code_constant(value: object) -> ConstantDataType: if isinstance(value, int): return ConstantInt(value) if isinstance(value, float): - return ConstantFloat(value) + return ConstantFloat(value, is_neg_zero=str(value) == "-0.0") if isinstance(value, tuple): return ConstantTuple(tuple(map(to_code_constant, value))) if isinstance(value, frozenset): @@ -226,9 +226,9 @@ def from_code_constant(value: ConstantDataType) -> object: if isinstance(value, CodeData): 
return from_code_data(value) if isinstance(value, ConstantTuple): - return tuple(map(from_code_constant, value.tuple)) + return tuple(map(from_code_constant, value.value)) if isinstance(value, ConstantSet): - return frozenset(map(from_code_constant, value.frozenset)) + return frozenset(map(from_code_constant, value.value)) if isinstance(value, (ConstantBool, ConstantInt, ConstantFloat)): return value.value return value @@ -251,6 +251,8 @@ class ConstantInt: @dataclass(frozen=True) class ConstantFloat: value: float = field(metadata={"positional": True}) + # Store if the value is negative 0, so that == distinguishes between 0.0 and -0.0 + is_neg_zero: bool = field(default=False) # We need to wrap the data structures in dataclasses to be able to represent @@ -258,9 +260,9 @@ class ConstantFloat: # https://github.com/python/mypy/issues/731 @dataclass(frozen=True) class ConstantTuple(DataclassHideDefault): - tuple: Tuple[ConstantDataType, ...] = field(metadata={"positional": True}) + value: Tuple[ConstantDataType, ...] 
= field(metadata={"positional": True}) @dataclass(frozen=True) class ConstantSet(DataclassHideDefault): - frozenset: FrozenSet[ConstantDataType] = field(metadata={"positional": True}) + value: FrozenSet[ConstantDataType] = field(metadata={"positional": True}) diff --git a/code_data/test_minimized/hypothesis.internal.floats.py b/code_data/test_minimized/hypothesis.internal.floats.py new file mode 100644 index 0000000..e419bfd --- /dev/null +++ b/code_data/test_minimized/hypothesis.internal.floats.py @@ -0,0 +1,9 @@ +def count_between_floats(x, y, width=64): + assert x <= y + if is_negative(x): + if is_negative(y): + return float_to_int(x, width) - float_to_int(y, width) + 1 + else: + return count_between_floats(x, -0.0, width) + count_between_floats( + 0.0, y, width + ) \ No newline at end of file From 06664f7d04d6ac9e1a54721e67582b35fe93939b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 Jul 2022 19:05:23 +0000 Subject: [PATCH 04/10] Add failing example from testing on Python 3.8 --- code_data/test_minimized/test.test_unary.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 code_data/test_minimized/test.test_unary.py diff --git a/code_data/test_minimized/test.test_unary.py b/code_data/test_minimized/test.test_unary.py new file mode 100644 index 0000000..f661384 --- /dev/null +++ b/code_data/test_minimized/test.test_unary.py @@ -0,0 +1,9 @@ +class UnaryOpTestCase(unittest.TestCase): + + def test_negative(self): + self.assertTrue(-2 == 0 - 2) + self.assertEqual(-0, 0) + self.assertEqual(--2, 2) + self.assertTrue(-2 == 0 - 2) + self.assertTrue(-2.0 == 0 - 2.0) + self.assertTrue(-2j == 0 - 2j) \ No newline at end of file From ff43a3a67d5721db6fa890f9f955156fa573d48c Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 4 Jul 2022 15:03:41 -0400 Subject: [PATCH 05/10] Remove comments --- code_data/blocks.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/code_data/blocks.py b/code_data/blocks.py index 
b3f7c39..a51f8a5 100644 --- a/code_data/blocks.py +++ b/code_data/blocks.py @@ -419,16 +419,3 @@ def _instrsize(arg: int) -> int: _c_int_upper_limit = (2 ** (_c_int_bit_size - 1)) - 1 # The number of values that can be stored in a signed int _c_int_length = 2**_c_int_bit_size - - -# _PyCode_ConstantKey = ctypes.pythonapi._PyCode_ConstantKey -# _PyCode_ConstantKey.restype = ctypes.py_object - - -# def code_constant_key(value: object) -> object: -# """ -# Transforms a value with the _ConstantKey function used in the Code type -# to compare equality of constants. It transforms objects so that the constant `1` -# is not equal to `True` for example, by adding their types. -# """ -# return _PyCode_ConstantKey(ctypes.py_object(value)) From cbbb935195ff585ae6c58bb16975afbb97d099e8 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 4 Jul 2022 21:45:22 -0400 Subject: [PATCH 06/10] Handle zero complex numbers --- code_data/__init__.py | 22 ++++++++++++++++++---- code_data/flags_data.py | 1 - 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/code_data/__init__.py b/code_data/__init__.py index b1e0f02..52f473a 100644 --- a/code_data/__init__.py +++ b/code_data/__init__.py @@ -198,7 +198,7 @@ def from_code_data(code_data: CodeData) -> CodeType: bytes, "EllipsisType", CodeData, - complex, + "ConstantComplex", "ConstantSet", "ConstantTuple", ] @@ -207,14 +207,16 @@ def from_code_data(code_data: CodeData) -> CodeType: def to_code_constant(value: object) -> ConstantDataType: if isinstance(value, CodeType): return to_code_data(value) - if isinstance(value, (str, type(None), bytes, type(...), complex)): + if isinstance(value, (str, type(None), bytes, type(...))): return value if isinstance(value, bool): return ConstantBool(value) if isinstance(value, int): return ConstantInt(value) if isinstance(value, float): - return ConstantFloat(value, is_neg_zero=str(value) == "-0.0") + return ConstantFloat(value, is_neg_zero(value)) + if isinstance(value, complex): + return 
ConstantComplex(value, is_neg_zero(value.real), is_neg_zero(value.imag)) if isinstance(value, tuple): return ConstantTuple(tuple(map(to_code_constant, value))) if isinstance(value, frozenset): @@ -229,7 +231,7 @@ def from_code_constant(value: ConstantDataType) -> object: return tuple(map(from_code_constant, value.value)) if isinstance(value, ConstantSet): return frozenset(map(from_code_constant, value.value)) - if isinstance(value, (ConstantBool, ConstantInt, ConstantFloat)): + if isinstance(value, (ConstantBool, ConstantInt, ConstantFloat, ConstantComplex)): return value.value return value @@ -255,6 +257,14 @@ class ConstantFloat: is_neg_zero: bool = field(default=False) +@dataclass(frozen=True) +class ConstantComplex: + value: complex = field(metadata={"positional": True}) + # Store if the value is negative 0, so that == distinguishes between 0.0 and -0.0 + real_is_neg_zero: bool = field(default=False) + imag_is_neg_zero: bool = field(default=False) + + # We need to wrap the data structures in dataclasses to be able to represent # them with MyPy, since it doesn't support recursive types # https://github.com/python/mypy/issues/731 @@ -266,3 +276,7 @@ class ConstantTuple(DataclassHideDefault): @dataclass(frozen=True) class ConstantSet(DataclassHideDefault): value: FrozenSet[ConstantDataType] = field(metadata={"positional": True}) + + +def is_neg_zero(value: float) -> bool: + return str(value) == "-0.0" diff --git a/code_data/flags_data.py b/code_data/flags_data.py index 1bf8848..8448ba3 100644 --- a/code_data/flags_data.py +++ b/code_data/flags_data.py @@ -19,7 +19,6 @@ def to_flags_data(flags: int) -> FlagsData: flags_data: FlagsData = set() if not flags: - # Don't iterate if no flags are set return flags_data # Iterate through all flags, raising an exception if we hit any unknown ones for f in enum._decompose(_CodeFlag, flags)[0]: # type: ignore From 0843ed3e9f7bf2a97e062910b724b66b54b4295f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 5 Jul 
2022 01:56:16 +0000 Subject: [PATCH 07/10] Add failing example from testing on Python 3.9 --- .../setuptools._vendor.more_itertools.more.py | 1222 +++++++++++++++++ 1 file changed, 1222 insertions(+) create mode 100644 code_data/test_minimized/setuptools._vendor.more_itertools.more.py diff --git a/code_data/test_minimized/setuptools._vendor.more_itertools.more.py b/code_data/test_minimized/setuptools._vendor.more_itertools.more.py new file mode 100644 index 0000000..f111aa1 --- /dev/null +++ b/code_data/test_minimized/setuptools._vendor.more_itertools.more.py @@ -0,0 +1,1222 @@ +def sort_together(iterables, key_list=(0,), key=None, reverse=False): + """Return the input iterables sorted together, with *key_list* as the + priority for sorting. All iterables are trimmed to the length of the + shortest one. + + This can be used like the sorting function in a spreadsheet. If each + iterable represents a column of data, the key list determines which + columns are used for sorting. + + By default, all iterables are sorted using the ``0``-th iterable:: + + >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')] + >>> sort_together(iterables) + [(1, 2, 3, 4), ('d', 'c', 'b', 'a')] + + Set a different key list to sort according to another iterable. + Specifying multiple keys dictates how ties are broken:: + + >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')] + >>> sort_together(iterables, key_list=(1, 2)) + [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')] + + To sort by a function of the elements of the iterable, pass a *key* + function. Its arguments are the elements of the iterables corresponding to + the key list:: + + >>> names = ('a', 'b', 'c') + >>> lengths = (1, 2, 3) + >>> widths = (5, 2, 1) + >>> def area(length, width): + ... return length * width + >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area) + [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)] + + Set *reverse* to ``True`` to sort in descending order. 
+ + >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True) + [(3, 2, 1), ('a', 'b', 'c')] + + """ + if key is None: + # if there is no key function, the key argument to sorted is an + # itemgetter + key_argument = itemgetter(*key_list) + else: + # if there is a key function, call it with the items at the offsets + # specified by the key function as arguments + key_list = list(key_list) + if len(key_list) == 1: + # if key_list contains a single item, pass the item at that offset + # as the only argument to the key function + key_offset = key_list[0] + key_argument = lambda zipped_items: key(zipped_items[key_offset]) + else: + # if key_list contains multiple items, use itemgetter to return a + # tuple of items, which we pass as *args to the key function + get_key_items = itemgetter(*key_list) + key_argument = lambda zipped_items: key( + *get_key_items(zipped_items) + ) + + return list( + zip(*sorted(zip(*iterables), key=key_argument, reverse=reverse)) + ) + + +def unzip(iterable): + """The inverse of :func:`zip`, this function disaggregates the elements + of the zipped *iterable*. + + The ``i``-th iterable contains the ``i``-th element from each element + of the zipped iterable. The first element is used to to determine the + length of the remaining elements. + + >>> iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + >>> letters, numbers = unzip(iterable) + >>> list(letters) + ['a', 'b', 'c', 'd'] + >>> list(numbers) + [1, 2, 3, 4] + + This is similar to using ``zip(*iterable)``, but it avoids reading + *iterable* into memory. Note, however, that this function uses + :func:`itertools.tee` and thus may require significant storage. + + """ + head, iterable = spy(iter(iterable)) + if not head: + # empty iterable, e.g. 
zip([], [], []) + return () + # spy returns a one-length iterable as head + head = head[0] + iterables = tee(iterable, len(head)) + + def itemgetter(i): + def getter(obj): + try: + return obj[i] + except IndexError: + # basically if we have an iterable like + # iter([(1, 2, 3), (4, 5), (6,)]) + # the second unzipped iterable would fail at the third tuple + # since it would try to access tup[1] + # same with the third unzipped iterable and the second tuple + # to support these "improperly zipped" iterables, + # we create a custom itemgetter + # which just stops the unzipped iterables + # at first length mismatch + raise StopIteration + + return getter + + return tuple(map(itemgetter(i), it) for i, it in enumerate(iterables)) + + +def divide(n, iterable): + """Divide the elements from *iterable* into *n* parts, maintaining + order. + + >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6]) + >>> list(group_1) + [1, 2, 3] + >>> list(group_2) + [4, 5, 6] + + If the length of *iterable* is not evenly divisible by *n*, then the + length of the returned iterables will not be identical: + + >>> children = divide(3, [1, 2, 3, 4, 5, 6, 7]) + >>> [list(c) for c in children] + [[1, 2, 3], [4, 5], [6, 7]] + + If the length of the iterable is smaller than n, then the last returned + iterables will be empty: + + >>> children = divide(5, [1, 2, 3]) + >>> [list(c) for c in children] + [[1], [2], [3], [], []] + + This function will exhaust the iterable before returning and may require + significant storage. If order is not important, see :func:`distribute`, + which does not first pull the iterable into memory. 
+ + """ + if n < 1: + raise ValueError('n must be at least 1') + + try: + iterable[:0] + except TypeError: + seq = tuple(iterable) + else: + seq = iterable + + q, r = divmod(len(seq), n) + + ret = [] + stop = 0 + for i in range(1, n + 1): + start = stop + stop += q + 1 if i <= r else q + ret.append(iter(seq[start:stop])) + + return ret + + +def always_iterable(obj, base_type=(str, bytes)): + """If *obj* is iterable, return an iterator over its items:: + + >>> obj = (1, 2, 3) + >>> list(always_iterable(obj)) + [1, 2, 3] + + If *obj* is not iterable, return a one-item iterable containing *obj*:: + + >>> obj = 1 + >>> list(always_iterable(obj)) + [1] + + If *obj* is ``None``, return an empty iterable: + + >>> obj = None + >>> list(always_iterable(None)) + [] + + By default, binary and text strings are not considered iterable:: + + >>> obj = 'foo' + >>> list(always_iterable(obj)) + ['foo'] + + If *base_type* is set, objects for which ``isinstance(obj, base_type)`` + returns ``True`` won't be considered iterable. + + >>> obj = {'a': 1} + >>> list(always_iterable(obj)) # Iterate over the dict's keys + ['a'] + >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit + [{'a': 1}] + + Set *base_type* to ``None`` to avoid any special handling and treat objects + Python considers iterable as iterable: + + >>> obj = 'foo' + >>> list(always_iterable(obj, base_type=None)) + ['f', 'o', 'o'] + """ + if obj is None: + return iter(()) + + if (base_type is not None) and isinstance(obj, base_type): + return iter((obj,)) + + try: + return iter(obj) + except TypeError: + return iter((obj,)) + + +def adjacent(predicate, iterable, distance=1): + """Return an iterable over `(bool, item)` tuples where the `item` is + drawn from *iterable* and the `bool` indicates whether + that item satisfies the *predicate* or is adjacent to an item that does. 
+ + For example, to find whether items are adjacent to a ``3``:: + + >>> list(adjacent(lambda x: x == 3, range(6))) + [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)] + + Set *distance* to change what counts as adjacent. For example, to find + whether items are two places away from a ``3``: + + >>> list(adjacent(lambda x: x == 3, range(6), distance=2)) + [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)] + + This is useful for contextualizing the results of a search function. + For example, a code comparison tool might want to identify lines that + have changed, but also surrounding lines to give the viewer of the diff + context. + + The predicate function will only be called once for each item in the + iterable. + + See also :func:`groupby_transform`, which can be used with this function + to group ranges of items with the same `bool` value. + + """ + # Allow distance=0 mainly for testing that it reproduces results with map() + if distance < 0: + raise ValueError('distance must be at least 0') + + i1, i2 = tee(iterable) + padding = [False] * distance + selected = chain(padding, map(predicate, i1), padding) + adjacent_to_selected = map(any, windowed(selected, 2 * distance + 1)) + return zip(adjacent_to_selected, i2) + + +def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None): + """An extension of :func:`itertools.groupby` that can apply transformations + to the grouped data. 
+ + * *keyfunc* is a function computing a key value for each item in *iterable* + * *valuefunc* is a function that transforms the individual items from + *iterable* after grouping + * *reducefunc* is a function that transforms each group of items + + >>> iterable = 'aAAbBBcCC' + >>> keyfunc = lambda k: k.upper() + >>> valuefunc = lambda v: v.lower() + >>> reducefunc = lambda g: ''.join(g) + >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc)) + [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')] + + Each optional argument defaults to an identity function if not specified. + + :func:`groupby_transform` is useful when grouping elements of an iterable + using a separate iterable as the key. To do this, :func:`zip` the iterables + and pass a *keyfunc* that extracts the first element and a *valuefunc* + that extracts the second element:: + + >>> from operator import itemgetter + >>> keys = [0, 0, 1, 1, 1, 2, 2, 2, 3] + >>> values = 'abcdefghi' + >>> iterable = zip(keys, values) + >>> grouper = groupby_transform(iterable, itemgetter(0), itemgetter(1)) + >>> [(k, ''.join(g)) for k, g in grouper] + [(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')] + + Note that the order of items in the iterable is significant. + Only adjacent items are grouped together, so if you don't want any + duplicate groups, you should sort the iterable by the key function. + + """ + ret = groupby(iterable, keyfunc) + if valuefunc: + ret = ((k, map(valuefunc, g)) for k, g in ret) + if reducefunc: + ret = ((k, reducefunc(g)) for k, g in ret) + + return ret + + +class numeric_range(abc.Sequence, abc.Hashable): + """An extension of the built-in ``range()`` function whose arguments can + be any orderable numeric type. + + With only *stop* specified, *start* defaults to ``0`` and *step* + defaults to ``1``. The output items will match the type of *stop*: + + >>> list(numeric_range(3.5)) + [0.0, 1.0, 2.0, 3.0] + + With only *start* and *stop* specified, *step* defaults to ``1``. 
The + output items will match the type of *start*: + + >>> from decimal import Decimal + >>> start = Decimal('2.1') + >>> stop = Decimal('5.1') + >>> list(numeric_range(start, stop)) + [Decimal('2.1'), Decimal('3.1'), Decimal('4.1')] + + With *start*, *stop*, and *step* specified the output items will match + the type of ``start + step``: + + >>> from fractions import Fraction + >>> start = Fraction(1, 2) # Start at 1/2 + >>> stop = Fraction(5, 2) # End at 5/2 + >>> step = Fraction(1, 2) # Count by 1/2 + >>> list(numeric_range(start, stop, step)) + [Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)] + + If *step* is zero, ``ValueError`` is raised. Negative steps are supported: + + >>> list(numeric_range(3, -1, -1.0)) + [3.0, 2.0, 1.0, 0.0] + + Be aware of the limitations of floating point numbers; the representation + of the yielded numbers may be surprising. + + ``datetime.datetime`` objects can be used for *start* and *stop*, if *step* + is a ``datetime.timedelta`` object: + + >>> import datetime + >>> start = datetime.datetime(2019, 1, 1) + >>> stop = datetime.datetime(2019, 1, 3) + >>> step = datetime.timedelta(days=1) + >>> items = iter(numeric_range(start, stop, step)) + >>> next(items) + datetime.datetime(2019, 1, 1, 0, 0) + >>> next(items) + datetime.datetime(2019, 1, 2, 0, 0) + + """ + + _EMPTY_HASH = hash(range(0, 0)) + + def __init__(self, *args): + argc = len(args) + if argc == 1: + (self._stop,) = args + self._start = type(self._stop)(0) + self._step = type(self._stop - self._start)(1) + elif argc == 2: + self._start, self._stop = args + self._step = type(self._stop - self._start)(1) + elif argc == 3: + self._start, self._stop, self._step = args + elif argc == 0: + raise TypeError( + 'numeric_range expected at least ' + '1 argument, got {}'.format(argc) + ) + else: + raise TypeError( + 'numeric_range expected at most ' + '3 arguments, got {}'.format(argc) + ) + + self._zero = type(self._step)(0) + if self._step == self._zero: + raise 
ValueError('numeric_range() arg 3 must not be zero') + self._growing = self._step > self._zero + self._init_len() + + def __bool__(self): + if self._growing: + return self._start < self._stop + else: + return self._start > self._stop + + def __contains__(self, elem): + if self._growing: + if self._start <= elem < self._stop: + return (elem - self._start) % self._step == self._zero + else: + if self._start >= elem > self._stop: + return (self._start - elem) % (-self._step) == self._zero + + return False + + def __eq__(self, other): + if isinstance(other, numeric_range): + empty_self = not bool(self) + empty_other = not bool(other) + if empty_self or empty_other: + return empty_self and empty_other # True if both empty + else: + return ( + self._start == other._start + and self._step == other._step + and self._get_by_index(-1) == other._get_by_index(-1) + ) + else: + return False + + def __getitem__(self, key): + if isinstance(key, int): + return self._get_by_index(key) + elif isinstance(key, slice): + step = self._step if key.step is None else key.step * self._step + + if key.start is None or key.start <= -self._len: + start = self._start + elif key.start >= self._len: + start = self._stop + else: # -self._len < key.start < self._len + start = self._get_by_index(key.start) + + if key.stop is None or key.stop >= self._len: + stop = self._stop + elif key.stop <= -self._len: + stop = self._start + else: # -self._len < key.stop < self._len + stop = self._get_by_index(key.stop) + + return numeric_range(start, stop, step) + else: + raise TypeError( + 'numeric range indices must be ' + 'integers or slices, not {}'.format(type(key).__name__) + ) + + def __hash__(self): + if self: + return hash((self._start, self._get_by_index(-1), self._step)) + else: + return self._EMPTY_HASH + + def __iter__(self): + values = (self._start + (n * self._step) for n in count()) + if self._growing: + return takewhile(partial(gt, self._stop), values) + else: + return takewhile(partial(lt, 
self._stop), values) + + def __len__(self): + return self._len + + def _init_len(self): + if self._growing: + start = self._start + stop = self._stop + step = self._step + else: + start = self._stop + stop = self._start + step = -self._step + distance = stop - start + if distance <= self._zero: + self._len = 0 + else: # distance > 0 and step > 0: regular euclidean division + q, r = divmod(distance, step) + self._len = int(q) + int(r != self._zero) + + def __reduce__(self): + return numeric_range, (self._start, self._stop, self._step) + + def __repr__(self): + if self._step == 1: + return "numeric_range({}, {})".format( + repr(self._start), repr(self._stop) + ) + else: + return "numeric_range({}, {}, {})".format( + repr(self._start), repr(self._stop), repr(self._step) + ) + + def __reversed__(self): + return iter( + numeric_range( + self._get_by_index(-1), self._start - self._step, -self._step + ) + ) + + def count(self, value): + return int(value in self) + + def index(self, value): + if self._growing: + if self._start <= value < self._stop: + q, r = divmod(value - self._start, self._step) + if r == self._zero: + return int(q) + else: + if self._start >= value > self._stop: + q, r = divmod(self._start - value, -self._step) + if r == self._zero: + return int(q) + + raise ValueError("{} is not in numeric range".format(value)) + + def _get_by_index(self, i): + if i < 0: + i += self._len + if i < 0 or i >= self._len: + raise IndexError("numeric range object index out of range") + return self._start + i * self._step + + +def count_cycle(iterable, n=None): + """Cycle through the items from *iterable* up to *n* times, yielding + the number of completed cycles along with each item. If *n* is omitted the + process repeats indefinitely. 
+ + >>> list(count_cycle('AB', 3)) + [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')] + + """ + iterable = tuple(iterable) + if not iterable: + return iter(()) + counter = count() if n is None else range(n) + return ((i, item) for i in counter for item in iterable) + + +def mark_ends(iterable): + """Yield 3-tuples of the form ``(is_first, is_last, item)``. + + >>> list(mark_ends('ABC')) + [(True, False, 'A'), (False, False, 'B'), (False, True, 'C')] + + Use this when looping over an iterable to take special action on its first + and/or last items: + + >>> iterable = ['Header', 100, 200, 'Footer'] + >>> total = 0 + >>> for is_first, is_last, item in mark_ends(iterable): + ... if is_first: + ... continue # Skip the header + ... if is_last: + ... continue # Skip the footer + ... total += item + >>> print(total) + 300 + """ + it = iter(iterable) + + try: + b = next(it) + except StopIteration: + return + + try: + for i in count(): + a = b + b = next(it) + yield i == 0, False, a + + except StopIteration: + yield i == 0, True, a + + +def locate(iterable, pred=bool, window_size=None): + """Yield the index of each item in *iterable* for which *pred* returns + ``True``. + + *pred* defaults to :func:`bool`, which will select truthy items: + + >>> list(locate([0, 1, 1, 0, 1, 0, 0])) + [1, 2, 4] + + Set *pred* to a custom function to, e.g., find the indexes for a particular + item. + + >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b')) + [1, 3] + + If *window_size* is given, then the *pred* function will be called with + that many items. 
This enables searching for sub-sequences: + + >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3] + >>> pred = lambda *args: args == (1, 2, 3) + >>> list(locate(iterable, pred=pred, window_size=3)) + [1, 5, 9] + + Use with :func:`seekable` to find indexes and then retrieve the associated + items: + + >>> from itertools import count + >>> from more_itertools import seekable + >>> source = (3 * n + 1 if (n % 2) else n // 2 for n in count()) + >>> it = seekable(source) + >>> pred = lambda x: x > 100 + >>> indexes = locate(it, pred=pred) + >>> i = next(indexes) + >>> it.seek(i) + >>> next(it) + 106 + + """ + if window_size is None: + return compress(count(), map(pred, iterable)) + + if window_size < 1: + raise ValueError('window size must be at least 1') + + it = windowed(iterable, window_size, fillvalue=_marker) + return compress(count(), starmap(pred, it)) + + +def lstrip(iterable, pred): + """Yield the items from *iterable*, but strip any from the beginning + for which *pred* returns ``True``. + + For example, to remove a set of items from the start of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(lstrip(iterable, pred)) + [1, 2, None, 3, False, None] + + This function is analogous to to :func:`str.lstrip`, and is essentially + an wrapper for :func:`itertools.dropwhile`. + + """ + return dropwhile(pred, iterable) + + +def rstrip(iterable, pred): + """Yield the items from *iterable*, but strip any from the end + for which *pred* returns ``True``. + + For example, to remove a set of items from the end of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(rstrip(iterable, pred)) + [None, False, None, 1, 2, None, 3] + + This function is analogous to :func:`str.rstrip`. 
+ + """ + cache = [] + cache_append = cache.append + cache_clear = cache.clear + for x in iterable: + if pred(x): + cache_append(x) + else: + yield from cache + cache_clear() + yield x + + +def strip(iterable, pred): + """Yield the items from *iterable*, but strip any from the + beginning and end for which *pred* returns ``True``. + + For example, to remove a set of items from both ends of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(strip(iterable, pred)) + [1, 2, None, 3] + + This function is analogous to :func:`str.strip`. + + """ + return rstrip(lstrip(iterable, pred), pred) + + +class islice_extended: + """An extension of :func:`itertools.islice` that supports negative values + for *stop*, *start*, and *step*. + + >>> iterable = iter('abcdefgh') + >>> list(islice_extended(iterable, -4, -1)) + ['e', 'f', 'g'] + + Slices with negative values require some caching of *iterable*, but this + function takes care to minimize the amount of memory required. 
+ + For example, you can use a negative step with an infinite iterator: + + >>> from itertools import count + >>> list(islice_extended(count(), 110, 99, -2)) + [110, 108, 106, 104, 102, 100] + + You can also use slice notation directly: + + >>> iterable = map(str, count()) + >>> it = islice_extended(iterable)[10:20:2] + >>> list(it) + ['10', '12', '14', '16', '18'] + + """ + + def __init__(self, iterable, *args): + it = iter(iterable) + if args: + self._iterable = _islice_helper(it, slice(*args)) + else: + self._iterable = it + + def __iter__(self): + return self + + def __next__(self): + return next(self._iterable) + + def __getitem__(self, key): + if isinstance(key, slice): + return islice_extended(_islice_helper(self._iterable, key)) + + raise TypeError('islice_extended.__getitem__ argument must be a slice') + + +def _islice_helper(it, s): + start = s.start + stop = s.stop + if s.step == 0: + raise ValueError('step argument must be a non-zero integer or None.') + step = s.step or 1 + + if step > 0: + start = 0 if (start is None) else start + + if start < 0: + # Consume all but the last -start items + cache = deque(enumerate(it, 1), maxlen=-start) + len_iter = cache[-1][0] if cache else 0 + + # Adjust start to be positive + i = max(len_iter + start, 0) + + # Adjust stop to be positive + if stop is None: + j = len_iter + elif stop >= 0: + j = min(stop, len_iter) + else: + j = max(len_iter + stop, 0) + + # Slice the cache + n = j - i + if n <= 0: + return + + for index, item in islice(cache, 0, n, step): + yield item + elif (stop is not None) and (stop < 0): + # Advance to the start position + next(islice(it, start, start), None) + + # When stop is negative, we have to carry -stop items while + # iterating + cache = deque(islice(it, -stop), maxlen=-stop) + + for index, item in enumerate(it): + cached_item = cache.popleft() + if index % step == 0: + yield cached_item + cache.append(item) + else: + # When both start and stop are positive we have the normal case + 
yield from islice(it, start, stop, step) + else: + start = -1 if (start is None) else start + + if (stop is not None) and (stop < 0): + # Consume all but the last items + n = -stop - 1 + cache = deque(enumerate(it, 1), maxlen=n) + len_iter = cache[-1][0] if cache else 0 + + # If start and stop are both negative they are comparable and + # we can just slice. Otherwise we can adjust start to be negative + # and then slice. + if start < 0: + i, j = start, stop + else: + i, j = min(start - len_iter, -1), None + + for index, item in list(cache)[i:j:step]: + yield item + else: + # Advance to the stop position + if stop is not None: + m = stop + 1 + next(islice(it, m, m), None) + + # stop is positive, so if start is negative they are not comparable + # and we need the rest of the items. + if start < 0: + i = start + n = None + # stop is None and start is positive, so we just need items up to + # the start index. + elif stop is None: + i = None + n = start + 1 + # Both stop and start are positive, so they are comparable. + else: + i = None + n = start - stop + if n <= 0: + return + + cache = list(islice(it, n)) + + yield from cache[i::step] + + +def always_reversible(iterable): + """An extension of :func:`reversed` that supports all iterables, not + just those which implement the ``Reversible`` or ``Sequence`` protocols. + + >>> print(*always_reversible(x for x in range(3))) + 2 1 0 + + If the iterable is already reversible, this function returns the + result of :func:`reversed()`. If the iterable is not reversible, + this function will cache the remaining items in the iterable and + yield them in reverse order, which may require significant storage. + """ + try: + return reversed(iterable) + except TypeError: + return reversed(list(iterable)) + + +def consecutive_groups(iterable, ordering=lambda x: x): + """Yield groups of consecutive items using :func:`itertools.groupby`. + The *ordering* function determines whether two items are adjacent by + returning their position. 
+ + By default, the ordering function is the identity function. This is + suitable for finding runs of numbers: + + >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40] + >>> for group in consecutive_groups(iterable): + ... print(list(group)) + [1] + [10, 11, 12] + [20] + [30, 31, 32, 33] + [40] + + For finding runs of adjacent letters, try using the :meth:`index` method + of a string of letters: + + >>> from string import ascii_lowercase + >>> iterable = 'abcdfgilmnop' + >>> ordering = ascii_lowercase.index + >>> for group in consecutive_groups(iterable, ordering): + ... print(list(group)) + ['a', 'b', 'c', 'd'] + ['f', 'g'] + ['i'] + ['l', 'm', 'n', 'o', 'p'] + + Each group of consecutive items is an iterator that shares it source with + *iterable*. When an an output group is advanced, the previous group is + no longer available unless its elements are copied (e.g., into a ``list``). + + >>> iterable = [1, 2, 11, 12, 21, 22] + >>> saved_groups = [] + >>> for group in consecutive_groups(iterable): + ... saved_groups.append(list(group)) # Copy group elements + >>> saved_groups + [[1, 2], [11, 12], [21, 22]] + + """ + for k, g in groupby( + enumerate(iterable), key=lambda x: x[0] - ordering(x[1]) + ): + yield map(itemgetter(1), g) + + +def difference(iterable, func=sub, *, initial=None): + """This function is the inverse of :func:`itertools.accumulate`. By default + it will compute the first difference of *iterable* using + :func:`operator.sub`: + + >>> from itertools import accumulate + >>> iterable = accumulate([0, 1, 2, 3, 4]) # produces 0, 1, 3, 6, 10 + >>> list(difference(iterable)) + [0, 1, 2, 3, 4] + + *func* defaults to :func:`operator.sub`, but other functions can be + specified. They will be applied as follows:: + + A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ... 
+ + For example, to do progressive division: + + >>> iterable = [1, 2, 6, 24, 120] + >>> func = lambda x, y: x // y + >>> list(difference(iterable, func)) + [1, 2, 3, 4, 5] + + If the *initial* keyword is set, the first element will be skipped when + computing successive differences. + + >>> it = [10, 11, 13, 16] # from accumulate([1, 2, 3], initial=10) + >>> list(difference(it, initial=10)) + [1, 2, 3] + + """ + a, b = tee(iterable) + try: + first = [next(b)] + except StopIteration: + return iter([]) + + if initial is not None: + first = [] + + return chain(first, starmap(func, zip(b, a))) + + +class SequenceView(Sequence): + """Return a read-only view of the sequence object *target*. + + :class:`SequenceView` objects are analogous to Python's built-in + "dictionary view" types. They provide a dynamic view of a sequence's items, + meaning that when the sequence updates, so does the view. + + >>> seq = ['0', '1', '2'] + >>> view = SequenceView(seq) + >>> view + SequenceView(['0', '1', '2']) + >>> seq.append('3') + >>> view + SequenceView(['0', '1', '2', '3']) + + Sequence views support indexing, slicing, and length queries. They act + like the underlying sequence, except they don't allow assignment: + + >>> view[1] + '1' + >>> view[1:-1] + ['1', '2'] + >>> len(view) + 4 + + Sequence views are useful as an alternative to copying, as they don't + require (much) extra storage. + + """ + + def __init__(self, target): + if not isinstance(target, Sequence): + raise TypeError + self._target = target + + def __getitem__(self, index): + return self._target[index] + + def __len__(self): + return len(self._target) + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, repr(self._target)) + + +class seekable: + """Wrap an iterator to allow for seeking backward and forward. This + progressively caches the items in the source iterable so they can be + re-visited. + + Call :meth:`seek` with an index to seek to that position in the source + iterable. 
+ + To "reset" an iterator, seek to ``0``: + + >>> from itertools import count + >>> it = seekable((str(n) for n in count())) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> it.seek(0) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> next(it) + '3' + + You can also seek forward: + + >>> it = seekable((str(n) for n in range(20))) + >>> it.seek(10) + >>> next(it) + '10' + >>> it.seek(20) # Seeking past the end of the source isn't a problem + >>> list(it) + [] + >>> it.seek(0) # Resetting works even after hitting the end + >>> next(it), next(it), next(it) + ('0', '1', '2') + + Call :meth:`peek` to look ahead one item without advancing the iterator: + + >>> it = seekable('1234') + >>> it.peek() + '1' + >>> list(it) + ['1', '2', '3', '4'] + >>> it.peek(default='empty') + 'empty' + + Before the iterator is at its end, calling :func:`bool` on it will return + ``True``. After it will return ``False``: + + >>> it = seekable('5678') + >>> bool(it) + True + >>> list(it) + ['5', '6', '7', '8'] + >>> bool(it) + False + + You may view the contents of the cache with the :meth:`elements` method. + That returns a :class:`SequenceView`, a view that updates automatically: + + >>> it = seekable((str(n) for n in range(10))) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> elements = it.elements() + >>> elements + SequenceView(['0', '1', '2']) + >>> next(it) + '3' + >>> elements + SequenceView(['0', '1', '2', '3']) + + By default, the cache grows as the source iterable progresses, so beware of + wrapping very large or infinite iterables. Supply *maxlen* to limit the + size of the cache (this of course limits how far back you can seek). 
+ + >>> from itertools import count + >>> it = seekable((str(n) for n in count()), maxlen=2) + >>> next(it), next(it), next(it), next(it) + ('0', '1', '2', '3') + >>> list(it.elements()) + ['2', '3'] + >>> it.seek(0) + >>> next(it), next(it), next(it), next(it) + ('2', '3', '4', '5') + >>> next(it) + '6' + + """ + + def __init__(self, iterable, maxlen=None): + self._source = iter(iterable) + if maxlen is None: + self._cache = [] + else: + self._cache = deque([], maxlen) + self._index = None + + def __iter__(self): + return self + + def __next__(self): + if self._index is not None: + try: + item = self._cache[self._index] + except IndexError: + self._index = None + else: + self._index += 1 + return item + + item = next(self._source) + self._cache.append(item) + return item + + def __bool__(self): + try: + self.peek() + except StopIteration: + return False + return True + + def peek(self, default=_marker): + try: + peeked = next(self) + except StopIteration: + if default is _marker: + raise + return default + if self._index is None: + self._index = len(self._cache) + self._index -= 1 + return peeked + + def elements(self): + return SequenceView(self._cache) + + def seek(self, index): + self._index = index + remainder = index - len(self._cache) + if remainder > 0: + consume(self, remainder) + + +class run_length: + """ + :func:`run_length.encode` compresses an iterable with run-length encoding. + It yields groups of repeated items with the count of how many times they + were repeated: + + >>> uncompressed = 'abbcccdddd' + >>> list(run_length.encode(uncompressed)) + [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + + :func:`run_length.decode` decompresses an iterable that was previously + compressed with run-length encoding. 
It yields the items of the + decompressed iterable: + + >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + >>> list(run_length.decode(compressed)) + ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd'] + + """ + + @staticmethod + def encode(iterable): + return ((k, ilen(g)) for k, g in groupby(iterable)) + + @staticmethod + def decode(iterable): + return chain.from_iterable(repeat(k, n) for k, n in iterable) + + +def exactly_n(iterable, n, predicate=bool): + """Return ``True`` if exactly ``n`` items in the iterable are ``True`` + according to the *predicate* function. + + >>> exactly_n([True, True, False], 2) + True + >>> exactly_n([True, True, False], 1) + False + >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3) + True + + The iterable will be advanced until ``n + 1`` truthy items are encountered, + so avoid calling it on infinite iterables. + + """ + return len(take(n + 1, filter(predicate, iterable))) == n + + +def circular_shifts(iterable): + """Return a list of circular shifts of *iterable*. + + >>> circular_shifts(range(4)) + [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)] + """ + lst = list(iterable) + return take(len(lst), windowed(cycle(lst), len(lst))) + + +def make_decorator(wrapping_func, result_index=0): + """Return a decorator version of *wrapping_func*, which is a function that + modifies an iterable. *result_index* is the position in that function's + signature where the iterable goes. + + This lets you use itertools on the "production end," i.e. at function + definition. This can augment what the function returns without changing the + function's code. + + For example, to produce a decorator version of :func:`chunked`: + + >>> from more_itertools import chunked + >>> chunker = make_decorator(chunked, result_index=0) + >>> @chunker(3) + ... def iter_range(n): + ... return iter(range(n)) + ... 
+ >>> list(iter_range(9)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8]] + + To only allow truthy items to be returned: + + >>> truth_serum = make_decorator(filter, result_index=1) + >>> @truth_serum(bool) + ... def boolean_test(): + ... return [0, 1, '', ' ', False, True] + ... + >>> list(boolean_test()) + [1, ' ', True] + + The :func:`peekable` and :func:`seekable` wrappers make for practical + decorators: + + >>> from more_itertools import peekable + >>> peekable_function = make_decorator(peekable) + >>> @peekable_function() + ... def str_range(*args): + ... return (str(x) for x in range(*args)) + ... + >>> it = str_range(1, 20, 2) + >>> next(it), next(it), next(it) + ('1', '3', '5') + >>> it.peek() + '7' + >>> next(it) + '7' + + """ \ No newline at end of file From c2e6d474eb842b96eeb9ba56bbd188575f89f279 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 4 Jul 2022 22:05:10 -0400 Subject: [PATCH 08/10] Increase test deadline --- code_data/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code_data/test.py b/code_data/test.py index 963f3c5..7026470 100644 --- a/code_data/test.py +++ b/code_data/test.py @@ -154,7 +154,7 @@ def test_modules(): @settings( suppress_health_check=(HealthCheck.filter_too_much, HealthCheck.too_slow), deadline=timedelta( - milliseconds=1000 + milliseconds=2000 ), # increase deadline to account for slow times in CI ) def test_generated(source_code): From 19921484d741550db52e2c2ac819fa39d8be4ce2 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 5 Jul 2022 17:41:27 -0400 Subject: [PATCH 09/10] Add support for function docstrings and repeated consts --- code_data/__init__.py | 70 +- code_data/blocks.py | 40 +- code_data/cli.py | 4 +- code_data/dataclass_hide_default.py | 12 +- code_data/test.py | 152 +- .../setuptools._vendor.more_itertools.more.py | 1224 +---------------- 6 files changed, 200 insertions(+), 1302 deletions(-) diff --git a/code_data/__init__.py b/code_data/__init__.py index 52f473a..0d83f07 100644 
--- a/code_data/__init__.py +++ b/code_data/__init__.py @@ -56,6 +56,9 @@ class CodeData(DataclassHideDefault): # Mapping of index in the names list to the name additional_constants: dict[int, ConstantDataType] = field(default_factory=dict) + # The type of block this is + type: BlockType = field(default=None) + # number of arguments (not including keyword only arguments, * or ** args) argcount: int = field(default=0) @@ -92,6 +95,11 @@ def _verify(self) -> None: verify_block(self.blocks) +# Functions should have both of these flags set +# https://github.com/python/cpython/blob/443370d8acd107da235d2e9758e06ab3583be4ea/Python/compile.c#L5348 +FN_FLAGS = {"NEWLOCALS", "OPTIMIZED"} + + def to_code_data(code: CodeType) -> CodeData: """ Parse a CodeType into python data structure. @@ -107,9 +115,31 @@ def to_code_data(code: CodeType) -> CodeData: first_line_number_override = line_mapping.set_first_line(code.co_firstlineno) constants = tuple(map(to_code_constant, code.co_consts)) + + flag_data = to_flags_data(code.co_flags) + + fn_flags = flag_data & FN_FLAGS + if len(fn_flags) == 0: + block_type = None + elif len(fn_flags) == 2: + # Use the first const as a docstring if its a string + # https://github.com/python/cpython/blob/da8be157f4e275c4c32b9199f1466ed7e52f62cf/Objects/funcobject.c#L33-L38 + # TODO: Maybe just assume that first arg is not docstring if it's none? Naw... 
+ docstring_in_consts = False + docstring: Optional[str] = None + if constants: + first_constant = constants[0] + if isinstance(first_constant, str) or first_constant is None: + docstring_in_consts = True + docstring = first_constant + block_type = FunctionBlock(docstring, docstring_in_consts) + flag_data -= FN_FLAGS + else: + raise ValueError(f"Expected both flags to represent function: {fn_flags}") + # retrieve the blocks and pop off used line mapping blocks, additional_names, additional_constants = bytes_to_blocks( - code.co_code, line_mapping, code.co_names, constants + code.co_code, line_mapping, code.co_names, constants, block_type ) return CodeData( blocks, @@ -117,12 +147,13 @@ def to_code_data(code: CodeType) -> CodeData: first_line_number_override, additional_names, additional_constants, + block_type, code.co_argcount, posonlyargcount, code.co_kwonlyargcount, code.co_nlocals, code.co_stacksize, - to_flags_data(code.co_flags), + flag_data, code.co_varnames, code.co_filename, code.co_name, @@ -137,9 +168,15 @@ def from_code_data(code_data: CodeData) -> CodeType: :rtype: types.CodeType """ - flags = from_flags_data(code_data.flags) + flags_data = code_data.flags + if isinstance(code_data.type, FunctionBlock): + flags_data = FN_FLAGS | flags_data + flags = from_flags_data(flags_data) code, line_mapping, names, constants = blocks_to_bytes( - code_data.blocks, code_data.additional_names, code_data.additional_constants + code_data.blocks, + code_data.additional_names, + code_data.additional_constants, + code_data.type, ) consts = tuple(map(from_code_constant, constants)) @@ -189,6 +226,19 @@ def from_code_data(code_data: CodeData) -> CodeType: ) +# The type of block this is, as we can infer from the flags. 
+# https://github.com/python/cpython/blob/5506d603021518eaaa89e7037905f7a698c5e95c/Include/symtable.h#L13 +BlockType = Union["FunctionBlock", None] + + +@dataclass +class FunctionBlock(DataclassHideDefault): + docstring: Optional[str] = field(default=None) + # Set to false if the docstring is not saved as a constant. In this case, it + # must be 0. This happens for list comprehensions + docstring_in_consts: bool = field(default=True) + + ConstantDataType = Union[ "ConstantInt", str, @@ -241,24 +291,24 @@ def from_code_constant(value: ConstantDataType) -> object: @dataclass(frozen=True) -class ConstantBool: +class ConstantBool(DataclassHideDefault): value: bool = field(metadata={"positional": True}) @dataclass(frozen=True) -class ConstantInt: +class ConstantInt(DataclassHideDefault): value: int = field(metadata={"positional": True}) @dataclass(frozen=True) -class ConstantFloat: +class ConstantFloat(DataclassHideDefault): value: float = field(metadata={"positional": True}) # Store if the value is negative 0, so that == distinguishes between 0.0 and -0.0 is_neg_zero: bool = field(default=False) @dataclass(frozen=True) -class ConstantComplex: +class ConstantComplex(DataclassHideDefault): value: complex = field(metadata={"positional": True}) # Store if the value is negative 0, so that == distinguishes between 0.0 and -0.0 real_is_neg_zero: bool = field(default=False) @@ -270,7 +320,9 @@ class ConstantComplex: # https://github.com/python/mypy/issues/731 @dataclass(frozen=True) class ConstantTuple(DataclassHideDefault): - value: Tuple[ConstantDataType, ...] = field(metadata={"positional": True}) + # Make not positional until rich supports positional tuples + # https://github.com/Textualize/rich/pull/2379 + value: tuple[ConstantDataType, ...] 
= field(metadata={"positional": False}) @dataclass(frozen=True) diff --git a/code_data/blocks.py b/code_data/blocks.py index a51f8a5..03b28a4 100644 --- a/code_data/blocks.py +++ b/code_data/blocks.py @@ -11,12 +11,11 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union -from code_data.line_mapping import LineMapping - from .dataclass_hide_default import DataclassHideDefault +from .line_mapping import LineMapping if TYPE_CHECKING: - from . import ConstantDataType + from . import BlockType, ConstantDataType def bytes_to_blocks( @@ -24,10 +23,13 @@ def bytes_to_blocks( line_mapping: LineMapping, names: tuple[str, ...], constants: tuple[ConstantDataType, ...], + block_type: BlockType, ) -> tuple[Blocks, dict[int, str], dict[int, ConstantDataType]]: """ Parse a sequence of bytes as a sequence of blocks of instructions. """ + from . import FunctionBlock + # First, iterate through bytes to make instructions while also making set of all the # targets # List of bytecode offsets and instructions @@ -42,13 +44,30 @@ def bytes_to_blocks( # For recording what constants we have found to understand the order of the # constants found_constants: list[ConstantDataType] = [] + # Keep a set of the constant indices we have found, so we can check this against + # our initial constants at the end to see which we still have to add + # We should do the same with names, but checking against found_names works + # fine as long as names aren't duplicated, which they don't seem to be in the + # same way as constants + found_constant_indices: set[int] = set() + # If we have a function block, the first constant is the docstring. 
+ if isinstance(block_type, FunctionBlock) and block_type.docstring_in_consts: + found_constants.append(block_type.docstring) + found_constant_indices.add(0) for opcode, arg, n_args, offset, next_offset in _parse_bytes(b): # Compute the jump targets, initially with just the byte offset # Once we know all the block targets, we will transform to be block offsets processed_arg = to_arg( - opcode, arg, next_offset, names, found_names, constants, found_constants + opcode, + arg, + next_offset, + names, + found_names, + constants, + found_constants, + found_constant_indices, ) if isinstance(processed_arg, Jump): targets_set.add(processed_arg.target) @@ -96,7 +115,7 @@ def bytes_to_blocks( additional_constants = { i: constant for i, constant in enumerate(constants) - if constant not in found_constants + if i not in found_constant_indices } return ( {i: block for i, block in enumerate(blocks)}, @@ -109,7 +128,10 @@ def blocks_to_bytes( blocks: Blocks, additional_names: dict[int, str], additional_consts: dict[int, ConstantDataType], + block_type: BlockType, ) -> Tuple[bytes, LineMapping, tuple[str, ...], tuple[ConstantDataType, ...]]: + from . 
import FunctionBlock + # First compute mapping from block to offset changed_instruction_lengths = True # So that we know the bytecode offsets for jumps when iterating though instructions @@ -120,6 +142,7 @@ def blocks_to_bytes( # List of names we have collected from the instructions names: list[str] = [] + # Mapping of name index to final name positions name_final_positions: dict[int, int] = {} @@ -128,6 +151,11 @@ def blocks_to_bytes( # Mapping of constant index to constant name positions constant_final_positions: dict[int, int] = {} + # If it is a function block, we start with the docstring + if isinstance(block_type, FunctionBlock) and block_type.docstring_in_consts: + constants.append(block_type.docstring) + constant_final_positions[0] = 0 + # Iterate through all blocks and change jump instructions to offsets while changed_instruction_lengths: @@ -236,6 +264,7 @@ def to_arg( found_names: list[str], consts: tuple[ConstantDataType, ...], found_constants: list[ConstantDataType], + found_constant_indices: set[int], ) -> Arg: if opcode in dis.hasjabs: return Jump((2 if _ATLEAST_310 else 1) * arg, False) @@ -249,6 +278,7 @@ def to_arg( wrong_position = found_names.index(name) != arg return Name(name, arg if wrong_position else None) elif opcode in dis.hasconst: + found_constant_indices.add(arg) constant = consts[arg] if constant not in found_constants: found_constants.append(constant) diff --git a/code_data/cli.py b/code_data/cli.py index f8432f7..4bb575c 100644 --- a/code_data/cli.py +++ b/code_data/cli.py @@ -29,6 +29,7 @@ parser.add_argument("--source", action="store_true", help="print the source code") +# TODO: #51 Add tests for CLI def main(): """ Parse the CLI commands and print the code data. 
@@ -75,4 +76,5 @@ def main(): code_data = to_code_data(code) console.print(code_data) if show_dis_after: - dis.dis(from_code_data(code_data)) + res = from_code_data(code_data) + dis.dis(res) diff --git a/code_data/dataclass_hide_default.py b/code_data/dataclass_hide_default.py index 03d43bb..9bbaa33 100644 --- a/code_data/dataclass_hide_default.py +++ b/code_data/dataclass_hide_default.py @@ -1,4 +1,10 @@ +from __future__ import annotations + from dataclasses import MISSING, fields +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import rich class DataclassHideDefault: @@ -14,13 +20,13 @@ class DataclassHideDefault: https://rich.readthedocs.io/en/stable/pretty.html """ - def __rich_repr__(self): + def __rich_repr__(self) -> rich.repr.Result: for f in fields(self): if not f.repr: continue - if f.default_factory is not MISSING: # type: ignore + if f.default_factory != MISSING: # type: ignore default = f.default_factory() - elif f.default is not MISSING: + elif f.default != MISSING: default = f.default else: default = object() diff --git a/code_data/test.py b/code_data/test.py index 7026470..0b24eaa 100644 --- a/code_data/test.py +++ b/code_data/test.py @@ -117,7 +117,7 @@ def test_modules(): continue else: try: - verify_code(code) + verify_code(code, debug=False) # If this fails, its the new minimal source except Exception: source = minimized_source @@ -137,7 +137,7 @@ def test_modules(): continue else: try: - verify_code(code) + verify_code(code, debug=False) # If this fails, its the new minimal source except Exception: source = minimized_source @@ -147,7 +147,8 @@ def test_modules(): path = EXAMPLES_DIR / f"{name}.py" path.write_text(source) progress.console.print(f"Wrote minimized source to {path}") - assert False + code = compile(source, str(path), "exec") + verify_code(code) @given(source_code=hypothesmith.from_node()) @@ -165,18 +166,36 @@ def test_generated(source_code): verify_code(code) -def verify_code(code: CodeType) -> None: +def verify_code(code: 
CodeType, debug=True) -> None: code_data = to_code_data(code) code_data._verify() resulting_code = from_code_data(code_data) - # First compare as primitives, for better diffing if they aren't equal - assert code_to_primitives(code, verify_line_mappings=True) == code_to_primitives( - resulting_code, verify_line_mappings=False - ) + # If we aren't debugging just assert they are equal + if not debug: + assert code == resulting_code - # Then compare objects directly, for greater equality confidence + # Otherwise, we want to get a more granular error message, if possible + code_equal = code == resulting_code + if code_equal: + return + + # Otherwise, we start analyzing the code in more granular ways to try to narrow + # down which part of the code object is different. + + # First, we check if the primitives are the same, minus the line table + assert code_to_primitives(code) == code_to_primitives(resulting_code) + + # If they are the same, we can check if the line table is the same + verify_line_mapping(code, resulting_code) + + # If the line table is the same, we can verify that the constant keys are the same + verify_constant_keys(code, resulting_code) + + # If all those are the same, then we aren't sure why the code objects are different + # and just assert they are equal assert code == resulting_code + # We used to compare the marhshalled bytes as well, but this was unstable # due to whether the constants had refernces to them, so we disabled it @@ -190,20 +209,21 @@ def verify_code(code: CodeType) -> None: # TODO: look at how co_lines works and make sure we can duplicate logic for mapping # https://docs.python.org/3/whatsnew/3.10.html?highlight=co_lines#pep-626-precise-line-numbers-for-debugging-and-other-tools and name != "co_lines" + # Also ignore lines + and name != "co_lnotab" and name != "co_linetable" ) -def code_to_primitives(code: CodeType, verify_line_mappings: bool) -> dict[str, object]: +def code_to_primitives(code: CodeType) -> dict[str, object]: """ 
Converts a code object to primitives, for better pytest diffing. - - Also verifies that line mapping are accurate for each """ - if verify_line_mappings: - verify_line_mapping(code) return { name: ( - consts_to_primitives(code.co_consts, verify_line_mappings) + tuple( + code_to_primitives(a) if isinstance(a, CodeType) else a + for a in code.co_consts + ) if name == "co_consts" else [(i.opname, i.argval) for i in _get_instructions_bytes(code.co_code)] if name == "co_code" @@ -217,32 +237,20 @@ def code_to_primitives(code: CodeType, verify_line_mappings: bool) -> dict[str, _PyCode_ConstantKey.restype = ctypes.py_object -def consts_to_primitives( - consts: tuple[object, ...], verify_line_mappings: bool -) -> tuple[object, ...]: +def verify_constant_keys(code: CodeType, resulting_code: CodeType) -> None: """ - Transforms code constants into primitives which are easier to see the diff - of for comparison. + Verifies that the constant keys are the same in the code object. """ + for l, r in zip(code.co_consts, resulting_code.co_consts): + if isinstance(l, CodeType): + verify_constant_keys(l, r) + else: + assert _PyCode_ConstantKey(ctypes.py_object(l)) == _PyCode_ConstantKey( + ctypes.py_object(r) + ) - consts = tuple( - code_to_primitives(a, verify_line_mappings) if isinstance(a, CodeType) - # If we have some other constant, use the same function the code object uses - # to differentiate based on the type. - else _PyCode_ConstantKey(ctypes.py_object(a)) - for a in consts - ) - return consts - -def code_to_dict(code: CodeType) -> dict[str, object]: - """ - Converts a code object to a dict for testing - """ - return {name: getattr(code, name) for name in dir(code)} - - -def verify_line_mapping(code: CodeType): +def verify_line_mapping(code: CodeType, resulting_code: CodeType) -> None: """ Verify the mapping type by testing each conversion layer and making sure they are isomorphic. 
@@ -250,33 +258,49 @@ def verify_line_mapping(code: CodeType): The tests are written in this way, so we can more easily which layer is causing the error. """ - # Include when we need to show locals - # _dis = dis.Bytecode(code).dis() - # print(_dis) - - b: bytes = code.co_linetable if USE_LINETABLE else code.co_lnotab # type: ignore - max_offset = len(code.co_code) - expanded_items = bytes_to_items(b) - assert items_to_bytes(expanded_items) == b, "bytes to items to bytes" - - collapsed_items = collapse_items(expanded_items, USE_LINETABLE) - assert ( - expand_items(collapsed_items, USE_LINETABLE) == expanded_items - ), "collapsed to expanded to collapsed" - - mapping = items_to_mapping(collapsed_items, max_offset, USE_LINETABLE) - assert ( - mapping_to_items(mapping, USE_LINETABLE) == collapsed_items - ), "items to mapping to items" - - assert mapping_to_line_starts(mapping, code.co_firstlineno, max_offset) == dict( - dis.findlinestarts(code) - ), "mapping matches dis.findlinestarts" - - if hasattr(code, "co_lines"): - assert mapping == co_lines_to_mapping( - cast(Any, code).co_lines(), code.co_firstlineno - ), "mapping matches dis.co_lines" + b = get_code_line_bytes(code) + # if they are not equal, try seeing where the process failed + if b != get_code_line_bytes(resulting_code): + max_offset = len(code.co_code) + expanded_items = bytes_to_items(b) + assert items_to_bytes(expanded_items) == b, "bytes to items to bytes" + + collapsed_items = collapse_items(expanded_items, USE_LINETABLE) + assert ( + expand_items(collapsed_items, USE_LINETABLE) == expanded_items + ), "collapsed to expanded to collapsed" + + mapping = items_to_mapping(collapsed_items, max_offset, USE_LINETABLE) + assert ( + mapping_to_items(mapping, USE_LINETABLE) == collapsed_items + ), "items to mapping to items" + + assert mapping_to_line_starts(mapping, code.co_firstlineno, max_offset) == dict( + dis.findlinestarts(code) + ), "mapping matches dis.findlinestarts" + + if hasattr(code, "co_lines"): + 
assert mapping == co_lines_to_mapping( + cast(Any, code).co_lines(), code.co_firstlineno + ), "mapping matches dis.co_lines" + + assert b == get_code_line_bytes( + resulting_code + ), "somehow line table bytes are still different" + + # Recurse on inner code objects + for const in code.co_consts: + if isinstance(const, CodeType): + verify_line_mapping(const) + + +def get_code_line_bytes(code: CodeType, offset: int) -> bytes: + """ + Get the bytes for a line of code. + """ + if USE_LINETABLE: + return code.co_linetable + return code.co_lnotab def mapping_to_line_starts( diff --git a/code_data/test_minimized/setuptools._vendor.more_itertools.more.py b/code_data/test_minimized/setuptools._vendor.more_itertools.more.py index f111aa1..d0f041e 100644 --- a/code_data/test_minimized/setuptools._vendor.more_itertools.more.py +++ b/code_data/test_minimized/setuptools._vendor.more_itertools.more.py @@ -1,1222 +1,6 @@ -def sort_together(iterables, key_list=(0,), key=None, reverse=False): - """Return the input iterables sorted together, with *key_list* as the - priority for sorting. All iterables are trimmed to the length of the - shortest one. +def x(a=(0,)): + pass - This can be used like the sorting function in a spreadsheet. If each - iterable represents a column of data, the key list determines which - columns are used for sorting. - By default, all iterables are sorted using the ``0``-th iterable:: - - >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')] - >>> sort_together(iterables) - [(1, 2, 3, 4), ('d', 'c', 'b', 'a')] - - Set a different key list to sort according to another iterable. - Specifying multiple keys dictates how ties are broken:: - - >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')] - >>> sort_together(iterables, key_list=(1, 2)) - [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')] - - To sort by a function of the elements of the iterable, pass a *key* - function. 
Its arguments are the elements of the iterables corresponding to - the key list:: - - >>> names = ('a', 'b', 'c') - >>> lengths = (1, 2, 3) - >>> widths = (5, 2, 1) - >>> def area(length, width): - ... return length * width - >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area) - [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)] - - Set *reverse* to ``True`` to sort in descending order. - - >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True) - [(3, 2, 1), ('a', 'b', 'c')] - - """ - if key is None: - # if there is no key function, the key argument to sorted is an - # itemgetter - key_argument = itemgetter(*key_list) - else: - # if there is a key function, call it with the items at the offsets - # specified by the key function as arguments - key_list = list(key_list) - if len(key_list) == 1: - # if key_list contains a single item, pass the item at that offset - # as the only argument to the key function - key_offset = key_list[0] - key_argument = lambda zipped_items: key(zipped_items[key_offset]) - else: - # if key_list contains multiple items, use itemgetter to return a - # tuple of items, which we pass as *args to the key function - get_key_items = itemgetter(*key_list) - key_argument = lambda zipped_items: key( - *get_key_items(zipped_items) - ) - - return list( - zip(*sorted(zip(*iterables), key=key_argument, reverse=reverse)) - ) - - -def unzip(iterable): - """The inverse of :func:`zip`, this function disaggregates the elements - of the zipped *iterable*. - - The ``i``-th iterable contains the ``i``-th element from each element - of the zipped iterable. The first element is used to to determine the - length of the remaining elements. - - >>> iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)] - >>> letters, numbers = unzip(iterable) - >>> list(letters) - ['a', 'b', 'c', 'd'] - >>> list(numbers) - [1, 2, 3, 4] - - This is similar to using ``zip(*iterable)``, but it avoids reading - *iterable* into memory. 
Note, however, that this function uses - :func:`itertools.tee` and thus may require significant storage. - - """ - head, iterable = spy(iter(iterable)) - if not head: - # empty iterable, e.g. zip([], [], []) - return () - # spy returns a one-length iterable as head - head = head[0] - iterables = tee(iterable, len(head)) - - def itemgetter(i): - def getter(obj): - try: - return obj[i] - except IndexError: - # basically if we have an iterable like - # iter([(1, 2, 3), (4, 5), (6,)]) - # the second unzipped iterable would fail at the third tuple - # since it would try to access tup[1] - # same with the third unzipped iterable and the second tuple - # to support these "improperly zipped" iterables, - # we create a custom itemgetter - # which just stops the unzipped iterables - # at first length mismatch - raise StopIteration - - return getter - - return tuple(map(itemgetter(i), it) for i, it in enumerate(iterables)) - - -def divide(n, iterable): - """Divide the elements from *iterable* into *n* parts, maintaining - order. - - >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6]) - >>> list(group_1) - [1, 2, 3] - >>> list(group_2) - [4, 5, 6] - - If the length of *iterable* is not evenly divisible by *n*, then the - length of the returned iterables will not be identical: - - >>> children = divide(3, [1, 2, 3, 4, 5, 6, 7]) - >>> [list(c) for c in children] - [[1, 2, 3], [4, 5], [6, 7]] - - If the length of the iterable is smaller than n, then the last returned - iterables will be empty: - - >>> children = divide(5, [1, 2, 3]) - >>> [list(c) for c in children] - [[1], [2], [3], [], []] - - This function will exhaust the iterable before returning and may require - significant storage. If order is not important, see :func:`distribute`, - which does not first pull the iterable into memory. 
- - """ - if n < 1: - raise ValueError('n must be at least 1') - - try: - iterable[:0] - except TypeError: - seq = tuple(iterable) - else: - seq = iterable - - q, r = divmod(len(seq), n) - - ret = [] - stop = 0 - for i in range(1, n + 1): - start = stop - stop += q + 1 if i <= r else q - ret.append(iter(seq[start:stop])) - - return ret - - -def always_iterable(obj, base_type=(str, bytes)): - """If *obj* is iterable, return an iterator over its items:: - - >>> obj = (1, 2, 3) - >>> list(always_iterable(obj)) - [1, 2, 3] - - If *obj* is not iterable, return a one-item iterable containing *obj*:: - - >>> obj = 1 - >>> list(always_iterable(obj)) - [1] - - If *obj* is ``None``, return an empty iterable: - - >>> obj = None - >>> list(always_iterable(None)) - [] - - By default, binary and text strings are not considered iterable:: - - >>> obj = 'foo' - >>> list(always_iterable(obj)) - ['foo'] - - If *base_type* is set, objects for which ``isinstance(obj, base_type)`` - returns ``True`` won't be considered iterable. - - >>> obj = {'a': 1} - >>> list(always_iterable(obj)) # Iterate over the dict's keys - ['a'] - >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit - [{'a': 1}] - - Set *base_type* to ``None`` to avoid any special handling and treat objects - Python considers iterable as iterable: - - >>> obj = 'foo' - >>> list(always_iterable(obj, base_type=None)) - ['f', 'o', 'o'] - """ - if obj is None: - return iter(()) - - if (base_type is not None) and isinstance(obj, base_type): - return iter((obj,)) - - try: - return iter(obj) - except TypeError: - return iter((obj,)) - - -def adjacent(predicate, iterable, distance=1): - """Return an iterable over `(bool, item)` tuples where the `item` is - drawn from *iterable* and the `bool` indicates whether - that item satisfies the *predicate* or is adjacent to an item that does. 
- - For example, to find whether items are adjacent to a ``3``:: - - >>> list(adjacent(lambda x: x == 3, range(6))) - [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)] - - Set *distance* to change what counts as adjacent. For example, to find - whether items are two places away from a ``3``: - - >>> list(adjacent(lambda x: x == 3, range(6), distance=2)) - [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)] - - This is useful for contextualizing the results of a search function. - For example, a code comparison tool might want to identify lines that - have changed, but also surrounding lines to give the viewer of the diff - context. - - The predicate function will only be called once for each item in the - iterable. - - See also :func:`groupby_transform`, which can be used with this function - to group ranges of items with the same `bool` value. - - """ - # Allow distance=0 mainly for testing that it reproduces results with map() - if distance < 0: - raise ValueError('distance must be at least 0') - - i1, i2 = tee(iterable) - padding = [False] * distance - selected = chain(padding, map(predicate, i1), padding) - adjacent_to_selected = map(any, windowed(selected, 2 * distance + 1)) - return zip(adjacent_to_selected, i2) - - -def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None): - """An extension of :func:`itertools.groupby` that can apply transformations - to the grouped data. 
- - * *keyfunc* is a function computing a key value for each item in *iterable* - * *valuefunc* is a function that transforms the individual items from - *iterable* after grouping - * *reducefunc* is a function that transforms each group of items - - >>> iterable = 'aAAbBBcCC' - >>> keyfunc = lambda k: k.upper() - >>> valuefunc = lambda v: v.lower() - >>> reducefunc = lambda g: ''.join(g) - >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc)) - [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')] - - Each optional argument defaults to an identity function if not specified. - - :func:`groupby_transform` is useful when grouping elements of an iterable - using a separate iterable as the key. To do this, :func:`zip` the iterables - and pass a *keyfunc* that extracts the first element and a *valuefunc* - that extracts the second element:: - - >>> from operator import itemgetter - >>> keys = [0, 0, 1, 1, 1, 2, 2, 2, 3] - >>> values = 'abcdefghi' - >>> iterable = zip(keys, values) - >>> grouper = groupby_transform(iterable, itemgetter(0), itemgetter(1)) - >>> [(k, ''.join(g)) for k, g in grouper] - [(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')] - - Note that the order of items in the iterable is significant. - Only adjacent items are grouped together, so if you don't want any - duplicate groups, you should sort the iterable by the key function. - - """ - ret = groupby(iterable, keyfunc) - if valuefunc: - ret = ((k, map(valuefunc, g)) for k, g in ret) - if reducefunc: - ret = ((k, reducefunc(g)) for k, g in ret) - - return ret - - -class numeric_range(abc.Sequence, abc.Hashable): - """An extension of the built-in ``range()`` function whose arguments can - be any orderable numeric type. - - With only *stop* specified, *start* defaults to ``0`` and *step* - defaults to ``1``. The output items will match the type of *stop*: - - >>> list(numeric_range(3.5)) - [0.0, 1.0, 2.0, 3.0] - - With only *start* and *stop* specified, *step* defaults to ``1``. 
The - output items will match the type of *start*: - - >>> from decimal import Decimal - >>> start = Decimal('2.1') - >>> stop = Decimal('5.1') - >>> list(numeric_range(start, stop)) - [Decimal('2.1'), Decimal('3.1'), Decimal('4.1')] - - With *start*, *stop*, and *step* specified the output items will match - the type of ``start + step``: - - >>> from fractions import Fraction - >>> start = Fraction(1, 2) # Start at 1/2 - >>> stop = Fraction(5, 2) # End at 5/2 - >>> step = Fraction(1, 2) # Count by 1/2 - >>> list(numeric_range(start, stop, step)) - [Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)] - - If *step* is zero, ``ValueError`` is raised. Negative steps are supported: - - >>> list(numeric_range(3, -1, -1.0)) - [3.0, 2.0, 1.0, 0.0] - - Be aware of the limitations of floating point numbers; the representation - of the yielded numbers may be surprising. - - ``datetime.datetime`` objects can be used for *start* and *stop*, if *step* - is a ``datetime.timedelta`` object: - - >>> import datetime - >>> start = datetime.datetime(2019, 1, 1) - >>> stop = datetime.datetime(2019, 1, 3) - >>> step = datetime.timedelta(days=1) - >>> items = iter(numeric_range(start, stop, step)) - >>> next(items) - datetime.datetime(2019, 1, 1, 0, 0) - >>> next(items) - datetime.datetime(2019, 1, 2, 0, 0) - - """ - - _EMPTY_HASH = hash(range(0, 0)) - - def __init__(self, *args): - argc = len(args) - if argc == 1: - (self._stop,) = args - self._start = type(self._stop)(0) - self._step = type(self._stop - self._start)(1) - elif argc == 2: - self._start, self._stop = args - self._step = type(self._stop - self._start)(1) - elif argc == 3: - self._start, self._stop, self._step = args - elif argc == 0: - raise TypeError( - 'numeric_range expected at least ' - '1 argument, got {}'.format(argc) - ) - else: - raise TypeError( - 'numeric_range expected at most ' - '3 arguments, got {}'.format(argc) - ) - - self._zero = type(self._step)(0) - if self._step == self._zero: - raise 
ValueError('numeric_range() arg 3 must not be zero') - self._growing = self._step > self._zero - self._init_len() - - def __bool__(self): - if self._growing: - return self._start < self._stop - else: - return self._start > self._stop - - def __contains__(self, elem): - if self._growing: - if self._start <= elem < self._stop: - return (elem - self._start) % self._step == self._zero - else: - if self._start >= elem > self._stop: - return (self._start - elem) % (-self._step) == self._zero - - return False - - def __eq__(self, other): - if isinstance(other, numeric_range): - empty_self = not bool(self) - empty_other = not bool(other) - if empty_self or empty_other: - return empty_self and empty_other # True if both empty - else: - return ( - self._start == other._start - and self._step == other._step - and self._get_by_index(-1) == other._get_by_index(-1) - ) - else: - return False - - def __getitem__(self, key): - if isinstance(key, int): - return self._get_by_index(key) - elif isinstance(key, slice): - step = self._step if key.step is None else key.step * self._step - - if key.start is None or key.start <= -self._len: - start = self._start - elif key.start >= self._len: - start = self._stop - else: # -self._len < key.start < self._len - start = self._get_by_index(key.start) - - if key.stop is None or key.stop >= self._len: - stop = self._stop - elif key.stop <= -self._len: - stop = self._start - else: # -self._len < key.stop < self._len - stop = self._get_by_index(key.stop) - - return numeric_range(start, stop, step) - else: - raise TypeError( - 'numeric range indices must be ' - 'integers or slices, not {}'.format(type(key).__name__) - ) - - def __hash__(self): - if self: - return hash((self._start, self._get_by_index(-1), self._step)) - else: - return self._EMPTY_HASH - - def __iter__(self): - values = (self._start + (n * self._step) for n in count()) - if self._growing: - return takewhile(partial(gt, self._stop), values) - else: - return takewhile(partial(lt, 
self._stop), values) - - def __len__(self): - return self._len - - def _init_len(self): - if self._growing: - start = self._start - stop = self._stop - step = self._step - else: - start = self._stop - stop = self._start - step = -self._step - distance = stop - start - if distance <= self._zero: - self._len = 0 - else: # distance > 0 and step > 0: regular euclidean division - q, r = divmod(distance, step) - self._len = int(q) + int(r != self._zero) - - def __reduce__(self): - return numeric_range, (self._start, self._stop, self._step) - - def __repr__(self): - if self._step == 1: - return "numeric_range({}, {})".format( - repr(self._start), repr(self._stop) - ) - else: - return "numeric_range({}, {}, {})".format( - repr(self._start), repr(self._stop), repr(self._step) - ) - - def __reversed__(self): - return iter( - numeric_range( - self._get_by_index(-1), self._start - self._step, -self._step - ) - ) - - def count(self, value): - return int(value in self) - - def index(self, value): - if self._growing: - if self._start <= value < self._stop: - q, r = divmod(value - self._start, self._step) - if r == self._zero: - return int(q) - else: - if self._start >= value > self._stop: - q, r = divmod(self._start - value, -self._step) - if r == self._zero: - return int(q) - - raise ValueError("{} is not in numeric range".format(value)) - - def _get_by_index(self, i): - if i < 0: - i += self._len - if i < 0 or i >= self._len: - raise IndexError("numeric range object index out of range") - return self._start + i * self._step - - -def count_cycle(iterable, n=None): - """Cycle through the items from *iterable* up to *n* times, yielding - the number of completed cycles along with each item. If *n* is omitted the - process repeats indefinitely. 
- - >>> list(count_cycle('AB', 3)) - [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')] - - """ - iterable = tuple(iterable) - if not iterable: - return iter(()) - counter = count() if n is None else range(n) - return ((i, item) for i in counter for item in iterable) - - -def mark_ends(iterable): - """Yield 3-tuples of the form ``(is_first, is_last, item)``. - - >>> list(mark_ends('ABC')) - [(True, False, 'A'), (False, False, 'B'), (False, True, 'C')] - - Use this when looping over an iterable to take special action on its first - and/or last items: - - >>> iterable = ['Header', 100, 200, 'Footer'] - >>> total = 0 - >>> for is_first, is_last, item in mark_ends(iterable): - ... if is_first: - ... continue # Skip the header - ... if is_last: - ... continue # Skip the footer - ... total += item - >>> print(total) - 300 - """ - it = iter(iterable) - - try: - b = next(it) - except StopIteration: - return - - try: - for i in count(): - a = b - b = next(it) - yield i == 0, False, a - - except StopIteration: - yield i == 0, True, a - - -def locate(iterable, pred=bool, window_size=None): - """Yield the index of each item in *iterable* for which *pred* returns - ``True``. - - *pred* defaults to :func:`bool`, which will select truthy items: - - >>> list(locate([0, 1, 1, 0, 1, 0, 0])) - [1, 2, 4] - - Set *pred* to a custom function to, e.g., find the indexes for a particular - item. - - >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b')) - [1, 3] - - If *window_size* is given, then the *pred* function will be called with - that many items. 
This enables searching for sub-sequences: - - >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3] - >>> pred = lambda *args: args == (1, 2, 3) - >>> list(locate(iterable, pred=pred, window_size=3)) - [1, 5, 9] - - Use with :func:`seekable` to find indexes and then retrieve the associated - items: - - >>> from itertools import count - >>> from more_itertools import seekable - >>> source = (3 * n + 1 if (n % 2) else n // 2 for n in count()) - >>> it = seekable(source) - >>> pred = lambda x: x > 100 - >>> indexes = locate(it, pred=pred) - >>> i = next(indexes) - >>> it.seek(i) - >>> next(it) - 106 - - """ - if window_size is None: - return compress(count(), map(pred, iterable)) - - if window_size < 1: - raise ValueError('window size must be at least 1') - - it = windowed(iterable, window_size, fillvalue=_marker) - return compress(count(), starmap(pred, it)) - - -def lstrip(iterable, pred): - """Yield the items from *iterable*, but strip any from the beginning - for which *pred* returns ``True``. - - For example, to remove a set of items from the start of an iterable: - - >>> iterable = (None, False, None, 1, 2, None, 3, False, None) - >>> pred = lambda x: x in {None, False, ''} - >>> list(lstrip(iterable, pred)) - [1, 2, None, 3, False, None] - - This function is analogous to to :func:`str.lstrip`, and is essentially - an wrapper for :func:`itertools.dropwhile`. - - """ - return dropwhile(pred, iterable) - - -def rstrip(iterable, pred): - """Yield the items from *iterable*, but strip any from the end - for which *pred* returns ``True``. - - For example, to remove a set of items from the end of an iterable: - - >>> iterable = (None, False, None, 1, 2, None, 3, False, None) - >>> pred = lambda x: x in {None, False, ''} - >>> list(rstrip(iterable, pred)) - [None, False, None, 1, 2, None, 3] - - This function is analogous to :func:`str.rstrip`. 
- - """ - cache = [] - cache_append = cache.append - cache_clear = cache.clear - for x in iterable: - if pred(x): - cache_append(x) - else: - yield from cache - cache_clear() - yield x - - -def strip(iterable, pred): - """Yield the items from *iterable*, but strip any from the - beginning and end for which *pred* returns ``True``. - - For example, to remove a set of items from both ends of an iterable: - - >>> iterable = (None, False, None, 1, 2, None, 3, False, None) - >>> pred = lambda x: x in {None, False, ''} - >>> list(strip(iterable, pred)) - [1, 2, None, 3] - - This function is analogous to :func:`str.strip`. - - """ - return rstrip(lstrip(iterable, pred), pred) - - -class islice_extended: - """An extension of :func:`itertools.islice` that supports negative values - for *stop*, *start*, and *step*. - - >>> iterable = iter('abcdefgh') - >>> list(islice_extended(iterable, -4, -1)) - ['e', 'f', 'g'] - - Slices with negative values require some caching of *iterable*, but this - function takes care to minimize the amount of memory required. 
- - For example, you can use a negative step with an infinite iterator: - - >>> from itertools import count - >>> list(islice_extended(count(), 110, 99, -2)) - [110, 108, 106, 104, 102, 100] - - You can also use slice notation directly: - - >>> iterable = map(str, count()) - >>> it = islice_extended(iterable)[10:20:2] - >>> list(it) - ['10', '12', '14', '16', '18'] - - """ - - def __init__(self, iterable, *args): - it = iter(iterable) - if args: - self._iterable = _islice_helper(it, slice(*args)) - else: - self._iterable = it - - def __iter__(self): - return self - - def __next__(self): - return next(self._iterable) - - def __getitem__(self, key): - if isinstance(key, slice): - return islice_extended(_islice_helper(self._iterable, key)) - - raise TypeError('islice_extended.__getitem__ argument must be a slice') - - -def _islice_helper(it, s): - start = s.start - stop = s.stop - if s.step == 0: - raise ValueError('step argument must be a non-zero integer or None.') - step = s.step or 1 - - if step > 0: - start = 0 if (start is None) else start - - if start < 0: - # Consume all but the last -start items - cache = deque(enumerate(it, 1), maxlen=-start) - len_iter = cache[-1][0] if cache else 0 - - # Adjust start to be positive - i = max(len_iter + start, 0) - - # Adjust stop to be positive - if stop is None: - j = len_iter - elif stop >= 0: - j = min(stop, len_iter) - else: - j = max(len_iter + stop, 0) - - # Slice the cache - n = j - i - if n <= 0: - return - - for index, item in islice(cache, 0, n, step): - yield item - elif (stop is not None) and (stop < 0): - # Advance to the start position - next(islice(it, start, start), None) - - # When stop is negative, we have to carry -stop items while - # iterating - cache = deque(islice(it, -stop), maxlen=-stop) - - for index, item in enumerate(it): - cached_item = cache.popleft() - if index % step == 0: - yield cached_item - cache.append(item) - else: - # When both start and stop are positive we have the normal case - 
yield from islice(it, start, stop, step) - else: - start = -1 if (start is None) else start - - if (stop is not None) and (stop < 0): - # Consume all but the last items - n = -stop - 1 - cache = deque(enumerate(it, 1), maxlen=n) - len_iter = cache[-1][0] if cache else 0 - - # If start and stop are both negative they are comparable and - # we can just slice. Otherwise we can adjust start to be negative - # and then slice. - if start < 0: - i, j = start, stop - else: - i, j = min(start - len_iter, -1), None - - for index, item in list(cache)[i:j:step]: - yield item - else: - # Advance to the stop position - if stop is not None: - m = stop + 1 - next(islice(it, m, m), None) - - # stop is positive, so if start is negative they are not comparable - # and we need the rest of the items. - if start < 0: - i = start - n = None - # stop is None and start is positive, so we just need items up to - # the start index. - elif stop is None: - i = None - n = start + 1 - # Both stop and start are positive, so they are comparable. - else: - i = None - n = start - stop - if n <= 0: - return - - cache = list(islice(it, n)) - - yield from cache[i::step] - - -def always_reversible(iterable): - """An extension of :func:`reversed` that supports all iterables, not - just those which implement the ``Reversible`` or ``Sequence`` protocols. - - >>> print(*always_reversible(x for x in range(3))) - 2 1 0 - - If the iterable is already reversible, this function returns the - result of :func:`reversed()`. If the iterable is not reversible, - this function will cache the remaining items in the iterable and - yield them in reverse order, which may require significant storage. - """ - try: - return reversed(iterable) - except TypeError: - return reversed(list(iterable)) - - -def consecutive_groups(iterable, ordering=lambda x: x): - """Yield groups of consecutive items using :func:`itertools.groupby`. - The *ordering* function determines whether two items are adjacent by - returning their position. 
- - By default, the ordering function is the identity function. This is - suitable for finding runs of numbers: - - >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40] - >>> for group in consecutive_groups(iterable): - ... print(list(group)) - [1] - [10, 11, 12] - [20] - [30, 31, 32, 33] - [40] - - For finding runs of adjacent letters, try using the :meth:`index` method - of a string of letters: - - >>> from string import ascii_lowercase - >>> iterable = 'abcdfgilmnop' - >>> ordering = ascii_lowercase.index - >>> for group in consecutive_groups(iterable, ordering): - ... print(list(group)) - ['a', 'b', 'c', 'd'] - ['f', 'g'] - ['i'] - ['l', 'm', 'n', 'o', 'p'] - - Each group of consecutive items is an iterator that shares it source with - *iterable*. When an an output group is advanced, the previous group is - no longer available unless its elements are copied (e.g., into a ``list``). - - >>> iterable = [1, 2, 11, 12, 21, 22] - >>> saved_groups = [] - >>> for group in consecutive_groups(iterable): - ... saved_groups.append(list(group)) # Copy group elements - >>> saved_groups - [[1, 2], [11, 12], [21, 22]] - - """ - for k, g in groupby( - enumerate(iterable), key=lambda x: x[0] - ordering(x[1]) - ): - yield map(itemgetter(1), g) - - -def difference(iterable, func=sub, *, initial=None): - """This function is the inverse of :func:`itertools.accumulate`. By default - it will compute the first difference of *iterable* using - :func:`operator.sub`: - - >>> from itertools import accumulate - >>> iterable = accumulate([0, 1, 2, 3, 4]) # produces 0, 1, 3, 6, 10 - >>> list(difference(iterable)) - [0, 1, 2, 3, 4] - - *func* defaults to :func:`operator.sub`, but other functions can be - specified. They will be applied as follows:: - - A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ... 
- - For example, to do progressive division: - - >>> iterable = [1, 2, 6, 24, 120] - >>> func = lambda x, y: x // y - >>> list(difference(iterable, func)) - [1, 2, 3, 4, 5] - - If the *initial* keyword is set, the first element will be skipped when - computing successive differences. - - >>> it = [10, 11, 13, 16] # from accumulate([1, 2, 3], initial=10) - >>> list(difference(it, initial=10)) - [1, 2, 3] - - """ - a, b = tee(iterable) - try: - first = [next(b)] - except StopIteration: - return iter([]) - - if initial is not None: - first = [] - - return chain(first, starmap(func, zip(b, a))) - - -class SequenceView(Sequence): - """Return a read-only view of the sequence object *target*. - - :class:`SequenceView` objects are analogous to Python's built-in - "dictionary view" types. They provide a dynamic view of a sequence's items, - meaning that when the sequence updates, so does the view. - - >>> seq = ['0', '1', '2'] - >>> view = SequenceView(seq) - >>> view - SequenceView(['0', '1', '2']) - >>> seq.append('3') - >>> view - SequenceView(['0', '1', '2', '3']) - - Sequence views support indexing, slicing, and length queries. They act - like the underlying sequence, except they don't allow assignment: - - >>> view[1] - '1' - >>> view[1:-1] - ['1', '2'] - >>> len(view) - 4 - - Sequence views are useful as an alternative to copying, as they don't - require (much) extra storage. - - """ - - def __init__(self, target): - if not isinstance(target, Sequence): - raise TypeError - self._target = target - - def __getitem__(self, index): - return self._target[index] - - def __len__(self): - return len(self._target) - - def __repr__(self): - return '{}({})'.format(self.__class__.__name__, repr(self._target)) - - -class seekable: - """Wrap an iterator to allow for seeking backward and forward. This - progressively caches the items in the source iterable so they can be - re-visited. - - Call :meth:`seek` with an index to seek to that position in the source - iterable. 
- - To "reset" an iterator, seek to ``0``: - - >>> from itertools import count - >>> it = seekable((str(n) for n in count())) - >>> next(it), next(it), next(it) - ('0', '1', '2') - >>> it.seek(0) - >>> next(it), next(it), next(it) - ('0', '1', '2') - >>> next(it) - '3' - - You can also seek forward: - - >>> it = seekable((str(n) for n in range(20))) - >>> it.seek(10) - >>> next(it) - '10' - >>> it.seek(20) # Seeking past the end of the source isn't a problem - >>> list(it) - [] - >>> it.seek(0) # Resetting works even after hitting the end - >>> next(it), next(it), next(it) - ('0', '1', '2') - - Call :meth:`peek` to look ahead one item without advancing the iterator: - - >>> it = seekable('1234') - >>> it.peek() - '1' - >>> list(it) - ['1', '2', '3', '4'] - >>> it.peek(default='empty') - 'empty' - - Before the iterator is at its end, calling :func:`bool` on it will return - ``True``. After it will return ``False``: - - >>> it = seekable('5678') - >>> bool(it) - True - >>> list(it) - ['5', '6', '7', '8'] - >>> bool(it) - False - - You may view the contents of the cache with the :meth:`elements` method. - That returns a :class:`SequenceView`, a view that updates automatically: - - >>> it = seekable((str(n) for n in range(10))) - >>> next(it), next(it), next(it) - ('0', '1', '2') - >>> elements = it.elements() - >>> elements - SequenceView(['0', '1', '2']) - >>> next(it) - '3' - >>> elements - SequenceView(['0', '1', '2', '3']) - - By default, the cache grows as the source iterable progresses, so beware of - wrapping very large or infinite iterables. Supply *maxlen* to limit the - size of the cache (this of course limits how far back you can seek). 
- - >>> from itertools import count - >>> it = seekable((str(n) for n in count()), maxlen=2) - >>> next(it), next(it), next(it), next(it) - ('0', '1', '2', '3') - >>> list(it.elements()) - ['2', '3'] - >>> it.seek(0) - >>> next(it), next(it), next(it), next(it) - ('2', '3', '4', '5') - >>> next(it) - '6' - - """ - - def __init__(self, iterable, maxlen=None): - self._source = iter(iterable) - if maxlen is None: - self._cache = [] - else: - self._cache = deque([], maxlen) - self._index = None - - def __iter__(self): - return self - - def __next__(self): - if self._index is not None: - try: - item = self._cache[self._index] - except IndexError: - self._index = None - else: - self._index += 1 - return item - - item = next(self._source) - self._cache.append(item) - return item - - def __bool__(self): - try: - self.peek() - except StopIteration: - return False - return True - - def peek(self, default=_marker): - try: - peeked = next(self) - except StopIteration: - if default is _marker: - raise - return default - if self._index is None: - self._index = len(self._cache) - self._index -= 1 - return peeked - - def elements(self): - return SequenceView(self._cache) - - def seek(self, index): - self._index = index - remainder = index - len(self._cache) - if remainder > 0: - consume(self, remainder) - - -class run_length: - """ - :func:`run_length.encode` compresses an iterable with run-length encoding. - It yields groups of repeated items with the count of how many times they - were repeated: - - >>> uncompressed = 'abbcccdddd' - >>> list(run_length.encode(uncompressed)) - [('a', 1), ('b', 2), ('c', 3), ('d', 4)] - - :func:`run_length.decode` decompresses an iterable that was previously - compressed with run-length encoding. 
It yields the items of the - decompressed iterable: - - >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)] - >>> list(run_length.decode(compressed)) - ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd'] - - """ - - @staticmethod - def encode(iterable): - return ((k, ilen(g)) for k, g in groupby(iterable)) - - @staticmethod - def decode(iterable): - return chain.from_iterable(repeat(k, n) for k, n in iterable) - - -def exactly_n(iterable, n, predicate=bool): - """Return ``True`` if exactly ``n`` items in the iterable are ``True`` - according to the *predicate* function. - - >>> exactly_n([True, True, False], 2) - True - >>> exactly_n([True, True, False], 1) - False - >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3) - True - - The iterable will be advanced until ``n + 1`` truthy items are encountered, - so avoid calling it on infinite iterables. - - """ - return len(take(n + 1, filter(predicate, iterable))) == n - - -def circular_shifts(iterable): - """Return a list of circular shifts of *iterable*. - - >>> circular_shifts(range(4)) - [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)] - """ - lst = list(iterable) - return take(len(lst), windowed(cycle(lst), len(lst))) - - -def make_decorator(wrapping_func, result_index=0): - """Return a decorator version of *wrapping_func*, which is a function that - modifies an iterable. *result_index* is the position in that function's - signature where the iterable goes. - - This lets you use itertools on the "production end," i.e. at function - definition. This can augment what the function returns without changing the - function's code. - - For example, to produce a decorator version of :func:`chunked`: - - >>> from more_itertools import chunked - >>> chunker = make_decorator(chunked, result_index=0) - >>> @chunker(3) - ... def iter_range(n): - ... return iter(range(n)) - ... 
- >>> list(iter_range(9)) - [[0, 1, 2], [3, 4, 5], [6, 7, 8]] - - To only allow truthy items to be returned: - - >>> truth_serum = make_decorator(filter, result_index=1) - >>> @truth_serum(bool) - ... def boolean_test(): - ... return [0, 1, '', ' ', False, True] - ... - >>> list(boolean_test()) - [1, ' ', True] - - The :func:`peekable` and :func:`seekable` wrappers make for practical - decorators: - - >>> from more_itertools import peekable - >>> peekable_function = make_decorator(peekable) - >>> @peekable_function() - ... def str_range(*args): - ... return (str(x) for x in range(*args)) - ... - >>> it = str_range(1, 20, 2) - >>> next(it), next(it), next(it) - ('1', '3', '5') - >>> it.peek() - '7' - >>> next(it) - '7' - - """ \ No newline at end of file +def y(b=0): + pass From 7f39b429c76c9c4a0f43666a1691792c3f2f3476 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Wed, 6 Jul 2022 09:53:50 -0400 Subject: [PATCH 10/10] lint fixes --- code_data/test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/code_data/test.py b/code_data/test.py index 0b24eaa..e630d00 100644 --- a/code_data/test.py +++ b/code_data/test.py @@ -241,7 +241,7 @@ def verify_constant_keys(code: CodeType, resulting_code: CodeType) -> None: """ Verifies that the constant keys are the same in the code object. 
""" - for l, r in zip(code.co_consts, resulting_code.co_consts): + for l, r in zip(code.co_consts, resulting_code.co_consts): # noqa: E741 if isinstance(l, CodeType): verify_constant_keys(l, r) else: @@ -289,17 +289,17 @@ def verify_line_mapping(code: CodeType, resulting_code: CodeType) -> None: ), "somehow line table bytes are still different" # Recurse on inner code objects - for const in code.co_consts: + for i, const in enumerate(code.co_consts): if isinstance(const, CodeType): - verify_line_mapping(const) + verify_line_mapping(const, resulting_code.co_consts[i]) -def get_code_line_bytes(code: CodeType, offset: int) -> bytes: +def get_code_line_bytes(code: CodeType) -> bytes: """ Get the bytes for a line of code. """ if USE_LINETABLE: - return code.co_linetable + return cast(bytes, code.co_linetable) return code.co_lnotab