Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inline constants into instructions #50

Merged
merged 10 commits into from Jul 6, 2022
6 changes: 5 additions & 1 deletion .github/workflows/test.yml
Expand Up @@ -21,8 +21,12 @@ jobs:
with:
python-version: ${{ matrix.py }}
- uses: actions/checkout@v2
# TODO: #48 Only install test requirements instead of all pinned in CI
# https://github.com/actions/checkout/issues/124#issuecomment-586664611
with:
ref: ${{ github.event.pull_request.head.ref }}
- run: pip install -e . -r requirements.test.txt
# TODO: Enable dev mode for Python
# https://docs.python.org/3/library/devmode.html#devmode
- run: pytest -v code_data
- name: Commit and push failing example
if: failure()
Expand Down
147 changes: 122 additions & 25 deletions code_data/__init__.py
Expand Up @@ -52,6 +52,13 @@ class CodeData(DataclassHideDefault):
# Mapping of index in the names list to the name
additional_names: dict[int, str] = field(default_factory=dict)

# Additional constants to include, which do not appear in any instructions,
# Mapping of index in the names list to the name
additional_constants: dict[int, ConstantDataType] = field(default_factory=dict)

# The type of block this is
type: BlockType = field(default=None)

# number of arguments (not including keyword only arguments, * or ** args)
argcount: int = field(default=0)

Expand All @@ -70,9 +77,6 @@ class CodeData(DataclassHideDefault):
# code flags
flags: FlagsData = field(default_factory=set)

# All code objects are recursively transformed to CodeData objects
consts: Tuple["ConstantDataType", ...] = field(default=(None,))

# tuple of names of arguments and local variables
varnames: Tuple[str, ...] = field(default=tuple())

Expand All @@ -91,6 +95,11 @@ def _verify(self) -> None:
verify_block(self.blocks)


# Functions should have both of these flags set
# https://github.com/python/cpython/blob/443370d8acd107da235d2e9758e06ab3583be4ea/Python/compile.c#L5348
FN_FLAGS = {"NEWLOCALS", "OPTIMIZED"}


def to_code_data(code: CodeType) -> CodeData:
"""
Parse a CodeType into python data structure.
Expand All @@ -104,22 +113,47 @@ def to_code_data(code: CodeType) -> CodeData:

line_mapping = to_line_mapping(code)
first_line_number_override = line_mapping.set_first_line(code.co_firstlineno)

constants = tuple(map(to_code_constant, code.co_consts))

flag_data = to_flags_data(code.co_flags)

fn_flags = flag_data & FN_FLAGS
if len(fn_flags) == 0:
block_type = None
elif len(fn_flags) == 2:
# Use the first const as a docstring if its a string
# https://github.com/python/cpython/blob/da8be157f4e275c4c32b9199f1466ed7e52f62cf/Objects/funcobject.c#L33-L38
# TODO: Maybe just assume that first arg is not docstring if it's none? Naw...
docstring_in_consts = False
docstring: Optional[str] = None
if constants:
first_constant = constants[0]
if isinstance(first_constant, str) or first_constant is None:
docstring_in_consts = True
docstring = first_constant
block_type = FunctionBlock(docstring, docstring_in_consts)
flag_data -= FN_FLAGS
else:
raise ValueError(f"Expected both flags to represent function: {fn_flags}")

# retrieve the blocks and pop off used line mapping
blocks, additional_names = bytes_to_blocks(
code.co_code, line_mapping, code.co_names
blocks, additional_names, additional_constants = bytes_to_blocks(
code.co_code, line_mapping, code.co_names, constants, block_type
)
return CodeData(
blocks,
line_mapping,
first_line_number_override,
additional_names,
additional_constants,
block_type,
code.co_argcount,
posonlyargcount,
code.co_kwonlyargcount,
code.co_nlocals,
code.co_stacksize,
to_flags_data(code.co_flags),
tuple(map(to_code_constant, code.co_consts)),
flag_data,
code.co_varnames,
code.co_filename,
code.co_name,
Expand All @@ -134,12 +168,19 @@ def from_code_data(code_data: CodeData) -> CodeType:

:rtype: types.CodeType
"""
consts = tuple(map(from_code_constant, code_data.consts))
flags = from_flags_data(code_data.flags)
code, line_mapping, names = blocks_to_bytes(
code_data.blocks, code_data.additional_names
flags_data = code_data.flags
if isinstance(code_data.type, FunctionBlock):
flags_data = FN_FLAGS | flags_data
flags = from_flags_data(flags_data)
code, line_mapping, names, constants = blocks_to_bytes(
code_data.blocks,
code_data.additional_names,
code_data.additional_constants,
code_data.type,
)

consts = tuple(map(from_code_constant, constants))

line_mapping.update(code_data.additional_line_mapping)
first_line_no = line_mapping.trim_first_line(code_data.first_line_number_override)

Expand Down Expand Up @@ -185,19 +226,29 @@ def from_code_data(code_data: CodeData) -> CodeType:
)


# We need to wrap the data structures in dataclasses to be able to represent
# them with MyPy, since it doesn't support recursive types
# https://github.com/python/mypy/issues/731
# The type of block this is, as we can infer from the flags.
# https://github.com/python/cpython/blob/5506d603021518eaaa89e7037905f7a698c5e95c/Include/symtable.h#L13
BlockType = Union["FunctionBlock", None]


@dataclass
class FunctionBlock(DataclassHideDefault):
docstring: Optional[str] = field(default=None)
# Set to false if the docstring is not saved as a constant. In this case, it
# must be 0. This happens for list comprehensions
docstring_in_consts: bool = field(default=True)


ConstantDataType = Union[
int,
"ConstantInt",
str,
float,
"ConstantFloat",
None,
bool,
"ConstantBool",
bytes,
"EllipsisType",
CodeData,
complex,
"ConstantComplex",
"ConstantSet",
"ConstantTuple",
]
Expand All @@ -206,10 +257,16 @@ def from_code_data(code_data: CodeData) -> CodeType:
def to_code_constant(value: object) -> ConstantDataType:
if isinstance(value, CodeType):
return to_code_data(value)
if isinstance(
value, (int, str, float, type(None), bool, bytes, type(...), complex)
):
if isinstance(value, (str, type(None), bytes, type(...))):
return value
if isinstance(value, bool):
return ConstantBool(value)
if isinstance(value, int):
return ConstantInt(value)
if isinstance(value, float):
return ConstantFloat(value, is_neg_zero(value))
if isinstance(value, complex):
return ConstantComplex(value, is_neg_zero(value.real), is_neg_zero(value.imag))
if isinstance(value, tuple):
return ConstantTuple(tuple(map(to_code_constant, value)))
if isinstance(value, frozenset):
Expand All @@ -221,17 +278,57 @@ def from_code_constant(value: ConstantDataType) -> object:
if isinstance(value, CodeData):
return from_code_data(value)
if isinstance(value, ConstantTuple):
return tuple(map(from_code_constant, value.tuple))
return tuple(map(from_code_constant, value.value))
if isinstance(value, ConstantSet):
return frozenset(map(from_code_constant, value.frozenset))
return frozenset(map(from_code_constant, value.value))
if isinstance(value, (ConstantBool, ConstantInt, ConstantFloat, ConstantComplex)):
return value.value
return value


# Wrap these in types, so that, say, bytecode with constants of 1
# are not equal to bytecodes of constants of True.


@dataclass(frozen=True)
class ConstantBool(DataclassHideDefault):
value: bool = field(metadata={"positional": True})


@dataclass(frozen=True)
class ConstantInt(DataclassHideDefault):
value: int = field(metadata={"positional": True})


@dataclass(frozen=True)
class ConstantFloat(DataclassHideDefault):
value: float = field(metadata={"positional": True})
# Store if the value is negative 0, so that == distinguishes between 0.0 and -0.0
is_neg_zero: bool = field(default=False)


@dataclass(frozen=True)
class ConstantComplex(DataclassHideDefault):
value: complex = field(metadata={"positional": True})
# Store if the value is negative 0, so that == distinguishes between 0.0 and -0.0
real_is_neg_zero: bool = field(default=False)
imag_is_neg_zero: bool = field(default=False)


# We need to wrap the data structures in dataclasses to be able to represent
# them with MyPy, since it doesn't support recursive types
# https://github.com/python/mypy/issues/731
@dataclass(frozen=True)
class ConstantTuple(DataclassHideDefault):
tuple: Tuple[ConstantDataType, ...] = field(metadata={"positional": True})
# Make not positional until rich supports positional tuples
# https://github.com/Textualize/rich/pull/2379
value: tuple[ConstantDataType, ...] = field(metadata={"positional": False})


@dataclass(frozen=True)
class ConstantSet(DataclassHideDefault):
frozenset: FrozenSet[ConstantDataType] = field(metadata={"positional": True})
value: FrozenSet[ConstantDataType] = field(metadata={"positional": True})


def is_neg_zero(value: float) -> bool:
return str(value) == "-0.0"