Skip to content

Commit

Permalink
Merge pull request #77 from nneonneo/3.1.3-fixes
Browse files Browse the repository at this point in the history
Miscellaneous fixes accumulated over a year+ of use.
  • Loading branch information
nneonneo committed Apr 5, 2023
2 parents 9064936 + 1de362a commit 53d67dd
Show file tree
Hide file tree
Showing 13 changed files with 367 additions and 143 deletions.
27 changes: 9 additions & 18 deletions hachoir/core/bits.py
Expand Up @@ -4,7 +4,7 @@
"""

from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
from struct import calcsize, unpack, error as struct_error
from struct import calcsize, error as struct_error


def swap16(value):
Expand Down Expand Up @@ -292,20 +292,11 @@ def str2long(data, endian):
>>> str2long(b"\x0b\x0a\x0d\x0c", MIDDLE_ENDIAN) == 0x0a0b0c0d
True
"""
assert 1 <= len(data) <= 32 # arbitrary limit: 256 bits
try:
return unpack(_struct_format[endian][len(data)], data)[0]
except KeyError:
pass

assert endian in (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN)
shift = 0
value = 0
if endian is BIG_ENDIAN:
data = reversed(data)
elif endian is MIDDLE_ENDIAN:
data = reversed(strswapmid(data))
for byte in data:
value += (byte << shift)
shift += 8
return value
if endian == LITTLE_ENDIAN:
return int.from_bytes(data, "little")
elif endian == BIG_ENDIAN:
return int.from_bytes(data, "big")
elif endian == MIDDLE_ENDIAN:
return int.from_bytes(strswapmid(data), "big")
else:
raise ValueError("Invalid endian %s" % (endian,))
12 changes: 8 additions & 4 deletions hachoir/parser/archive/lzx.py
Expand Up @@ -174,7 +174,7 @@ def createFields(self):
field._description = "Literal value %r" % chr(
field.realvalue)
current_decoded_size += 1
self.parent.uncompressed_data.append(field.realvalue)
self.parent._lzx_window.append(field.realvalue)
yield field
continue
position_header, length_header = divmod(
Expand Down Expand Up @@ -248,7 +248,7 @@ def createFields(self):
self.parent.r2 = self.parent.r1
self.parent.r1 = self.parent.r0
self.parent.r0 = position
extend_data(self.parent.uncompressed_data, length, position)
extend_data(self.parent._lzx_window, length, position)
current_decoded_size += length
elif self.block_type == 3: # Uncompressed block
padding = paddingSize(self.address + self.current_size, 16)
Expand All @@ -265,13 +265,16 @@ def createFields(self):
self.parent.r1 = self["r[1]"].value
self.parent.r2 = self["r[2]"].value
yield RawBytes(self, "data", self.uncompressed_size)
self.parent.uncompressed_data += self["data"].value
self.parent._lzx_window += self["data"].value
if self["block_size"].value % 2:
yield PaddingBits(self, "padding", 8)
else:
raise ParserError("Unknown block type %d!" % self.block_type)

# Fixup Intel jumps if necessary
# Fixup Intel jumps if necessary (fixups are only applied to the final output, not to the LZX window)
self.parent.uncompressed_data += self.parent._lzx_window[-self.uncompressed_size:]
self.parent._lzx_window = self.parent._lzx_window[-(1 << self.root.compr_level):]

if (
intel_started
and self.parent["filesize_indicator"].value
Expand Down Expand Up @@ -305,6 +308,7 @@ class LZXStream(Parser):

def createFields(self):
self.uncompressed_data = bytearray()
self._lzx_window = bytearray()
self.r0 = 1
self.r1 = 1
self.r2 = 1
Expand Down
8 changes: 5 additions & 3 deletions hachoir/parser/file_system/ext2.py
Expand Up @@ -240,11 +240,13 @@ def describe_file(self):
return out

def is_fast_symlink(self):
    """Return True when the inode owns no blocks besides an optional ACL block.

    The 32-bit ``file_acl`` word is read straight from the underlying
    stream instead of being materialised as a field: a field object that
    is built here but never yielded is not part of this field set, so its
    value cannot be relied upon.

    NOTE(review): the address computation starts at
    ``absolute_address + current_size``, so the result depends on how much
    of the inode has been parsed when this is called -- presumably right
    before the block-pointer array; confirm against the caller.
    """
    acl_addr = self.absolute_address + self.current_size
    # skip 15 blocks + version field (4 bytes each, converted to bits)
    acl_addr += (4 * 15 + 4) * 8
    acl = self.stream.readBits(acl_addr, 32, self.endian)

    # When file_acl is non-zero, discount ``2 << log_block_size`` from the
    # 'blocks' counter before testing it against zero.
    # NOTE(review): presumably 'blocks' counts 512-byte sectors, making this
    # the size of one filesystem block -- confirm against the ext2 layout.
    acl_sectors = 0
    if acl > 0:
        acl_sectors = 2 << self["/superblock/log_block_size"].value

    return self['blocks'].value - acl_sectors == 0
Expand Down
2 changes: 1 addition & 1 deletion hachoir/parser/image/png.py
Expand Up @@ -45,7 +45,7 @@ def __call__(self, size, data=None):
COMPRESSION_NAME = {
0: "deflate" # with 32K sliding window
}
MAX_CHUNK_SIZE = 5 * 1024 * 1024 # Maximum chunk size (5 MB)
MAX_CHUNK_SIZE = 64 * 1024 * 1024 # Maximum chunk size heuristic (64 MB)


def headerParse(parent):
Expand Down
23 changes: 13 additions & 10 deletions hachoir/parser/misc/pdf.py
Expand Up @@ -44,7 +44,7 @@ def getElementEnd(s, limit=b' ', offset=0):


class PDFNumber(Field):
LIMITS = [b'[', b'/', b'\x0D', b']']
LIMITS = [b'[', b'/', b'\x0A', b'\x0D', b'>', b']']
"""
sprintf("%i") or sprintf("%.?f")
"""
Expand Down Expand Up @@ -81,18 +81,18 @@ class PDFString(Field):

def __init__(self, parent, name, desc=None):
Field.__init__(self, parent, name, description=desc)
val = ""
val = bytearray()
count = 1
off = 1
while not parent.eof:
char = parent.stream.readBytes(self.absolute_address + 8 * off, 1)
# Non-ASCII
if not char.isalpha() or char == '\\':
if not char.isalpha() or char == b'\\':
off += 1
continue
if char == '(':
if char == b'(':
count += 1
if char == ')':
if char == b')':
count -= 1
# Parenthesis block = 0 => end of string
if count == 0:
Expand All @@ -101,13 +101,15 @@ def __init__(self, parent, name, desc=None):

# Add it to the string
val += char
off += 1

val = bytes(val)
self._size = 8 * off
self.createValue = lambda: val


class PDFName(Field):
LIMITS = [b'[', b'/', b'<', b']']
LIMITS = [b'[', b'/', b'<', b'>', b']']
"""
String starting with '/', where characters may be written using their
ASCII code (example: '#20' would be ' '
Expand Down Expand Up @@ -145,7 +147,7 @@ class PDFID(Field):

def __init__(self, parent, name, desc=None):
Field.__init__(self, parent, name, description=desc)
self._size = 8 * getElementEnd(parent, '>')
self._size = 8 * getElementEnd(parent, b'>')
self.createValue = lambda: parent.stream.readBytes(
self.absolute_address + 8, (self._size // 8) - 1)

Expand Down Expand Up @@ -254,7 +256,7 @@ def parsePDFType(s):
else:
# First parse size
size = getElementEnd(s)
for limit in ['/', '>', '<']:
for limit in [b'/', b'>', b'<']:
other_size = getElementEnd(s, limit)
if other_size is not None:
other_size -= 1
Expand Down Expand Up @@ -424,7 +426,7 @@ def createFields(self):
new_length = getElementEnd(self, limit)
if length is None or (new_length is not None and new_length - len(limit) < length):
length = new_length - len(limit)
yield String(self, "object", length, strip=' ')
yield String(self, "object", length, strip=' \n')
if self.stream.readBytes(self.absolute_address + self.current_size, 2) == b'<<':
yield PDFDictionary(self, "key_list")
# End of catalog: this one has "endobj"
Expand All @@ -441,9 +443,9 @@ def createFields(self):
yield RawBytes(self, "marker", len(self.MAGIC))
yield WhiteSpace(self, "sep[]")
yield String(self, "start_attribute_marker", 2)
yield WhiteSpace(self, "sep[]")
addr = self.absolute_address + self.current_size
while self.stream.readBytes(addr, 2) != b'>>':
yield WhiteSpace(self, "sep[]")
t = PDFName(self, "type[]")
yield t
name = t.value.decode()
Expand All @@ -462,6 +464,7 @@ def createFields(self):
yield PDFDictionary(self, "decrypt")
else:
raise ParserError("Don't know trailer type '%s'" % name)
yield WhiteSpace(self, "sep[]")
addr = self.absolute_address + self.current_size
yield String(self, "end_attribute_marker", 2)
yield LineEnd(self, "line_end[]")
Expand Down
2 changes: 1 addition & 1 deletion hachoir/parser/program/exe.py
Expand Up @@ -19,7 +19,7 @@
from hachoir.parser.program.exe_pe import PE_Header, PE_OptHeader, SectionHeader
from hachoir.parser.program.exe_res import PE_Resource, NE_VersionInfoNode

MAX_NB_SECTION = 50
MAX_NB_SECTION = 100


class MSDosHeader(FieldSet):
Expand Down
90 changes: 88 additions & 2 deletions hachoir/parser/program/java.py
Expand Up @@ -435,6 +435,19 @@ def createDisplay(self):
return "%s(%i,%i,%i)" % (self.op, self["index"].value, self["count"].value, self["zero"].value)


# Field layout of the invokedynamic instruction: the opcode byte, a
# constant-pool index, and two trailing bytes that must be zero.
class OpcodeSpecial_invokedynamic(JavaOpcode):
    OPSIZE = 5

    def createFields(self):
        """Yield the opcode byte, the constant-pool index and both zero bytes."""
        yield UInt8(self, "opcode")
        yield CPIndex(self, "index")
        for zero_name in ("zero1", "zero2"):
            yield UInt8(self, zero_name, "Must be zero.")

    def createDisplay(self):
        """Format the instruction as ``op(index,zero1,zero2)``."""
        op_name = self.op
        operands = [self[key].value for key in ("index", "zero1", "zero2")]
        return "%s(%i,%i,%i)" % (op_name, *operands)


class OpcodeSpecial_newarray(JavaOpcode):
OPSIZE = 2

Expand Down Expand Up @@ -659,6 +672,7 @@ class JavaBytecode(FieldSet):
0x98: ("dcmpg", OpcodeNoArgs, "compares two doubles. Stack: value1, value2 -> result"),
0x99: ("ifeq", OpcodeShortJump, "if 'value' is 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9a: ("ifne", OpcodeShortJump, "if 'value' is not 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9b: ("iflt", OpcodeShortJump, "if 'value' is less than 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9c: ("ifge", OpcodeShortJump, "if 'value' is greater than or equal to 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9d: ("ifgt", OpcodeShortJump, "if 'value' is greater than 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
0x9e: ("ifle", OpcodeShortJump, "if 'value' is less than or equal to 0, branch to the 16-bit instruction offset argument. Stack: value ->"),
Expand Down Expand Up @@ -689,7 +703,7 @@ class JavaBytecode(FieldSet):
0xb7: ("invokespecial", OpcodeCPIndex, "invoke instance method on object 'objectref', where the method is identified by method reference <argument> in constant pool. Stack: objectref, [arg1, arg2, ...] ->"),
0xb8: ("invokestatic", OpcodeCPIndex, "invoke a static method, where the method is identified by method reference <argument> in the constant pool. Stack: [arg1, arg2, ...] ->"),
0xb9: ("invokeinterface", OpcodeSpecial_invokeinterface, "invokes an interface method on object 'objectref', where the interface method is identified by method reference <argument> in constant pool. Stack: objectref, [arg1, arg2, ...] ->"),
0xba: ("xxxunusedxxx", OpcodeNoArgs, "this opcode is reserved for historical reasons. Stack: "),
0xba: ("invokedynamic", OpcodeSpecial_invokedynamic, "invokes a dynamically-computed call site, where the bootstrap method is identified by <argument> in constant pool. Stack: [arg1, arg2, ...] -> "),
0xbb: ("new", OpcodeCPIndex, "creates new object of type identified by class reference <argument> in constant pool. Stack: -> objectref"),
0xbc: ("newarray", OpcodeSpecial_newarray, "creates new array with 'count' elements of primitive type given in the argument. Stack: count -> arrayref"),
0xbd: ("anewarray", OpcodeCPIndex, "creates a new array of references of length 'count' and component type identified by the class reference <argument> in the constant pool. Stack: count -> arrayref"),
Expand Down Expand Up @@ -762,6 +776,33 @@ def createFields(self):
elif self.constant_type == "NameAndType":
yield CPIndex(self, "name_index", target_types="Utf8")
yield CPIndex(self, "descriptor_index", target_types="Utf8")
elif self.constant_type == "MethodHandle":
refkind_map = {
1: ("getField", "Fieldref"),
2: ("getStatic", "Fieldref"),
3: ("putField", "Fieldref"),
4: ("putStatic", "Fieldref"),
5: ("invokeVirtual", "Methodref"),
6: ("invokeStatic", ("Methodref", "InterfaceMethodref")),
7: ("invokeSpecial", ("Methodref", "InterfaceMethodref")),
8: ("newInvokeSpecial", "Methodref"),
9: ("invokeInterface", "InterfaceMethodref"),
}
yield Enum(UInt8(self, "reference_kind"), {k: v[0] for k, v in refkind_map.items()})
target_types = refkind_map[self["reference_kind"].value][1]
yield CPIndex(self, "reference_index", target_types=target_types)
elif self.constant_type == "MethodType":
yield CPIndex(self, "descriptor_index", target_types="Utf8")
elif self.constant_type == "Dynamic":
yield UInt16(self, "bootstrap_method_attr_index")
yield CPIndex(self, "name_and_type_index", target_types="NameAndType")
elif self.constant_type == "InvokeDynamic":
yield UInt16(self, "bootstrap_method_attr_index")
yield CPIndex(self, "name_and_type_index", target_types="NameAndType")
elif self.constant_type == "Module":
yield CPIndex(self, "name_index", target_types="Utf8")
elif self.constant_type == "Package":
yield CPIndex(self, "name_index", target_types="Utf8")
else:
raise ParserError("Not a valid constant pool element type: "
+ self["tag"].value)
Expand All @@ -785,6 +826,21 @@ def rawvalue(self):
elif self.constant_type == "NameAndType":
return (self["descriptor_index"].rawvalue(),
self["name_index"].rawvalue())
elif self.constant_type == "MethodHandle":
return (self["reference_kind"].display,
self["reference_index"].rawvalue())
elif self.constant_type == "MethodType":
return self["descriptor_index"].rawvalue()
elif self.constant_type == "Dynamic":
return (self["bootstrap_method_attr_index"].value,
self["name_and_type_index"].rawvalue())
elif self.constant_type == "InvokeDynamic":
return (self["bootstrap_method_attr_index"].value,
self["name_and_type_index"].rawvalue())
elif self.constant_type == "Module":
return self["name_index"].rawvalue()
elif self.constant_type == "Package":
return self["name_index"].rawvalue()
else:
# FIXME: Return "<error>" instead of raising an exception?
raise ParserError("Not a valid constant pool element type: "
Expand All @@ -811,6 +867,24 @@ def __str__(self):
elif self.constant_type == "NameAndType":
descriptor, name = self.rawvalue()
return parse_any_descriptor(descriptor, name=name)
elif self.constant_type == "MethodHandle":
return "%s(%s)" % (self["reference_kind"].display, self["reference_index"].str())
elif self.constant_type == "MethodType":
return self["descriptor_index"].str()
elif self.constant_type == "Dynamic":
return "%d, %s" % (
self["bootstrap_method_attr_index"].value,
self["name_and_type_index"].str()
)
elif self.constant_type == "InvokeDynamic":
return "%d, %s" % (
self["bootstrap_method_attr_index"].value,
self["name_and_type_index"].str()
)
elif self.constant_type == "Module":
return self["name_index"].str()
elif self.constant_type == "Package":
return self["name_index"].str()
else:
# FIXME: Return "<error>" instead of raising an exception?
raise ParserError("Not a valid constant pool element type: "
Expand Down Expand Up @@ -1192,6 +1266,12 @@ class JavaCompiledClassFile(Parser):
"50.0": "JDK 1.6",
"51.0": "JDK 1.7",
"52.0": "JDK 1.8",
"53.0": "JDK 9",
"54.0": "JDK 10",
"55.0": "JDK 11",
"56.0": "JDK 12",
"57.0": "JDK 13",
"58.0": "JDK 14",
}

# Constants go here since they will probably depend on the detected format
Expand All @@ -1208,7 +1288,13 @@ class JavaCompiledClassFile(Parser):
9: "Fieldref",
10: "Methodref",
11: "InterfaceMethodref",
12: "NameAndType"
12: "NameAndType",
15: "MethodHandle",
16: "MethodType",
17: "Dynamic",
18: "InvokeDynamic",
19: "Module",
20: "Package",
}

def validate(self):
Expand Down

0 comments on commit 53d67dd

Please sign in to comment.