Skip to content

Commit

Permalink
Bounds checks on decoding (#113)
Browse files Browse the repository at this point in the history
* Fix #99 failing with overflow exceptions

We want to force the decoder to attempt to allocate array, string, and byte buffers that are smaller than
`sys.maxsize` but still far larger than the memory of any real machine.

It also needs to be aware of the native pointer size where the test runs.

* Check that decoded lengths make sense

* Fix bounds checks in C decoder

* Remove it from decode bytestring, since the PyBytes_FromStringAndSize
checks the bounds for us

* Add it in to decode definite string since it allocates a temporary
buffer of its own.

* Further error checking around indefinite data

String and byte chunks within an indefinite string or bytestring must not themselves be indefinite.

* Fix flake8 nits

* Fix system errors by exiting out of indefinite loop when there's an error

* Use Py_ssize_t everywhere it makes sense

This makes handling unsigned long long lengths a bit more complicated
but it is now consistent between 32 and 64 bit platforms.

Also, all exceptions that make reference to lengths should be using
Hexadecimal.

Reference used to fix comparisons here:
https://wiki.sei.cmu.edu/confluence/display/c/INT18-C.+Evaluate+integer+expressions+in+a+larger+size+before+comparing+or+assigning+to+that+size

* Don't raise an error for large map size (c-lib consistency)
  • Loading branch information
Sekenre committed Jun 2, 2021
1 parent 60b90e6 commit c55426e
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 33 deletions.
14 changes: 14 additions & 0 deletions cbor2/decoder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import struct
import sys
from collections.abc import Mapping
from datetime import datetime, timedelta, timezone
from io import BytesIO
Expand Down Expand Up @@ -234,12 +235,18 @@ def decode_bytestring(self, subtype):
break
elif initial_byte >> 5 == 2:
length = self._decode_length(initial_byte & 0x1f)
if length is None or length > sys.maxsize:
raise CBORDecodeValueError(
'invalid length for indefinite bytestring chunk 0x%x' % length
)
value = self.read(length)
buf.append(value)
else:
raise CBORDecodeValueError(
"non-bytestring found in indefinite length bytestring")
else:
if length > sys.maxsize:
raise CBORDecodeValueError('invalid length for bytestring 0x%x' % length)
result = self.read(length)
self._stringref_namespace_add(result, length)
return self.set_shareable(result)
Expand Down Expand Up @@ -270,12 +277,17 @@ def decode_string(self, subtype):
break
elif initial_byte >> 5 == 3:
length = self._decode_length(initial_byte & 0x1f)
if length is None or length > sys.maxsize:
raise CBORDecodeValueError(
'invalid length for indefinite string chunk 0x%x' % length)
value = self.read(length).decode('utf-8', self._str_errors)
buf.append(value)
else:
raise CBORDecodeValueError(
"non-string found in indefinite length string")
else:
if length > sys.maxsize:
raise CBORDecodeValueError('invalid length for string 0x%x' % length)
result = self.read(length).decode('utf-8', self._str_errors)
self._stringref_namespace_add(result, length)
return self.set_shareable(result)
Expand All @@ -295,6 +307,8 @@ def decode_array(self, subtype):
else:
items.append(value)
else:
if length > sys.maxsize:
raise CBORDecodeValueError('invalid length for array 0x%x' % length)
items = []
if not self._immutable:
self.set_shareable(items)
Expand Down
69 changes: 45 additions & 24 deletions source/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
#define be64toh(x) _byteswap_uint64(x)
#endif

// copied from cpython/Objects/bytesobject.c for bounds checks
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)

enum DecodeOption {
DECODE_NORMAL = 0,
DECODE_IMMUTABLE = 1,
Expand Down Expand Up @@ -341,13 +344,13 @@ _CBORDecoder_get_immutable(CBORDecoderObject *self, void *closure)
// Utility functions /////////////////////////////////////////////////////////

static int
fp_read(CBORDecoderObject *self, char *buf, const uint64_t size)
fp_read(CBORDecoderObject *self, char *buf, const Py_ssize_t size)
{
PyObject *obj, *size_obj;
char *data;
int ret = -1;

size_obj = PyLong_FromUnsignedLongLong(size);
size_obj = PyLong_FromSsize_t(size);
if (size_obj) {
obj = PyObject_CallFunctionObjArgs(self->read, size_obj, NULL);
if (obj) {
Expand All @@ -359,8 +362,8 @@ fp_read(CBORDecoderObject *self, char *buf, const uint64_t size)
} else {
PyErr_Format(
_CBOR2_CBORDecodeEOF,
"premature end of stream (expected to read %llu bytes, "
"got %lld instead)", size, PyBytes_GET_SIZE(obj));
"premature end of stream (expected to read %zd bytes, "
"got %zd instead)", size, PyBytes_GET_SIZE(obj));
}
Py_DECREF(obj);
}
Expand Down Expand Up @@ -530,12 +533,10 @@ decode_negint(CBORDecoderObject *self, uint8_t subtype)


static PyObject *
decode_definite_bytestring(CBORDecoderObject *self, uint64_t length)
decode_definite_bytestring(CBORDecoderObject *self, Py_ssize_t length)
{
PyObject *ret = NULL;

if (length > PY_SSIZE_T_MAX)
return NULL;
ret = PyBytes_FromStringAndSize(NULL, length);
if (!ret)
return NULL;
Expand All @@ -562,12 +563,14 @@ decode_indefinite_bytestrings(CBORDecoderObject *self)
while (1) {
if (fp_read(self, &lead.byte, 1) == -1)
break;
if (lead.major == 2) {
if (lead.major == 2 && lead.subtype != 31) {
ret = decode_bytestring(self, lead.subtype);
if (ret) {
PyList_Append(list, ret);
Py_DECREF(ret);
ret = NULL;
} else {
break;
}
} else if (lead.major == 7 && lead.subtype == 31) { // break-code
ret = PyObject_CallMethodObjArgs(
Expand All @@ -590,16 +593,25 @@ static PyObject *
decode_bytestring(CBORDecoderObject *self, uint8_t subtype)
{
// major type 2
uint64_t length;
uint64_t length = 0;
bool indefinite = true;
PyObject *ret;
char length_hex[17];

if (decode_length(self, subtype, &length, &indefinite) == -1)
return NULL;

if (length > (uint64_t)PY_SSIZE_T_MAX - (uint64_t)PyBytesObject_SIZE) {
sprintf(length_hex, "%llX", length);
PyErr_Format(
_CBOR2_CBORDecodeValueError,
"excessive bytestring size 0x%s", length_hex);
return NULL;
}
if (indefinite)
ret = decode_indefinite_bytestrings(self);
else
ret = decode_definite_bytestring(self, length);
ret = decode_definite_bytestring(self, (Py_ssize_t)length);
set_shareable(self, ret);
return ret;
}
Expand All @@ -620,13 +632,11 @@ decode_bytestring(CBORDecoderObject *self, uint8_t subtype)


static PyObject *
decode_definite_string(CBORDecoderObject *self, uint64_t length)
decode_definite_string(CBORDecoderObject *self, Py_ssize_t length)
{
PyObject *ret = NULL;
char *buf;

if (length > PY_SSIZE_T_MAX)
return NULL;
buf = PyMem_Malloc(length);
if (!buf)
return PyErr_NoMemory();
Expand Down Expand Up @@ -655,12 +665,14 @@ decode_indefinite_strings(CBORDecoderObject *self)
while (1) {
if (fp_read(self, &lead.byte, 1) == -1)
break;
if (lead.major == 3) {
if (lead.major == 3 && lead.subtype != 31) {
ret = decode_string(self, lead.subtype);
if (ret) {
PyList_Append(list, ret);
Py_DECREF(ret);
ret = NULL;
} else {
break;
}
} else if (lead.major == 7 && lead.subtype == 31) { // break-code
ret = PyObject_CallMethodObjArgs(
Expand All @@ -683,16 +695,24 @@ static PyObject *
decode_string(CBORDecoderObject *self, uint8_t subtype)
{
// major type 3
uint64_t length;
uint64_t length = 0;
bool indefinite = true;
PyObject *ret;
char length_hex[17];

if (decode_length(self, subtype, &length, &indefinite) == -1)
return NULL;
if (length > (uint64_t)PY_SSIZE_T_MAX - (uint64_t)PyBytesObject_SIZE) {
sprintf(length_hex, "%llX", length);
PyErr_Format(
_CBOR2_CBORDecodeValueError,
"excessive string size 0x%s", length_hex);
return NULL;
}
if (indefinite)
ret = decode_indefinite_strings(self);
else
ret = decode_definite_string(self, length);
ret = decode_definite_string(self, (Py_ssize_t)length);
set_shareable(self, ret);
return ret;
}
Expand Down Expand Up @@ -745,13 +765,7 @@ decode_definite_array(CBORDecoderObject *self, Py_ssize_t length)
{
Py_ssize_t i;
PyObject *array, *item, *ret = NULL;

if (length > PY_SSIZE_T_MAX || length < 0) {
PyErr_Format(
_CBOR2_CBORDecodeValueError,
"excessive array size %llu", length);
return NULL;
} else if (length > 65536) {
if (length > 65536) {
// Let cPython manage allocation of huge lists by appending
// items one-by-one
array = PyList_New(0);
Expand Down Expand Up @@ -839,12 +853,19 @@ decode_array(CBORDecoderObject *self, uint8_t subtype)
// major type 4
uint64_t length;
bool indefinite = true;
char length_hex[17];

if (decode_length(self, subtype, &length, &indefinite) == -1)
return NULL;
if (indefinite)
return decode_indefinite_array(self);
else
if (length > (uint64_t)PY_SSIZE_T_MAX) {
sprintf(length_hex, "%llX", length);
PyErr_Format(
_CBOR2_CBORDecodeValueError,
"excessive array size 0x%s", length_hex);
return NULL;
} else
return decode_definite_array(self, (Py_ssize_t) length);
}

Expand Down
12 changes: 12 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import platform
import struct

import pytest

Expand All @@ -20,6 +21,17 @@
)


@pytest.fixture
def will_overflow():
    """Return 8 packed length bytes that are huge but below ``sys.maxsize``.

    The value is derived from the native pointer width so that, on both
    32- and 64-bit platforms, decoding it as an array/string/bytes length
    forces an allocation far larger than available memory.
    """
    pointer_bits = 8 * struct.calcsize('P')
    return struct.pack('Q', 1 << (pointer_bits - 8))


class Module(object):
# Mock module class
pass
Expand Down
63 changes: 54 additions & 9 deletions tests/test_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from fractions import Fraction
from io import BytesIO
from ipaddress import ip_address, ip_network
import struct
import sys
from uuid import UUID

import pytest
Expand Down Expand Up @@ -634,16 +636,59 @@ def test_immutable_keys(impl, payload, expected):
assert value == expected


def test_huge_truncated_array(impl):
with pytest.raises(impl.CBORDecodeEOF):
impl.loads(unhexlify('9b37388519251ae9ca'))
# Corrupted or invalid data checks


def test_huge_truncated_bytes(impl):
with pytest.raises((impl.CBORDecodeEOF, MemoryError)):
impl.loads(unhexlify('5b37388519251ae9ca'))
def test_huge_truncated_array(impl, will_overflow):
    # 0x9b = array with 8-byte length; the length itself is absurdly large.
    payload = unhexlify('9b') + will_overflow
    with pytest.raises(impl.CBORDecodeError):
        impl.loads(payload)


def test_huge_truncated_string(impl):
with pytest.raises((impl.CBORDecodeEOF, MemoryError)):
impl.loads(unhexlify('7B37388519251ae9ca'))
huge_index = struct.pack('Q', sys.maxsize + 1)
with pytest.raises((impl.CBORDecodeError, MemoryError)):
impl.loads(unhexlify('7b') + huge_index + unhexlify('70717273'))


@pytest.mark.parametrize('dtype_prefix', ['7B', '5b'], ids=['string', 'bytes'])
def test_huge_truncated_data(impl, dtype_prefix, will_overflow):
    # Definite string/bytes header whose 8-byte length field is enormous.
    payload = unhexlify(dtype_prefix) + will_overflow
    with pytest.raises((impl.CBORDecodeError, MemoryError)):
        impl.loads(payload)


@pytest.mark.parametrize('tag_dtype', ['7F7B', '5f5B'], ids=['string', 'bytes'])
def test_huge_truncated_indefinite_data(impl, tag_dtype, will_overflow):
    # Indefinite container with one chunk whose length exceeds sys.maxsize.
    oversized_length = struct.pack('Q', sys.maxsize + 1)
    payload = unhexlify(tag_dtype) + oversized_length + unhexlify('70717273ff')
    with pytest.raises((impl.CBORDecodeError, MemoryError)):
        impl.loads(payload)


@pytest.mark.parametrize('data', ['7f61777f6177ffff', '5f41775f4177ffff'], ids=['string', 'bytes'])
def test_embedded_indefinite_data(impl, data):
    # An indefinite chunk nested inside an indefinite string/bytes is invalid.
    payload = unhexlify(data)
    with pytest.raises(impl.CBORDecodeValueError):
        impl.loads(payload)


@pytest.mark.parametrize('data', ['7f01ff', '5f01ff'], ids=['string', 'bytes'])
def test_invalid_indefinite_data_item(impl, data):
    # A non-string item (here an int) inside an indefinite string/bytes is invalid.
    payload = unhexlify(data)
    with pytest.raises(impl.CBORDecodeValueError):
        impl.loads(payload)


@pytest.mark.parametrize(
    'data',
    ['7f7bff0000000000000471717272ff', '5f5bff0000000000000471717272ff'],
    ids=['string', 'bytes'],
)
def test_indefinite_overflow(impl, data):
    # A chunk inside an indefinite string/bytes declares an overflowing length.
    payload = unhexlify(data)
    with pytest.raises(impl.CBORDecodeValueError):
        impl.loads(payload)


def test_invalid_cbor(impl):
    # Random-looking bytes that are not well-formed CBOR must raise, not crash.
    garbage = unhexlify(
        'c788370016b8965bdb2074bff82e5a20e09bec21f8406e86442b87ec3ff245b70a47624dc9cdc6824b2a'
        '4c52e95ec9d6b0534b71c2b49e4bf9031500cee6869979c297bb5a8b381e98db714108415e5c50db7897'
        '4c271579b01633a3ef6271be5c225eb2'
    )
    with pytest.raises(impl.CBORDecodeError):
        impl.loads(garbage)

0 comments on commit c55426e

Please sign in to comment.