Skip to content

Commit

Permalink
[Python] Python fixed size array (#7529)
Browse files Browse the repository at this point in the history
* feat: Added support for fixed sized arrays to python

Problem:
When fixed-size arrays written from C++ were read in Python, Python did
not read those arrays correctly because no size information is encoded in the byte
array itself.

Fix:
Encode the sizes within the generated python file during code generation.
Specifically, we add GetArrayAsNumpy to the Python version of table, which takes the length
of the vector as input. When generating the Python message files we include this length
from the VectorType().fixed_length.

* fix: added digit support for camel case to snake case conversion

Problem:
When a message name included a number, we encountered cases where SnakeCase would
not add the appropriate breaks, e.g. Int32Stamped -> int_32stamped rather than int_32_stamped.

Fix:
To fix this, when the current character is neither lowercase nor a digit, we check
whether the previous character was lowercase or a digit. If it was, we add the
break.

* fix: Array support for structures

Problem:
The Python generated code for handling non-struct and struct vectors
and arrays was inconsistent. The calls to populate the object API were
producing incorrect code.

Solution:
To fix this, VectorOfStruct and VectorOfNonStruct were rewritten
to handle array cases and to bring the two methods in line with each
other.

Testing:
PythonTesting.sh now correctly runs and generates the code for
array_test.fbs.
Minor modifications were done on the test to use the new index
accessor for struct arrays and the script correctly sources the
location of the python code.

* chore: clang format changes

* Added code generated by scripts/generate_code. Modified GetArrayOfNonStruct slightly
to allow for function overloading allowing the user to get a single element of an array
or the whole array.

* Added new_line parameter to OffsetPrefix to allow optional new lines to be added.
This allows us to use the GenIndents method that automatically adds new lines instead.

* Reupload of generated code from the scripts/generate_code.py

* Removed new line in GetVectorAsNumpy.

* Updated Array lengths to use Length methods where possible. Added fallthrough for GenTypePointer. Added digit check to CamelToSnake method. Added and modified tests for ToSnakeCase and CamelToSnake.

* Added range checks to the getter methods for vector and array types. Replaced == with is for Python
  • Loading branch information
joshua-smith8 committed Sep 22, 2022
1 parent 8804619 commit 4131158
Show file tree
Hide file tree
Showing 24 changed files with 293 additions and 133 deletions.
11 changes: 10 additions & 1 deletion python/flatbuffers/table.py
Expand Up @@ -113,6 +113,15 @@ def GetVectorAsNumpy(self, flags, off):
numpy_dtype = N.to_numpy_type(flags)
return encode.GetVectorAsNumpy(numpy_dtype, self.Bytes, length, offset)

def GetArrayAsNumpy(self, flags, off, length):
"""
GetArrayAsNumpy returns the fixed-length array that starts at offset `off`
and holds `length` elements as a numpy array with the type specified by
`flags`. Unlike GetVectorAsNumpy, the length is supplied by the caller
rather than read from the buffer. The array is a `view` into Bytes, so
modifying the returned array will modify Bytes in place.
"""
numpy_dtype = N.to_numpy_type(flags)
return encode.GetVectorAsNumpy(numpy_dtype, self.Bytes, length, off)

def GetVOffsetTSlot(self, slot, d):
"""
GetVOffsetTSlot retrieves the VOffsetT that the given vtable location
Expand All @@ -125,5 +134,5 @@ def GetVOffsetTSlot(self, slot, d):

off = self.Offset(slot)
if off == 0:
return d
return d
return off
165 changes: 119 additions & 46 deletions src/idl_gen_python.cpp
Expand Up @@ -83,11 +83,11 @@ class PythonGenerator : public BaseGenerator {

// Most field accessors need to retrieve and test the field offset first,
// this is the prefix code for that.
std::string OffsetPrefix(const FieldDef &field) const {
std::string OffsetPrefix(const FieldDef &field, bool new_line = true) const {
return "\n" + Indent + Indent +
"o = flatbuffers.number_types.UOffsetTFlags.py_type" +
"(self._tab.Offset(" + NumToString(field.value.offset) + "))\n" +
Indent + Indent + "if o != 0:\n";
Indent + Indent + "if o != 0:" + (new_line ? "\n" : "");
}

// Begin a class declaration.
Expand Down Expand Up @@ -164,9 +164,14 @@ class PythonGenerator : public BaseGenerator {

GenReceiver(struct_def, code_ptr);
code += namer_.Method(field) + "Length(self";
code += "):" + OffsetPrefix(field);
code += Indent + Indent + Indent + "return self._tab.VectorLen(o)\n";
code += Indent + Indent + "return 0\n\n";
code += "):";
if(!IsArray(field.value.type)){
code += OffsetPrefix(field,false);
code += GenIndents(3) + "return self._tab.VectorLen(o)";
code += GenIndents(2) + "return 0\n\n";
}else{
code += GenIndents(2) + "return "+NumToString(field.value.type.fixed_length)+"\n\n";
}
}

// Determines whether a vector is none or not.
Expand All @@ -177,10 +182,15 @@ class PythonGenerator : public BaseGenerator {
GenReceiver(struct_def, code_ptr);
code += namer_.Method(field) + "IsNone(self";
code += "):";
code += GenIndents(2) +
"o = flatbuffers.number_types.UOffsetTFlags.py_type" +
"(self._tab.Offset(" + NumToString(field.value.offset) + "))";
code += GenIndents(2) + "return o == 0";
if(!IsArray(field.value.type)){
code += GenIndents(2) +
"o = flatbuffers.number_types.UOffsetTFlags.py_type" +
"(self._tab.Offset(" + NumToString(field.value.offset) + "))";
code += GenIndents(2) + "return o == 0";
} else {
//assume that we always have an array as memory is preassigned
code += GenIndents(2) + "return False";
}
code += "\n\n";
}

Expand Down Expand Up @@ -244,21 +254,42 @@ class PythonGenerator : public BaseGenerator {
const auto vec_type = field.value.type.VectorType();
GenReceiver(struct_def, code_ptr);
code += namer_.Method(field);
if (IsStruct(vec_type)) {
code += "(self, obj, i):\n";
code += Indent + Indent + "obj.Init(self._tab.Bytes, self._tab.Pos + ";
code += NumToString(field.value.offset) + " + i * ";
code += NumToString(InlineSize(vec_type));
code += ")\n" + Indent + Indent + "return obj\n\n";
} else {
auto getter = GenGetter(vec_type);
code += "(self): return [" + getter;
code += "self._tab.Pos + flatbuffers.number_types.UOffsetTFlags.py_type(";
code += NumToString(field.value.offset) + " + i * ";
code += NumToString(InlineSize(vec_type));
code += ")) for i in range(";
code += NumToString(field.value.type.fixed_length) + ")]\n";
code += "(self, i: int):";
if (parser_.opts.include_dependence_headers) {
code += GenIndents(2);
code += "from " + GenPackageReference(field.value.type) + " import " +
TypeName(field);
}
code += GenIndents(2) + "obj = " + TypeName(field) + "()";
code += GenIndents(2) + "obj.Init(self._tab.Bytes, self._tab.Pos + ";
code += NumToString(field.value.offset) + " + i * ";
code += NumToString(InlineSize(vec_type));
code += ")" + GenIndents(2) + "return obj\n\n";
}

// Emits the Python accessor for a fixed-length array field whose elements are
// not structs. The generated method takes an optional index `j`:
//   - `j is None`: returns a list built by reading every element, with the
//     element count taken from the field's generated `Length()` method;
//   - `0 <= j < Length()`: returns the single element at index `j`;
//   - otherwise: returns None.
// Each element is read at `self._tab.Pos + <field offset> + index * <inline
// size of the element type>`. The generated text is appended to `*code_ptr`.
void GetArrayOfNonStruct(const StructDef &struct_def, const FieldDef &field,
std::string *code_ptr) const {
auto &code = *code_ptr;
// NOTE(review): GenReceiver presumably emits the start of the method
// definition; its body is not visible here.
GenReceiver(struct_def, code_ptr);
code += namer_.Method(field);
code += "(self, j = None):";
// Whole-array branch: list comprehension over every index `i`.
code += GenIndents(2) + "if j is None:";
code += GenIndents(3) + "return [" + GenGetter(field.value.type);
code += "self._tab.Pos + flatbuffers.number_types.UOffsetTFlags.py_type(";
code += NumToString(field.value.offset) + " + i * ";
code += NumToString(InlineSize(field.value.type.VectorType()));
code += ")) for i in range(";
code += "self."+namer_.Method(field)+"Length()" + ")]";
// Single-element branch: only taken when `j` is within the array bounds.
code += GenIndents(2) +"elif j >= 0 and j < self."+namer_.Method(field)+"Length():";
code += GenIndents(3) + "return " + GenGetter(field.value.type);
code += "self._tab.Pos + flatbuffers.number_types.UOffsetTFlags.py_type(";
code += NumToString(field.value.offset) + " + j * ";
code += NumToString(InlineSize(field.value.type.VectorType()));
code += "))";
// Out-of-range index: the generated accessor returns None instead of reading.
code += GenIndents(2) + "else:";
code += GenIndents(3) + "return None\n\n";
}

// Get a struct by initializing an existing struct.
Expand Down Expand Up @@ -403,18 +434,25 @@ class PythonGenerator : public BaseGenerator {

GenReceiver(struct_def, code_ptr);
code += namer_.Method(field) + "AsNumpy(self):";
code += OffsetPrefix(field);
if(!IsArray(field.value.type)){
code += OffsetPrefix(field, false);

code += Indent + Indent + Indent;
code += "return ";
code += "self._tab.GetVectorAsNumpy(flatbuffers.number_types.";
code += namer_.Method(GenTypeGet(field.value.type));
code += "Flags, o)\n";
code += GenIndents(3);
code += "return ";
code += "self._tab.GetVectorAsNumpy(flatbuffers.number_types.";
code += namer_.Method(GenTypeGet(field.value.type));
code += "Flags, o)";

if (IsString(vectortype)) {
code += Indent + Indent + "return \"\"\n";
} else {
code += Indent + Indent + "return 0\n";
if (IsString(vectortype)) {
code += GenIndents(2) + "return \"\"\n";
} else {
code += GenIndents(2) + "return 0\n";
}
}else{
code += GenIndents(2) + "return ";
code += "self._tab.GetArrayAsNumpy(flatbuffers.number_types.";
code += namer_.Method(GenTypeGet(field.value.type.VectorType()));
code += "Flags, self._tab.Pos + "+NumToString(field.value.offset)+", "+NumToString("self."+namer_.Method(field)+"Length()")+")\n";
}
code += "\n";
}
Expand Down Expand Up @@ -714,8 +752,6 @@ class PythonGenerator : public BaseGenerator {
} else {
GetScalarFieldOfTable(struct_def, field, code_ptr);
}
} else if (IsArray(field.value.type)) {
GetArrayOfStruct(struct_def, field, code_ptr);
} else {
switch (field.value.type.base_type) {
case BASE_TYPE_STRUCT:
Expand All @@ -739,6 +775,17 @@ class PythonGenerator : public BaseGenerator {
}
break;
}
case BASE_TYPE_ARRAY: {
auto vectortype = field.value.type.VectorType();
if (vectortype.base_type == BASE_TYPE_STRUCT) {
GetArrayOfStruct(struct_def, field, code_ptr);
} else {
GetArrayOfNonStruct(struct_def, field, code_ptr);
GetVectorOfNonStructAsNumpy(struct_def, field, code_ptr);
GetVectorAsNestedFlatbuffer(struct_def, field, code_ptr);
}
break;
}
case BASE_TYPE_UNION: GetUnionField(struct_def, field, code_ptr); break;
default: FLATBUFFERS_ASSERT(0);
}
Expand Down Expand Up @@ -1061,8 +1108,9 @@ class PythonGenerator : public BaseGenerator {

code += GenIndents(1) + "@classmethod";
code += GenIndents(1) + "def InitFromBuf(cls, buf, pos):";
code += GenIndents(2) + "n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, 0)";
code += GenIndents(2) + struct_var + " = " + struct_type + "()";
code += GenIndents(2) + struct_var + ".Init(buf, pos)";
code += GenIndents(2) + struct_var + ".Init(buf, pos+n)";
code += GenIndents(2) + "return cls.InitFromObj(" + struct_var + ")";
code += "\n";
}
Expand Down Expand Up @@ -1143,12 +1191,41 @@ class PythonGenerator : public BaseGenerator {
auto field_type = TypeName(field);
auto one_instance = field_type + "_";
one_instance[0] = CharToLower(one_instance[0]);

if (parser_.opts.include_dependence_headers) {
auto package_reference = GenPackageReference(field.value.type);
field_type = package_reference + "." + TypeName(field);
}
code += GenIndents(4) + "if " + struct_var + "." + field_method +
"(i) is None:";
code += GenIndents(5) + "self." + field_field + ".append(None)";
code += GenIndents(4) + "else:";
code += GenIndents(5) + one_instance + " = " + field_type +
"T.InitFromObj(" + struct_var + "." + field_method + "(i))";
code +=
GenIndents(5) + "self." + field_field + ".append(" + one_instance + ")";
}

void GenUnpackForTableVector(const StructDef &struct_def,
const FieldDef &field,
std::string *code_ptr) const {
auto &code = *code_ptr;
const auto field_field = namer_.Field(field);
const auto field_method = namer_.Method(field);
const auto struct_var = namer_.Variable(struct_def);

code += GenIndents(2) + "if not " + struct_var + "." + field_method +
"IsNone():";
code += GenIndents(3) + "self." + field_field + " = []";
code += GenIndents(3) + "for i in range(" + struct_var + "." +
field_method + "Length()):";

auto field_type = TypeName(field);
auto one_instance = field_type + "_";
one_instance[0] = CharToLower(one_instance[0]);
if (parser_.opts.include_dependence_headers) {
auto package_reference = GenPackageReference(field.value.type);
field_type = package_reference + "." + TypeName(field);
}
code += GenIndents(4) + "if " + struct_var + "." + field_method +
"(i) is None:";
code += GenIndents(5) + "self." + field_field + ".append(None)";
Expand Down Expand Up @@ -1233,6 +1310,7 @@ class PythonGenerator : public BaseGenerator {
GenUnPackForUnion(struct_def, field, &code);
break;
}
case BASE_TYPE_ARRAY:
case BASE_TYPE_VECTOR: {
auto vectortype = field.value.type.VectorType();
if (vectortype.base_type == BASE_TYPE_STRUCT) {
Expand All @@ -1242,10 +1320,6 @@ class PythonGenerator : public BaseGenerator {
}
break;
}
case BASE_TYPE_ARRAY: {
GenUnPackForScalarVector(struct_def, field, &code);
break;
}
default: GenUnPackForScalar(struct_def, field, &code);
}
}
Expand Down Expand Up @@ -1482,6 +1556,7 @@ class PythonGenerator : public BaseGenerator {
GenPackForUnionField(struct_def, field, &code_prefix, &code);
break;
}
case BASE_TYPE_ARRAY:
case BASE_TYPE_VECTOR: {
auto vectortype = field.value.type.VectorType();
if (vectortype.base_type == BASE_TYPE_STRUCT) {
Expand All @@ -1491,10 +1566,6 @@ class PythonGenerator : public BaseGenerator {
}
break;
}
case BASE_TYPE_ARRAY: {
GenPackForScalarVectorField(struct_def, field, &code_prefix, &code);
break;
}
case BASE_TYPE_STRING: {
code_prefix +=
GenIndents(2) + "if self." + field_field + " is not None:";
Expand Down Expand Up @@ -1666,7 +1737,9 @@ class PythonGenerator : public BaseGenerator {
std::string GenTypePointer(const Type &type) const {
switch (type.base_type) {
case BASE_TYPE_STRING: return "string";
case BASE_TYPE_VECTOR: return GenTypeGet(type.VectorType());
case BASE_TYPE_VECTOR:
// fall through
case BASE_TYPE_ARRAY: return GenTypeGet(type.VectorType());
case BASE_TYPE_STRUCT: return type.struct_def->name;
case BASE_TYPE_UNION:
// fall through
Expand Down
4 changes: 2 additions & 2 deletions src/util.cpp
Expand Up @@ -109,7 +109,7 @@ static std::string ToSnakeCase(const std::string &input, bool screaming) {
} else if (!islower(input[i])) {
// Prevent duplicate underscores for Upper_Snake_Case strings
// and UPPERCASE strings.
if (islower(input[i - 1])) { s += '_'; }
if (islower(input[i - 1]) || (isdigit(input[i-1]) && !isdigit(input[i]))) { s += '_'; }
s += screaming ? CharToUpper(input[i]) : CharToLower(input[i]);
} else {
s += screaming ? CharToUpper(input[i]) : input[i];
Expand All @@ -135,7 +135,7 @@ std::string CamelToSnake(const std::string &input) {
} else if (!islower(input[i])) {
// Prevent duplicate underscores for Upper_Snake_Case strings
// and UPPERCASE strings.
if (islower(input[i - 1])) { s += '_'; }
if (islower(input[i - 1]) || (isdigit(input[i-1]) && !isdigit(input[i]))) { s += '_'; }
s += CharToLower(input[i]);
} else {
s += input[i];
Expand Down
3 changes: 2 additions & 1 deletion tests/MyGame/Example/Ability.py
Expand Up @@ -38,8 +38,9 @@ def __init__(self):

@classmethod
def InitFromBuf(cls, buf, pos):
n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, 0)
ability = Ability()
ability.Init(buf, pos)
ability.Init(buf, pos+n)
return cls.InitFromObj(ability)

@classmethod
Expand Down

0 comments on commit 4131158

Please sign in to comment.