From 19821954294230c5af0f6d931e35ff9c8c8d4e7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=F0=9F=90=BC=20Samrose=20Ahmed=20=F0=9F=90=BC?= Date: Fri, 21 Oct 2022 20:32:21 +0300 Subject: [PATCH] add avro record names when converting arrow schema to avro --- src/io/avro/write/schema.rs | 26 +++++++----- tests/it/io/avro/write.rs | 85 +++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 10 deletions(-) diff --git a/src/io/avro/write/schema.rs b/src/io/avro/write/schema.rs index 94673f45a48..b5fd17d8e69 100644 --- a/src/io/avro/write/schema.rs +++ b/src/io/avro/write/schema.rs @@ -8,10 +8,11 @@ use crate::error::{Error, Result}; /// Converts a [`Schema`] to an Avro [`Record`]. pub fn to_record(schema: &Schema) -> Result { + let mut name_counter: i32 = 0; let fields = schema .fields .iter() - .map(field_to_field) + .map(|f| field_to_field(&f, &mut name_counter)) .collect::>()?; Ok(Record { name: "".to_string(), @@ -22,20 +23,25 @@ pub fn to_record(schema: &Schema) -> Result { }) } -fn field_to_field(field: &Field) -> Result { - let schema = type_to_schema(field.data_type(), field.is_nullable)?; +fn field_to_field(field: &Field, name_counter: &mut i32) -> Result { + let schema = type_to_schema(field.data_type(), field.is_nullable, name_counter)?; Ok(AvroField::new(&field.name, schema)) } -fn type_to_schema(data_type: &DataType, is_nullable: bool) -> Result { +fn type_to_schema(data_type: &DataType, is_nullable: bool, name_counter: &mut i32) -> Result { Ok(if is_nullable { - AvroSchema::Union(vec![AvroSchema::Null, _type_to_schema(data_type)?]) + AvroSchema::Union(vec![AvroSchema::Null, _type_to_schema(data_type, name_counter)?]) } else { - _type_to_schema(data_type)? + _type_to_schema(data_type, name_counter)? }) } -fn _type_to_schema(data_type: &DataType) -> Result { +fn _get_field_name(name_counter: &mut i32) -> String { + *name_counter += 1; + format!("r{}", name_counter) +} + +fn _type_to_schema(data_type: &DataType, name_counter: &mut i32) -> Result { Ok(match data_type.to_logical_type() { DataType::Null => AvroSchema::Null, DataType::Boolean => AvroSchema::Boolean, @@ -48,13 +54,13 @@ fn _type_to_schema(data_type: &DataType) -> Result { DataType::Utf8 => AvroSchema::String(None), DataType::LargeUtf8 => AvroSchema::String(None), DataType::LargeList(inner) | DataType::List(inner) => AvroSchema::Array(Box::new( - type_to_schema(&inner.data_type, inner.is_nullable)?, + type_to_schema(&inner.data_type, inner.is_nullable, name_counter)?, )), DataType::Struct(fields) => AvroSchema::Record(Record::new( - "", + _get_field_name(name_counter), fields .iter() - .map(field_to_field) + .map(|f| field_to_field(&f, name_counter)) .collect::>>()?, )), DataType::Date32 => AvroSchema::Int(Some(IntLogical::Date)), diff --git a/tests/it/io/avro/write.rs b/tests/it/io/avro/write.rs index ffb71dcfc23..2e0acd76150 100644 --- a/tests/it/io/avro/write.rs +++ b/tests/it/io/avro/write.rs @@ -6,6 +6,7 @@ use arrow2::io::avro::avro_schema::file::{Block, CompressedBlock, Compression}; use arrow2::io::avro::avro_schema::write::{compress, write_block, write_metadata}; use arrow2::io::avro::write; use arrow2::types::months_days_ns; +use avro_schema::schema::{ Schema as AvroSchema, Record, Field as AvroField, }; use super::read::read_avro; @@ -284,6 +285,90 @@ fn struct_data() -> Chunk> { ]) } +fn avro_record() -> Record { + Record { + name: "".to_string(), + namespace: None, + doc: None, + aliases: vec![], + fields: vec![ + AvroField { + name: "struct".to_string(), + doc: None, + schema: AvroSchema::Record(Record { + name: "r1".to_string(), + namespace: None, + doc: None, + aliases: vec![], + fields: vec![ + AvroField { + name: "item1".to_string(), + doc: None, + schema: AvroSchema::Int(None), + default: None, + order: None, + aliases: vec![], + }, + AvroField { + name: "item2".to_string(), + doc: None, + schema: AvroSchema::Union(vec![AvroSchema::Null, AvroSchema::Int(None)]), + default: None, + order: None, + aliases: vec![], + }, + ], + }), + default: None, + order: None, + aliases: vec![], + }, + AvroField { + name: "struct nullable".to_string(), + doc: None, + schema: AvroSchema::Union(vec![ + AvroSchema::Null, + AvroSchema::Record(Record { + name: "r2".to_string(), + namespace: None, + doc: None, + aliases: vec![], + fields: vec![ + AvroField { + name: "item1".to_string(), + doc: None, + schema: AvroSchema::Int(None), + default: None, + order: None, + aliases: vec![], + }, + AvroField { + name: "item2".to_string(), + doc: None, + schema: AvroSchema::Union(vec![AvroSchema::Null, AvroSchema::Int(None)]), + default: None, + order: None, + aliases: vec![], + }, + ], + }), + ]), + default: None, + order: None, + aliases: vec![], + }, + ], + } +} + +#[test] +fn avro_record_schema() -> Result<()> { + let arrow_schema = struct_schema(); + let record = write::to_record(&arrow_schema)?; + assert_eq!(record, avro_record()); + Ok(()) +} + #[test] fn struct_() -> Result<()> { let write_schema = struct_schema();