From 8d75101e3773e5d74c8d5cda356a9eaba34acf90 Mon Sep 17 00:00:00 2001
From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>
Date: Wed, 9 Nov 2022 18:10:24 +1300
Subject: [PATCH] Split out arrow-json (#3044) (#3049)

* Split out arrow-json (#3044)

* RAT

* Fix feature

* Revert no_run

* RAT
---
 .github/workflows/arrow.yml | 5 +
 .github/workflows/arrow_flight.yml | 1 +
 .github/workflows/dev_pr/labeler.yml | 1 +
 .github/workflows/integration.yml | 1 +
 .github/workflows/miri.yaml | 1 +
 .github/workflows/parquet.yml | 1 +
 Cargo.toml | 3 +-
 arrow-json/Cargo.toml | 54 +++++++
 .../src/json/mod.rs => arrow-json/src/lib.rs | 0
 {arrow/src/json => arrow-json/src}/reader.rs | 136 +++++++++---------
 {arrow/src/json => arrow-json/src}/writer.rs | 98 +++++++------
 {arrow => arrow-json}/test/data/arrays.json | 0
 {arrow => arrow-json}/test/data/basic.json | 0
 .../test/data/basic_nulls.json | 0
 .../test/data/list_string_dict_nested.json | 0
 .../data/list_string_dict_nested_nulls.json | 0
 .../test/data/mixed_arrays.json | 0
 .../test/data/mixed_arrays.json.gz | Bin
 .../test/data/nested_structs.json | 0
 arrow/Cargo.toml | 18 ++-
 arrow/benches/json_reader.rs | 5 +-
 arrow/src/lib.rs | 6 +-
 dev/release/rat_exclude_files.txt | 1 +
 23 files changed, 194 insertions(+), 137 deletions(-)
 create mode 100644 arrow-json/Cargo.toml
 rename arrow/src/json/mod.rs => arrow-json/src/lib.rs (100%)
 rename {arrow/src/json => arrow-json/src}/reader.rs (97%)
 rename {arrow/src/json => arrow-json/src}/writer.rs (95%)
 rename {arrow => arrow-json}/test/data/arrays.json (100%)
 rename {arrow => arrow-json}/test/data/basic.json (100%)
 rename {arrow => arrow-json}/test/data/basic_nulls.json (100%)
 rename {arrow => arrow-json}/test/data/list_string_dict_nested.json (100%)
 rename {arrow => arrow-json}/test/data/list_string_dict_nested_nulls.json (100%)
 rename {arrow => arrow-json}/test/data/mixed_arrays.json (100%)
 rename {arrow => arrow-json}/test/data/mixed_arrays.json.gz (100%)
 rename {arrow => arrow-json}/test/data/nested_structs.json (100%)

diff --git a/.github/workflows/arrow.yml b/.github/workflows/arrow.yml
index 461e7e87ea5..2e1c64ebe3a 100644
--- a/.github/workflows/arrow.yml
+++ b/.github/workflows/arrow.yml
@@ -35,6 +35,7 @@ on:
       - arrow-integration-test/**
       - arrow-ipc/**
       - arrow-csv/**
+      - arrow-json/**
       - .github/**

 jobs:
@@ -67,6 +68,8 @@
         run: cargo test -p arrow-ipc --all-features
       - name: Test arrow-csv with all features
         run: cargo test -p arrow-csv --all-features
+      - name: Test arrow-json with all features
+        run: cargo test -p arrow-json --all-features
       - name: Test arrow-integration-test with all features
         run: cargo test -p arrow-integration-test --all-features
       - name: Test arrow with default features
@@ -179,5 +182,7 @@
         run: cargo clippy -p arrow-ipc --all-targets --all-features -- -D warnings
       - name: Clippy arrow-csv with all features
         run: cargo clippy -p arrow-csv --all-targets --all-features -- -D warnings
+      - name: Clippy arrow-json with all features
+        run: cargo clippy -p arrow-json --all-targets --all-features -- -D warnings
       - name: Clippy arrow
         run: cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils,ffi,ipc_compression,dyn_cmp_dict,dyn_arith_dict,chrono-tz --all-targets -- -D warnings
diff --git a/.github/workflows/arrow_flight.yml b/.github/workflows/arrow_flight.yml
index 1f830ccf2b2..2825d2400f1 100644
--- a/.github/workflows/arrow_flight.yml
+++ b/.github/workflows/arrow_flight.yml
@@ -37,6 +37,7 @@ on:
       - arrow-flight/**
       - arrow-ipc/**
       - arrow-csv/**
+      - arrow-json/**
      - .github/**

 jobs:
diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml
index 04c7c080e01..d93932cd233 100644
--- a/.github/workflows/dev_pr/labeler.yml
+++ b/.github/workflows/dev_pr/labeler.yml
@@ -25,6 +25,7 @@ arrow:
   - arrow-select/**/*
   - arrow-ipc/**/*
   - arrow-csv/**/*
+  - arrow-json/**/*

 arrow-flight:
   - arrow-flight/**/*
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 9418b904283..3ece06b2923 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -33,6 +33,7 @@ on:
      - arrow-select/**
      - arrow-ipc/**
      - arrow-csv/**
+     - arrow-json/**
      - arrow-pyarrow-integration-testing/**
      - arrow-integration-test/**
      - arrow-integration-testing/**
diff --git a/.github/workflows/miri.yaml b/.github/workflows/miri.yaml
index e58ebdb3569..b1f5d85fc58 100644
--- a/.github/workflows/miri.yaml
+++ b/.github/workflows/miri.yaml
@@ -33,6 +33,7 @@ on:
      - arrow-select/**
      - arrow-ipc/**
      - arrow-csv/**
+     - arrow-json/**
      - .github/**

 jobs:
diff --git a/.github/workflows/parquet.yml b/.github/workflows/parquet.yml
index 4f3cf5f8005..5b0cc87440e 100644
--- a/.github/workflows/parquet.yml
+++ b/.github/workflows/parquet.yml
@@ -36,6 +36,7 @@ on:
      - arrow-select/**
      - arrow-ipc/**
      - arrow-csv/**
+     - arrow-json/**
      - parquet/**
      - .github/**

diff --git a/Cargo.toml b/Cargo.toml
index 18497d04379..16b4cb7f89e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -27,12 +27,13 @@ members = [
     "arrow-integration-test",
     "arrow-integration-testing",
     "arrow-ipc",
+    "arrow-json",
     "arrow-schema",
     "arrow-select",
+    "object_store",
     "parquet",
     "parquet_derive",
     "parquet_derive_test",
-    "object_store",
 ]
 # Enable the version 2 feature resolver, which avoids unifying features for targets that are not being built
 #
diff --git a/arrow-json/Cargo.toml b/arrow-json/Cargo.toml
new file mode 100644
index 00000000000..0d8c9109210
--- /dev/null
+++ b/arrow-json/Cargo.toml
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +[package] +name = "arrow-json" +version = "26.0.0" +description = "Support for parsing JSON format into the Arrow format" +homepage = "https://github.com/apache/arrow-rs" +repository = "https://github.com/apache/arrow-rs" +authors = ["Apache Arrow "] +license = "Apache-2.0" +keywords = ["arrow"] +include = [ + "benches/*.rs", + "src/**/*.rs", + "Cargo.toml", +] +edition = "2021" +rust-version = "1.62" + +[lib] +name = "arrow_json" +path = "src/lib.rs" +bench = false + +[dependencies] +arrow-array = { version = "26.0.0", path = "../arrow-array" } +arrow-buffer = { version = "26.0.0", path = "../arrow-buffer" } +arrow-cast = { version = "26.0.0", path = "../arrow-cast" } +arrow-data = { version = "26.0.0", path = "../arrow-data" } +arrow-schema = { version = "26.0.0", path = "../arrow-schema" } +half = { version = "2.1", default-features = false } +indexmap = { version = "1.9", default-features = false, features = ["std"] } +num = { version = "0.4", default-features = false, features = ["std"] } +serde_json = { version = "1.0", default-features = false, features = ["std"] } +chrono = { version = "0.4", default-features = false, features = ["clock"] } + +[dev-dependencies] +tempfile = "3.3" +flate2 = { version = "1", default-features = false, features = ["rust_backend"] } diff --git a/arrow/src/json/mod.rs b/arrow-json/src/lib.rs similarity index 100% rename from arrow/src/json/mod.rs rename to arrow-json/src/lib.rs diff --git a/arrow/src/json/reader.rs b/arrow-json/src/reader.rs similarity index 97% rename from arrow/src/json/reader.rs rename to arrow-json/src/reader.rs index 78c51559a7d..b3af909ef46 100644 --- a/arrow/src/json/reader.rs +++ b/arrow-json/src/reader.rs @@ -24,11 +24,10 @@ //! Example: //! //! ``` -//! use arrow::datatypes::{DataType, Field, Schema}; -//! use arrow::json; -//! use std::fs::File; -//! use std::io::BufReader; -//! use std::sync::Arc; +//! # use arrow_schema::*; +//! # use std::fs::File; +//! # use std::io::BufReader; +//! # use std::sync::Arc; //! //! let schema = Schema::new(vec![ //! Field::new("a", DataType::Float64, false), @@ -38,10 +37,10 @@ //! //! let file = File::open("test/data/basic.json").unwrap(); //! -//! let mut json = json::Reader::new( +//! let mut json = arrow_json::Reader::new( //! BufReader::new(file), //! Arc::new(schema), -//! json::reader::DecoderOptions::new(), +//! arrow_json::reader::DecoderOptions::new(), //! ); //! //! 
let batch = json.next().unwrap().unwrap(); @@ -55,13 +54,13 @@ use indexmap::set::IndexSet as HashSet; use serde_json::json; use serde_json::{map::Map as JsonMap, Value}; -use crate::buffer::MutableBuffer; -use crate::datatypes::*; -use crate::error::{ArrowError, Result}; -use crate::record_batch::{RecordBatch, RecordBatchOptions}; -use crate::util::bit_util; -use crate::{array::*, buffer::Buffer}; +use arrow_array::builder::*; +use arrow_array::types::*; +use arrow_array::*; +use arrow_buffer::{bit_util, Buffer, MutableBuffer}; use arrow_cast::parse::Parser; +use arrow_data::{ArrayData, ArrayDataBuilder}; +use arrow_schema::*; #[derive(Debug, Clone)] enum InferredType { @@ -72,7 +71,7 @@ enum InferredType { } impl InferredType { - fn merge(&mut self, other: InferredType) -> Result<()> { + fn merge(&mut self, other: InferredType) -> Result<(), ArrowError> { match (self, other) { (InferredType::Array(s), InferredType::Array(o)) => { s.merge(*o)?; @@ -147,7 +146,7 @@ fn coerce_data_type(dt: Vec<&DataType>) -> DataType { }) } -fn generate_datatype(t: &InferredType) -> Result { +fn generate_datatype(t: &InferredType) -> Result { Ok(match t { InferredType::Scalar(hs) => coerce_data_type(hs.iter().collect()), InferredType::Object(spec) => DataType::Struct(generate_fields(spec)?), @@ -160,14 +159,16 @@ fn generate_datatype(t: &InferredType) -> Result { }) } -fn generate_fields(spec: &HashMap) -> Result> { +fn generate_fields( + spec: &HashMap, +) -> Result, ArrowError> { spec.iter() .map(|(k, types)| Ok(Field::new(k, generate_datatype(types)?, true))) .collect() } /// Generate schema from JSON field names and inferred data types -fn generate_schema(spec: HashMap) -> Result { +fn generate_schema(spec: HashMap) -> Result { Ok(Schema::new(generate_fields(&spec)?)) } @@ -178,7 +179,7 @@ fn generate_schema(spec: HashMap) -> Result { /// ``` /// use std::fs::File; /// use std::io::BufReader; -/// use arrow::json::reader::ValueIter; +/// use arrow_json::reader::ValueIter; /// /// let mut reader = /// BufReader::new(File::open("test/data/mixed_arrays.json").unwrap()); @@ -208,7 +209,7 @@ impl<'a, R: Read> ValueIter<'a, R> { } impl<'a, R: Read> Iterator for ValueIter<'a, R> { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { if let Some(max) = self.max_read_records { @@ -259,7 +260,7 @@ impl<'a, R: Read> Iterator for ValueIter<'a, R> { /// ``` /// use std::fs::File; /// use std::io::BufReader; -/// use arrow::json::reader::infer_json_schema_from_seekable; +/// use arrow_json::reader::infer_json_schema_from_seekable; /// /// let file = File::open("test/data/mixed_arrays.json").unwrap(); /// // file's cursor's offset at 0 @@ -270,7 +271,7 @@ impl<'a, R: Read> Iterator for ValueIter<'a, R> { pub fn infer_json_schema_from_seekable( reader: &mut BufReader, max_read_records: Option, -) -> Result { +) -> Result { let schema = infer_json_schema(reader, max_read_records); // return the reader seek back to the start reader.seek(SeekFrom::Start(0))?; @@ -292,7 +293,7 @@ pub fn infer_json_schema_from_seekable( /// use std::fs::File; /// use std::io::{BufReader, SeekFrom, Seek}; /// use flate2::read::GzDecoder; -/// use arrow::json::reader::infer_json_schema; +/// use arrow_json::reader::infer_json_schema; /// /// let mut file = File::open("test/data/mixed_arrays.json.gz").unwrap(); /// @@ -307,7 +308,7 @@ pub fn infer_json_schema_from_seekable( pub fn infer_json_schema( reader: &mut BufReader, max_read_records: Option, -) -> Result { +) -> Result { 
infer_json_schema_from_iterator(ValueIter::new(reader, max_read_records)) } @@ -315,7 +316,7 @@ fn set_object_scalar_field_type( field_types: &mut HashMap, key: &str, ftype: DataType, -) -> Result<()> { +) -> Result<(), ArrowError> { if !field_types.contains_key(key) { field_types.insert(key.to_string(), InferredType::Scalar(HashSet::new())); } @@ -340,7 +341,7 @@ fn set_object_scalar_field_type( } } -fn infer_scalar_array_type(array: &[Value]) -> Result { +fn infer_scalar_array_type(array: &[Value]) -> Result { let mut hs = HashSet::new(); for v in array { @@ -371,7 +372,7 @@ fn infer_scalar_array_type(array: &[Value]) -> Result { Ok(InferredType::Scalar(hs)) } -fn infer_nested_array_type(array: &[Value]) -> Result { +fn infer_nested_array_type(array: &[Value]) -> Result { let mut inner_ele_type = InferredType::Any; for v in array { @@ -391,7 +392,7 @@ fn infer_nested_array_type(array: &[Value]) -> Result { Ok(InferredType::Array(Box::new(inner_ele_type))) } -fn infer_struct_array_type(array: &[Value]) -> Result { +fn infer_struct_array_type(array: &[Value]) -> Result { let mut field_types = HashMap::new(); for v in array { @@ -411,7 +412,7 @@ fn infer_struct_array_type(array: &[Value]) -> Result { Ok(InferredType::Object(field_types)) } -fn infer_array_element_type(array: &[Value]) -> Result { +fn infer_array_element_type(array: &[Value]) -> Result { match array.iter().take(1).next() { None => Ok(InferredType::Any), // empty array, return any type that can be updated later Some(a) => match a { @@ -425,7 +426,7 @@ fn infer_array_element_type(array: &[Value]) -> Result { fn collect_field_types_from_object( field_types: &mut HashMap, map: &JsonMap, -) -> Result<()> { +) -> Result<(), ArrowError> { for (k, v) in map { match v { Value::Array(array) => { @@ -532,9 +533,9 @@ fn collect_field_types_from_object( /// The reason we diverge here is because we don't have utilities to deal with JSON data once it's /// interpreted as Strings. We should match Spark's behavior once we added more JSON parsing /// kernels in the future. -pub fn infer_json_schema_from_iterator(value_iter: I) -> Result +pub fn infer_json_schema_from_iterator(value_iter: I) -> Result where - I: Iterator>, + I: Iterator>, { let mut field_types: HashMap = HashMap::new(); @@ -563,7 +564,7 @@ where /// /// # Examples /// ``` -/// use arrow::json::reader::{Decoder, DecoderOptions, ValueIter, infer_json_schema}; +/// use arrow_json::reader::{Decoder, DecoderOptions, ValueIter, infer_json_schema}; /// use std::fs::File; /// use std::io::{BufReader, Seek, SeekFrom}; /// use std::sync::Arc; @@ -673,9 +674,12 @@ impl Decoder { /// interator into a [`RecordBatch`]. /// /// Returns `None` if the input iterator is exhausted. 
- pub fn next_batch(&self, value_iter: &mut I) -> Result> + pub fn next_batch( + &self, + value_iter: &mut I, + ) -> Result, ArrowError> where - I: Iterator>, + I: Iterator>, { let batch_size = self.options.batch_size; let mut rows: Vec = Vec::with_capacity(batch_size); @@ -732,7 +736,7 @@ impl Decoder { rows: &[Value], col_name: &str, key_type: &DataType, - ) -> Result { + ) -> Result { match *key_type { DataType::Int8 => { let dtype = DataType::Dictionary( @@ -803,7 +807,7 @@ impl Decoder { data_type: &DataType, col_name: &str, rows: &[Value], - ) -> Result + ) -> Result where DT: ArrowPrimitiveType + ArrowDictionaryKeyType, { @@ -923,7 +927,7 @@ impl Decoder { col_name: &str, key_type: &DataType, value_type: &DataType, - ) -> Result { + ) -> Result { if let DataType::Utf8 = *value_type { match *key_type { DataType::Int8 => self.build_dictionary_array::(rows, col_name), @@ -959,7 +963,11 @@ impl Decoder { } } - fn build_boolean_array(&self, rows: &[Value], col_name: &str) -> Result { + fn build_boolean_array( + &self, + rows: &[Value], + col_name: &str, + ) -> Result { let mut builder = BooleanBuilder::with_capacity(rows.len()); for row in rows { if let Some(value) = row.get(col_name) { @@ -980,9 +988,9 @@ impl Decoder { &self, rows: &[Value], col_name: &str, - ) -> Result + ) -> Result where - T: ArrowNumericType, + T: ArrowPrimitiveType, T::Native: num::NumCast, { let format_string = self @@ -1019,7 +1027,7 @@ impl Decoder { &self, rows: &[Value], list_field: &Field, - ) -> Result { + ) -> Result { // build list offsets let mut cur_offset = OffsetSize::zero(); let list_len = rows.len(); @@ -1188,8 +1196,8 @@ impl Decoder { rows: &[Value], struct_fields: &[Field], projection: &Option>, - ) -> Result> { - let arrays: Result> = struct_fields + ) -> Result, ArrowError> { + let arrays: Result, ArrowError> = struct_fields .iter() .filter(|field| { projection @@ -1393,7 +1401,7 @@ impl Decoder { field_name: &str, map_type: &DataType, struct_field: &Field, - ) -> Result { + ) -> Result { // A map has the format {"key": "value"} where key is most commonly a string, // but could be a string, number or boolean (🤷🏾‍♂️) (e.g. {1: "value"}). 
// A map is also represented as a flattened contiguous array, with the number @@ -1488,7 +1496,7 @@ impl Decoder { &self, rows: &[Value], col_name: &str, - ) -> Result + ) -> Result where T::Native: num::NumCast, T: ArrowPrimitiveType + ArrowDictionaryKeyType, @@ -1512,7 +1520,7 @@ impl Decoder { /// Read the primitive list's values into ArrayData fn read_primitive_list_values(&self, rows: &[Value]) -> ArrayData where - T: ArrowPrimitiveType + ArrowNumericType, + T: ArrowPrimitiveType, T::Native: num::NumCast, { let values = rows @@ -1637,7 +1645,7 @@ impl Reader { /// Read the next batch of records #[allow(clippy::should_implement_trait)] - pub fn next(&mut self) -> Result> { + pub fn next(&mut self) -> Result, ArrowError> { self.decoder .next_batch(&mut ValueIter::new(&mut self.reader, None)) } @@ -1667,16 +1675,13 @@ impl ReaderBuilder { /// # Example /// /// ``` - /// extern crate arrow; - /// - /// use arrow::json; - /// use std::fs::File; + /// # use std::fs::File; /// - /// fn example() -> json::Reader { + /// fn example() -> arrow_json::Reader { /// let file = File::open("test/data/basic.json").unwrap(); /// /// // create a builder, inferring the schema with the first 100 records - /// let builder = json::ReaderBuilder::new().infer_schema(Some(100)); + /// let builder = arrow_json::ReaderBuilder::new().infer_schema(Some(100)); /// /// let reader = builder.build::(file).unwrap(); /// @@ -1723,7 +1728,7 @@ impl ReaderBuilder { } /// Create a new `Reader` from the `ReaderBuilder` - pub fn build(self, source: R) -> Result> + pub fn build(self, source: R) -> Result, ArrowError> where R: Read + Seek, { @@ -1743,7 +1748,7 @@ impl ReaderBuilder { } impl Iterator for Reader { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { self.next().transpose() @@ -1752,12 +1757,9 @@ impl Iterator for Reader { #[cfg(test)] mod tests { - use crate::{ - buffer::Buffer, - datatypes::DataType::{Dictionary, List}, - }; - use super::*; + use arrow_buffer::ToByteSlice; + use arrow_schema::DataType::{Dictionary, List}; use flate2::read::GzDecoder; use std::fs::File; use std::io::Cursor; @@ -2076,12 +2078,8 @@ mod tests { #[test] fn test_invalid_json_infer_schema() { - let re = infer_json_schema_from_seekable( - &mut BufReader::new( - File::open("test/data/uk_cities_with_headers.csv").unwrap(), - ), - None, - ); + let re = + infer_json_schema_from_seekable(&mut BufReader::new(Cursor::new(b"}")), None); assert_eq!( re.err().unwrap().to_string(), "Json error: Not valid JSON: expected value at line 1 column 1", @@ -2096,9 +2094,7 @@ mod tests { true, )])); let builder = ReaderBuilder::new().with_schema(schema).with_batch_size(64); - let mut reader: Reader = builder - .build::(File::open("test/data/uk_cities_with_headers.csv").unwrap()) - .unwrap(); + let mut reader = builder.build(Cursor::new(b"}")).unwrap(); assert_eq!( reader.next().err().unwrap().to_string(), "Json error: Not valid JSON: expected value at line 1 column 1", @@ -2107,7 +2103,7 @@ mod tests { #[test] fn test_coersion_scalar_and_list() { - use crate::datatypes::DataType::*; + use arrow_schema::DataType::*; assert_eq!( List(Box::new(Field::new("item", Float64, true))), diff --git a/arrow/src/json/writer.rs b/arrow-json/src/writer.rs similarity index 95% rename from arrow/src/json/writer.rs rename to arrow-json/src/writer.rs index f622b0cce77..69f62660039 100644 --- a/arrow/src/json/writer.rs +++ b/arrow-json/src/writer.rs @@ -27,18 +27,15 @@ //! [`record_batches_to_json_rows`]: //! //! ``` -//! use std::sync::Arc; -//! -//! 
use arrow::array::Int32Array; -//! use arrow::datatypes::{DataType, Field, Schema}; -//! use arrow::json; -//! use arrow::record_batch::RecordBatch; +//! # use std::sync::Arc; +//! # use arrow_array::{Int32Array, RecordBatch}; +//! # use arrow_schema::{DataType, Field, Schema}; //! //! let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); //! let a = Int32Array::from(vec![1, 2, 3]); //! let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap(); //! -//! let json_rows = json::writer::record_batches_to_json_rows(&[batch]).unwrap(); +//! let json_rows = arrow_json::writer::record_batches_to_json_rows(&[batch]).unwrap(); //! assert_eq!( //! serde_json::Value::Object(json_rows[1].clone()), //! serde_json::json!({"a": 2}), @@ -51,12 +48,9 @@ //! [`LineDelimitedWriter`]: //! //! ``` -//! use std::sync::Arc; -//! -//! use arrow::array::Int32Array; -//! use arrow::datatypes::{DataType, Field, Schema}; -//! use arrow::json; -//! use arrow::record_batch::RecordBatch; +//! # use std::sync::Arc; +//! # use arrow_array::{Int32Array, RecordBatch}; +//! # use arrow_schema::{DataType, Field, Schema}; //! //! let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); //! let a = Int32Array::from(vec![1, 2, 3]); @@ -64,7 +58,7 @@ //! //! // Write the record batch out as JSON //! let buf = Vec::new(); -//! let mut writer = json::LineDelimitedWriter::new(buf); +//! let mut writer = arrow_json::LineDelimitedWriter::new(buf); //! writer.write_batches(&vec![batch]).unwrap(); //! writer.finish().unwrap(); //! @@ -80,12 +74,9 @@ //! [`ArrayWriter`]: //! //! ``` -//! use std::sync::Arc; -//! -//! use arrow::array::Int32Array; -//! use arrow::datatypes::{DataType, Field, Schema}; -//! use arrow::json; -//! use arrow::record_batch::RecordBatch; +//! # use std::sync::Arc; +//! # use arrow_array::{Int32Array, RecordBatch}; +//! use arrow_schema::{DataType, Field, Schema}; //! //! let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); //! let a = Int32Array::from(vec![1, 2, 3]); @@ -93,7 +84,7 @@ //! //! // Write the record batch out as a JSON array //! let buf = Vec::new(); -//! let mut writer = json::ArrayWriter::new(buf); +//! let mut writer = arrow_json::ArrayWriter::new(buf); //! writer.write_batches(&vec![batch]).unwrap(); //! writer.finish().unwrap(); //! 
@@ -108,13 +99,13 @@ use std::{fmt::Debug, io::Write}; use serde_json::map::Map as JsonMap; use serde_json::Value; -use crate::array::*; -use crate::datatypes::*; -use crate::error::{ArrowError, Result}; -use crate::json::JsonSerializable; -use crate::record_batch::RecordBatch; +use crate::JsonSerializable; +use arrow_array::cast::*; +use arrow_array::types::*; +use arrow_array::*; +use arrow_schema::*; -fn primitive_array_to_json(array: &ArrayRef) -> Result> +fn primitive_array_to_json(array: &ArrayRef) -> Result, ArrowError> where T: ArrowPrimitiveType, T::Native: JsonSerializable, @@ -131,7 +122,7 @@ where fn struct_array_to_jsonmap_array( array: &StructArray, row_count: usize, -) -> Result>> { +) -> Result>, ArrowError> { let inner_col_names = array.column_names(); let mut inner_objs = iter::repeat(JsonMap::new()) @@ -150,7 +141,7 @@ fn struct_array_to_jsonmap_array( } /// Converts an arrow [`ArrayRef`] into a `Vec` of Serde JSON [`serde_json::Value`]'s -pub fn array_to_json_array(array: &ArrayRef) -> Result> { +pub fn array_to_json_array(array: &ArrayRef) -> Result, ArrowError> { match array.data_type() { DataType::Null => Ok(iter::repeat(Value::Null).take(array.len()).collect()), DataType::Boolean => Ok(as_boolean_array(array) @@ -269,7 +260,7 @@ fn set_column_for_json_rows( row_count: usize, array: &ArrayRef, col_name: &str, -) -> Result<()> { +) -> Result<(), ArrowError> { match array.data_type() { DataType::Int8 => { set_column_by_primitive_type::(rows, row_count, array, col_name); @@ -474,7 +465,7 @@ fn set_column_for_json_rows( rows.iter_mut() .zip(listarr.iter()) .take(row_count) - .try_for_each(|(row, maybe_value)| -> Result<()> { + .try_for_each(|(row, maybe_value)| -> Result<(), ArrowError> { if let Some(v) = maybe_value { row.insert( col_name.to_string(), @@ -489,7 +480,7 @@ fn set_column_for_json_rows( rows.iter_mut() .zip(listarr.iter()) .take(row_count) - .try_for_each(|(row, maybe_value)| -> Result<()> { + .try_for_each(|(row, maybe_value)| -> Result<(), ArrowError> { if let Some(v) = maybe_value { let val = array_to_json_array(&v)?; row.insert(col_name.to_string(), Value::Array(val)); @@ -499,7 +490,7 @@ fn set_column_for_json_rows( } DataType::Dictionary(_, value_type) => { let slice = array.slice(0, row_count); - let hydrated = crate::compute::kernels::cast::cast(&slice, value_type) + let hydrated = arrow_cast::cast::cast(&slice, value_type) .expect("cannot cast dictionary to underlying values"); set_column_for_json_rows(rows, row_count, &hydrated, col_name)?; } @@ -555,7 +546,7 @@ fn set_column_for_json_rows( /// [`JsonMap`]s (objects) pub fn record_batches_to_json_rows( batches: &[RecordBatch], -) -> Result>> { +) -> Result>, ArrowError> { let mut rows: Vec> = iter::repeat(JsonMap::new()) .take(batches.iter().map(|b| b.num_rows()).sum()) .collect(); @@ -581,24 +572,28 @@ pub fn record_batches_to_json_rows( pub trait JsonFormat: Debug + Default { #[inline] /// write any bytes needed at the start of the file to the writer - fn start_stream(&self, _writer: &mut W) -> Result<()> { + fn start_stream(&self, _writer: &mut W) -> Result<(), ArrowError> { Ok(()) } #[inline] /// write any bytes needed for the start of each row - fn start_row(&self, _writer: &mut W, _is_first_row: bool) -> Result<()> { + fn start_row( + &self, + _writer: &mut W, + _is_first_row: bool, + ) -> Result<(), ArrowError> { Ok(()) } #[inline] /// write any bytes needed for the end of each row - fn end_row(&self, _writer: &mut W) -> Result<()> { + fn end_row(&self, _writer: &mut W) -> Result<(), 
ArrowError> { Ok(()) } /// write any bytes needed for the start of each row - fn end_stream(&self, _writer: &mut W) -> Result<()> { + fn end_stream(&self, _writer: &mut W) -> Result<(), ArrowError> { Ok(()) } } @@ -614,7 +609,7 @@ pub trait JsonFormat: Debug + Default { pub struct LineDelimited {} impl JsonFormat for LineDelimited { - fn end_row(&self, writer: &mut W) -> Result<()> { + fn end_row(&self, writer: &mut W) -> Result<(), ArrowError> { writer.write_all(b"\n")?; Ok(()) } @@ -629,19 +624,23 @@ impl JsonFormat for LineDelimited { pub struct JsonArray {} impl JsonFormat for JsonArray { - fn start_stream(&self, writer: &mut W) -> Result<()> { + fn start_stream(&self, writer: &mut W) -> Result<(), ArrowError> { writer.write_all(b"[")?; Ok(()) } - fn start_row(&self, writer: &mut W, is_first_row: bool) -> Result<()> { + fn start_row( + &self, + writer: &mut W, + is_first_row: bool, + ) -> Result<(), ArrowError> { if !is_first_row { writer.write_all(b",")?; } Ok(()) } - fn end_stream(&self, writer: &mut W) -> Result<()> { + fn end_stream(&self, writer: &mut W) -> Result<(), ArrowError> { writer.write_all(b"]")?; Ok(()) } @@ -692,7 +691,7 @@ where } /// Write a single JSON row to the output writer - pub fn write_row(&mut self, row: &Value) -> Result<()> { + pub fn write_row(&mut self, row: &Value) -> Result<(), ArrowError> { let is_first_row = !self.started; if !self.started { self.format.start_stream(&mut self.writer)?; @@ -709,7 +708,7 @@ where } /// Convert the `RecordBatch` into JSON rows, and write them to the output - pub fn write(&mut self, batch: RecordBatch) -> Result<()> { + pub fn write(&mut self, batch: RecordBatch) -> Result<(), ArrowError> { for row in record_batches_to_json_rows(&[batch])? { self.write_row(&Value::Object(row))?; } @@ -717,7 +716,7 @@ where } /// Convert the [`RecordBatch`] into JSON rows, and write them to the output - pub fn write_batches(&mut self, batches: &[RecordBatch]) -> Result<()> { + pub fn write_batches(&mut self, batches: &[RecordBatch]) -> Result<(), ArrowError> { for row in record_batches_to_json_rows(batches)? { self.write_row(&Value::Object(row))?; } @@ -727,7 +726,7 @@ where /// Finishes the output stream. This function must be called after /// all record batches have been produced. (e.g. producing the final `']'` if writing /// arrays. 
- pub fn finish(&mut self) -> Result<()> { + pub fn finish(&mut self) -> Result<(), ArrowError> { if self.started && !self.finished { self.format.end_stream(&mut self.writer)?; self.finished = true; @@ -743,15 +742,14 @@ where #[cfg(test)] mod tests { - use std::convert::TryFrom; use std::fs::{read_to_string, File}; use std::sync::Arc; + use crate::reader::*; + use arrow_buffer::{Buffer, ToByteSlice}; + use arrow_data::ArrayData; use serde_json::json; - use crate::buffer::*; - use crate::json::reader::*; - use super::*; /// Asserts that the NDJSON `input` is semantically identical to `expected` diff --git a/arrow/test/data/arrays.json b/arrow-json/test/data/arrays.json similarity index 100% rename from arrow/test/data/arrays.json rename to arrow-json/test/data/arrays.json diff --git a/arrow/test/data/basic.json b/arrow-json/test/data/basic.json similarity index 100% rename from arrow/test/data/basic.json rename to arrow-json/test/data/basic.json diff --git a/arrow/test/data/basic_nulls.json b/arrow-json/test/data/basic_nulls.json similarity index 100% rename from arrow/test/data/basic_nulls.json rename to arrow-json/test/data/basic_nulls.json diff --git a/arrow/test/data/list_string_dict_nested.json b/arrow-json/test/data/list_string_dict_nested.json similarity index 100% rename from arrow/test/data/list_string_dict_nested.json rename to arrow-json/test/data/list_string_dict_nested.json diff --git a/arrow/test/data/list_string_dict_nested_nulls.json b/arrow-json/test/data/list_string_dict_nested_nulls.json similarity index 100% rename from arrow/test/data/list_string_dict_nested_nulls.json rename to arrow-json/test/data/list_string_dict_nested_nulls.json diff --git a/arrow/test/data/mixed_arrays.json b/arrow-json/test/data/mixed_arrays.json similarity index 100% rename from arrow/test/data/mixed_arrays.json rename to arrow-json/test/data/mixed_arrays.json diff --git a/arrow/test/data/mixed_arrays.json.gz b/arrow-json/test/data/mixed_arrays.json.gz similarity index 100% rename from arrow/test/data/mixed_arrays.json.gz rename to arrow-json/test/data/mixed_arrays.json.gz diff --git a/arrow/test/data/nested_structs.json b/arrow-json/test/data/nested_structs.json similarity index 100% rename from arrow/test/data/nested_structs.json rename to arrow-json/test/data/nested_structs.json diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index cc9421de710..d5392673e29 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -44,16 +44,15 @@ ahash = { version = "0.8", default-features = false, features = ["compile-time-r ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } [dependencies] +arrow-array = { version = "26.0.0", path = "../arrow-array" } arrow-buffer = { version = "26.0.0", path = "../arrow-buffer" } arrow-cast = { version = "26.0.0", path = "../arrow-cast" } arrow-csv = { version = "26.0.0", path = "../arrow-csv", optional = true } arrow-data = { version = "26.0.0", path = "../arrow-data" } +arrow-ipc = { version = "26.0.0", path = "../arrow-ipc", optional = true } +arrow-json = { version = "26.0.0", path = "../arrow-json", optional = true } arrow-schema = { version = "26.0.0", path = "../arrow-schema" } -arrow-array = { version = "26.0.0", path = "../arrow-array" } arrow-select = { version = "26.0.0", path = "../arrow-select" } -arrow-ipc = { version = "26.0.0", path = "../arrow-ipc", optional = true } -serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } -indexmap = { version = "1.9", default-features = false, features 
= ["std"] } rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true } num = { version = "0.4", default-features = false, features = ["std"] } half = { version = "2.1", default-features = false, features = ["num-traits"] } @@ -64,9 +63,8 @@ packed_simd = { version = "0.3", default-features = false, optional = true, pack chrono = { version = "0.4", default-features = false, features = ["clock"] } comfy-table = { version = "6.0", optional = true, default-features = false } pyo3 = { version = "0.17", default-features = false, optional = true } -lexical-core = { version = "^0.8", default-features = false, features = ["write-integers", "write-floats", "parse-integers", "parse-floats"] } multiversion = { version = "0.6.1", default-features = false } -bitflags = { version = "1.2.1", default-features = false } +bitflags = { version = "1.2.1", default-features = false, optional = true } [package.metadata.docs.rs] features = ["prettyprint", "ipc_compression", "dyn_cmp_dict", "ffi", "pyarrow"] @@ -76,7 +74,7 @@ default = ["csv", "ipc", "json"] ipc_compression = ["ipc", "arrow-ipc/lz4", "arrow-ipc/zstd"] csv = ["arrow-csv"] ipc = ["arrow-ipc"] -json = ["serde_json"] +json = ["arrow-json"] simd = ["packed_simd"] prettyprint = ["comfy-table"] # The test utils feature enables code used in benchmarks and tests but @@ -90,7 +88,7 @@ pyarrow = ["pyo3", "ffi"] # but is run as part of our CI checks force_validate = ["arrow-data/force_validate"] # Enable ffi support -ffi = [] +ffi = ["bitflags"] # Enable dyn-comparison of dictionary arrays with other arrays # Note: this does not impact comparison against scalars dyn_cmp_dict = [] @@ -100,9 +98,9 @@ dyn_arith_dict = [] chrono-tz = ["arrow-array/chrono-tz"] [dev-dependencies] -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } +chrono = { version = "0.4", default-features = false, features = ["clock"] } criterion = { version = "0.4", default-features = false } -flate2 = { version = "1", default-features = false, features = ["rust_backend"] } +rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } tempfile = { version = "3", default-features = false } [build-dependencies] diff --git a/arrow/benches/json_reader.rs b/arrow/benches/json_reader.rs index ef3ddf0537b..7bc3f4179fe 100644 --- a/arrow/benches/json_reader.rs +++ b/arrow/benches/json_reader.rs @@ -15,13 +15,10 @@ // specific language governing permissions and limitations // under the License. -extern crate arrow; -extern crate criterion; - use criterion::*; use arrow::datatypes::*; -use arrow::json::ReaderBuilder; +use arrow_json::ReaderBuilder; use std::io::Cursor; use std::sync::Arc; diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs index d1e0095840a..1b2ff0684a6 100644 --- a/arrow/src/lib.rs +++ b/arrow/src/lib.rs @@ -34,7 +34,9 @@ //! * [`arrow-array`][arrow_array] - type-safe arrow array abstractions //! * [`arrow-buffer`][arrow_buffer] - buffer abstractions for arrow arrays //! * [`arrow-cast`][arrow_cast] - cast kernels for arrow arrays +//! * [`arrow-csv`][arrow_csv] - read/write CSV to arrow format //! * [`arrow-data`][arrow_data] - the underlying data of arrow arrays +//! * [`arrow-json`][arrow_json] - read/write JSON to arrow format //! * [`arrow-schema`][arrow_schema] - the logical types for arrow arrays //! * [`arrow-select`][arrow_select] - selection kernels for arrow arrays //! 
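The `arrow` crate's `json` feature now simply pulls in the new `arrow-json` crate, and the benchmark change above already imports `arrow_json::ReaderBuilder` directly. A minimal sketch of that direct usage, assuming the arrow-json 26.0.0 crate introduced by this patch; the field name and NDJSON payload are invented for illustration:

```rust
// Sketch only: read two made-up NDJSON records with an explicit schema.
use std::io::Cursor;
use std::sync::Arc;

use arrow_json::ReaderBuilder;
use arrow_schema::{DataType, Field, Schema};

fn main() {
    // Illustrative payload and field name, not taken from the patch.
    let data = "{\"a\": 1}\n{\"a\": 2}\n";
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int64, true)]));

    // Same builder API as before the split; only the crate path changed.
    let mut reader = ReaderBuilder::new()
        .with_schema(schema)
        .with_batch_size(64)
        .build(Cursor::new(data))
        .unwrap();

    let batch = reader.next().unwrap().unwrap();
    assert_eq!(batch.num_rows(), 2);
    assert_eq!(batch.num_columns(), 1);
}
```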
@@ -315,8 +317,8 @@ pub mod ffi;
 pub mod ffi_stream;
 #[cfg(feature = "ipc")]
 pub use arrow_ipc as ipc;
-#[cfg(feature = "serde_json")]
-pub mod json;
+#[cfg(feature = "json")]
+pub use arrow_json as json;
 #[cfg(feature = "pyarrow")]
 pub mod pyarrow;

diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index bafee11edb7..0ca2ab91a5e 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -3,6 +3,7 @@ testing/*
 target/*
 dev/release/rat_exclude_files.txt
 arrow/test/data/*
+arrow-json/test/data/*
 arrow/test/dependency/*
 arrow-integration-test/data/*
 parquet_derive/test/dependency/*
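With the re-export in place (`pub use arrow_json as json;`), code that keeps going through `arrow::json` compiles as before once the `json` feature is enabled. For the write path, here is a minimal sketch patterned after the doc example in `arrow-json/src/writer.rs` above; the column name and values are illustrative:

```rust
// Sketch only: write one RecordBatch as newline-delimited JSON.
use std::sync::Arc;

use arrow_array::{Int32Array, RecordBatch};
use arrow_schema::{DataType, Field, Schema};

fn main() {
    let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
    let a = Int32Array::from(vec![1, 2, 3]);
    let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap();

    // Write into a borrowed Vec<u8> so the buffer stays accessible afterwards.
    let mut buf = Vec::new();
    {
        let mut writer = arrow_json::LineDelimitedWriter::new(&mut buf);
        writer.write_batches(&[batch]).unwrap();
        writer.finish().unwrap();
    }

    assert_eq!(String::from_utf8(buf).unwrap(), "{\"a\":1}\n{\"a\":2}\n{\"a\":3}\n");
}
```

Swapping `LineDelimitedWriter` for `ArrayWriter` would produce a single JSON array instead of newline-delimited rows, as the module docs in the patch describe.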