Skip to content

Commit

Permalink
Added JSON Type
Browse files Browse the repository at this point in the history
Closes #1251
  • Loading branch information
fulmicoton committed Feb 22, 2022
1 parent d37633e commit dc253cd
Show file tree
Hide file tree
Showing 26 changed files with 1,777 additions and 336 deletions.
37 changes: 37 additions & 0 deletions benches/index-bench.rs
Expand Up @@ -21,6 +21,11 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
schema_builder.add_text_field("severity", STRING | STORED);
schema_builder.build()
};
// Schema made of a single indexed JSON field named "json"; the JSON
// benchmarks feed whole log records into that one field.
let dynamic_schema = {
    let mut builder = tantivy::schema::SchemaBuilder::new();
    builder.add_json_field("json", TEXT);
    builder.build()
};

let mut group = c.benchmark_group("index-hdfs");
group.sample_size(20);
Expand Down Expand Up @@ -74,6 +79,38 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
index_writer.commit().unwrap();
})
});
// Indexes every HDFS log line as a JSON object into the single "json" field
// WITHOUT committing, so only the cost of parsing and adding documents is
// measured. The commit cost is covered by the separate
// "index-hdfs-with-commit-json-without-docstore" benchmark.
group.bench_function("index-hdfs-no-commit-json-without-docstore", |b| {
    b.iter(|| {
        let index = Index::create_in_ram(dynamic_schema.clone());
        let json_field = dynamic_schema.get_field("json").unwrap();
        // No `mut`: the writer is never committed in this benchmark.
        let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
        for _ in 0..NUM_REPEATS {
            for doc_json in HDFS_LOGS.trim().split('\n') {
                let json_val: serde_json::Map<String, serde_json::Value> =
                    serde_json::from_str(doc_json).unwrap();
                let doc = tantivy::doc!(json_field=>json_val);
                index_writer.add_document(doc).unwrap();
            }
        }
    })
});
// Indexes every HDFS log line as a JSON object into the single "json" field
// and commits at the end of each iteration, so the commit is part of the
// measured time.
group.bench_function("index-hdfs-with-commit-json-without-docstore", |bencher| {
    bencher.iter(|| {
        let index = Index::create_in_ram(dynamic_schema.clone());
        let json_field = dynamic_schema.get_field("json").unwrap();
        let mut writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
        for _ in 0..NUM_REPEATS {
            for line in HDFS_LOGS.trim().split("\n") {
                let json_obj =
                    serde_json::from_str::<serde_json::Map<String, serde_json::Value>>(line)
                        .unwrap();
                writer
                    .add_document(tantivy::doc!(json_field => json_obj))
                    .unwrap();
            }
        }
        writer.commit().unwrap();
    })
});
}

criterion_group! {
Expand Down
61 changes: 61 additions & 0 deletions examples/json_field.rs
@@ -0,0 +1,61 @@
// # JSON field example
//
// This example shows how a JSON field can be used
// to make tantivy partially schemaless.

use tantivy::collector::{Count, TopDocs};
use tantivy::query::QueryParser;
use tantivy::schema::*;
use tantivy::{doc, Index, ReloadPolicy};

/// Builds an in-RAM index with a JSON `attributes` field, indexes one event
/// document, and checks that keys inside the JSON object can be queried.
///
/// # Errors
///
/// Returns any `tantivy` error raised while creating the writer, parsing the
/// document, committing, opening the reader, or parsing/running a query.
fn main() -> tantivy::Result<()> {
    // # Defining the schema
    //
    // We need three fields:
    // - a timestamp
    // - an event type
    // - a json object field
    let mut schema_builder = Schema::builder();
    schema_builder.add_date_field("timestamp", FAST | STORED);
    let event_type = schema_builder.add_text_field("event_type", STRING | STORED);
    let attributes = schema_builder.add_json_field("attributes", STORED | TEXT);
    let schema = schema_builder.build();

    // # Indexing documents
    let index = Index::create_in_ram(schema.clone());

    let mut index_writer = index.writer(50_000_000)?;
    let doc = schema.parse_document(
        r#"{
"timestamp": "2022-02-22T23:20:50.53Z",
"event_type": "click",
"attributes": {
"target": "submit-button",
"cart": {"product_id": 103},
"description": "the best"
}
}"#,
    )?;
    index_writer.add_document(doc)?;
    index_writer.commit()?;

    // # Searching
    let reader = index.reader()?;
    let searcher = reader.searcher();

    // `event_type` and `attributes` are the default fields of the parser.
    let query_parser = QueryParser::for_index(&index, vec![event_type, attributes]);
    {
        // `target` is a top-level key of the JSON object.
        let query = query_parser.parse_query("target:submit-button")?;
        let count_docs = searcher.search(&*query, &Count)?;
        assert_eq!(count_docs, 1);
    }
    {
        // `product_id` is nested under `cart`, addressed with a dotted path.
        let query = query_parser.parse_query("cart.product_id:103")?;
        let count_docs = searcher.search(&*query, &Count)?;
        assert_eq!(count_docs, 1);
    }
    Ok(())
}
4 changes: 2 additions & 2 deletions src/aggregation/mod.rs
Expand Up @@ -243,7 +243,7 @@ pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: &Type) -> f64 {
Type::U64 => val as f64,
Type::I64 => i64::from_u64(val) as f64,
Type::F64 => f64::from_u64(val),
Type::Date | Type::Str | Type::Facet | Type::Bytes => unimplemented!(),
Type::Date | Type::Str | Type::Facet | Type::Bytes | Type::Json => unimplemented!(),
}
}

Expand All @@ -262,7 +262,7 @@ pub(crate) fn f64_to_fastfield_u64(val: f64, field_type: &Type) -> u64 {
Type::U64 => val as u64,
Type::I64 => (val as i64).to_u64(),
Type::F64 => val.to_u64(),
Type::Date | Type::Str | Type::Facet | Type::Bytes => unimplemented!(),
Type::Date | Type::Str | Type::Facet | Type::Bytes | Type::Json => unimplemented!(),
}
}

Expand Down

0 comments on commit dc253cd

Please sign in to comment.