diff --git a/search_aggs_bucket_multi_terms.go b/search_aggs_bucket_multi_terms.go new file mode 100644 index 00000000..de70fb07 --- /dev/null +++ b/search_aggs_bucket_multi_terms.go @@ -0,0 +1,313 @@ +// Copyright 2012-present Oliver Eilhard. All rights reserved. +// Use of this source code is governed by a MIT-license. +// See http://olivere.mit-license.org/license.txt for details. + +package elastic + +import "fmt" + +// A multi-bucket value source based aggregation where buckets are dynamically built - one per +// unique set of values. The multi terms aggregation is very similar to the terms aggregation, +// however in most cases it will be slower than the terms aggregation and will consume more +// memory. Therefore, if the same set of fields is constantly used, it would be more efficient to +// index a combined key for this fields as a separate field and use the terms aggregation on this field. +// +// See: https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-multi-terms-aggregation.html +type MultiTermsAggregation struct { + multiTerms []*MultiTerm + subAggregations map[string]Aggregation + meta map[string]interface{} + + size *int + shardSize *int + minDocCount *int + shardMinDocCount *int + collectionMode string + showTermDocCountError *bool + order []MultiTermsOrder +} + +func NewMultiTermsAggregation() *MultiTermsAggregation { + return &MultiTermsAggregation{ + subAggregations: make(map[string]Aggregation), + } +} + +func (a *MultiTermsAggregation) Terms(multiTerm ...*MultiTerm) *MultiTermsAggregation { + a.multiTerms = multiTerm + return a +} + +func (a *MultiTermsAggregation) SubAggregation(name string, subAggregation Aggregation) *MultiTermsAggregation { + a.subAggregations[name] = subAggregation + return a +} + +// Meta sets the meta data to be included in the aggregation response. +func (a *MultiTermsAggregation) Meta(metaData map[string]interface{}) *MultiTermsAggregation { + a.meta = metaData + return a +} + +func (a *MultiTermsAggregation) Size(size int) *MultiTermsAggregation { + a.size = &size + return a +} + +func (a *MultiTermsAggregation) ShardSize(shardSize int) *MultiTermsAggregation { + a.shardSize = &shardSize + return a +} + +func (a *MultiTermsAggregation) MinDocCount(minDocCount int) *MultiTermsAggregation { + a.minDocCount = &minDocCount + return a +} + +func (a *MultiTermsAggregation) ShardMinDocCount(shardMinDocCount int) *MultiTermsAggregation { + a.shardMinDocCount = &shardMinDocCount + return a +} + +func (a *MultiTermsAggregation) Order(order string, asc bool) *MultiTermsAggregation { + a.order = append(a.order, MultiTermsOrder{Field: order, Ascending: asc}) + return a +} + +func (a *MultiTermsAggregation) OrderByCount(asc bool) *MultiTermsAggregation { + // "order" : { "_count" : "asc" } + a.order = append(a.order, MultiTermsOrder{Field: "_count", Ascending: asc}) + return a +} + +func (a *MultiTermsAggregation) OrderByCountAsc() *MultiTermsAggregation { + return a.OrderByCount(true) +} + +func (a *MultiTermsAggregation) OrderByCountDesc() *MultiTermsAggregation { + return a.OrderByCount(false) +} + +func (a *MultiTermsAggregation) OrderByKey(asc bool) *MultiTermsAggregation { + // "order" : { "_term" : "asc" } + a.order = append(a.order, MultiTermsOrder{Field: "_key", Ascending: asc}) + return a +} + +func (a *MultiTermsAggregation) OrderByKeyAsc() *MultiTermsAggregation { + return a.OrderByKey(true) +} + +func (a *MultiTermsAggregation) OrderByKeyDesc() *MultiTermsAggregation { + return a.OrderByKey(false) +} + +// OrderByAggregation creates a bucket ordering strategy which sorts buckets +// based on a single-valued calc get. +func (a *MultiTermsAggregation) OrderByAggregation(aggName string, asc bool) *MultiTermsAggregation { + // { + // "aggs": { + // "genres_and_products": { + // "multi_terms": { + // "terms": [ + // { + // "field": "genre" + // }, + // { + // "field": "product" + // } + // ], + // "order": { + // "total_quantity": "desc" + // } + // }, + // "aggs": { + // "total_quantity": { + // "sum": { + // "field": "quantity" + // } + // } + // } + // } + // } + // } + a.order = append(a.order, MultiTermsOrder{Field: aggName, Ascending: asc}) + return a +} + +// OrderByAggregationAndMetric creates a bucket ordering strategy which +// sorts buckets based on a multi-valued calc get. +func (a *MultiTermsAggregation) OrderByAggregationAndMetric(aggName, metric string, asc bool) *MultiTermsAggregation { + // { + // "aggs": { + // "genres_and_products": { + // "multi_terms": { + // "terms": [ + // { + // "field": "genre" + // }, + // { + // "field": "product" + // } + // ], + // "order": { + // "total_quantity": "desc" + // } + // }, + // "aggs": { + // "total_quantity": { + // "sum": { + // "field": "quantity" + // } + // } + // } + // } + // } + // } + a.order = append(a.order, MultiTermsOrder{Field: aggName + "." + metric, Ascending: asc}) + return a +} + +// Collection mode can be depth_first or breadth_first as of 1.4.0. +func (a *MultiTermsAggregation) CollectionMode(collectionMode string) *MultiTermsAggregation { + a.collectionMode = collectionMode + return a +} + +func (a *MultiTermsAggregation) ShowTermDocCountError(showTermDocCountError bool) *MultiTermsAggregation { + a.showTermDocCountError = &showTermDocCountError + return a +} + +func (a *MultiTermsAggregation) Source() (interface{}, error) { + // Example: + // { + // "aggs": { + // "genres_and_products": { + // "multi_terms": { + // "terms": [ + // { + // "field": "genre" + // }, + // { + // "field": "product" + // } + // ] + // } + // } + // } + // } + // This method returns only the "multi_terms": { "terms": [ { "field": "genre" }, { "field": "product" } ] } part. + + source := make(map[string]interface{}) + opts := make(map[string]interface{}) + source["multi_terms"] = opts + + // ValuesSourceAggregationBuilder + terms := make([]interface{}, len(a.multiTerms)) + for i := range a.multiTerms { + if a.multiTerms[i] == nil { + return nil, fmt.Errorf("expected a multiterm but found a nil multiterm") + } + s, err := a.multiTerms[i].Source() + if err != nil { + return nil, err + } + terms[i] = s + } + opts["terms"] = terms + + // TermsBuilder + if a.size != nil && *a.size >= 0 { + opts["size"] = *a.size + } + if a.shardSize != nil && *a.shardSize >= 0 { + opts["shard_size"] = *a.shardSize + } + if a.minDocCount != nil && *a.minDocCount >= 0 { + opts["min_doc_count"] = *a.minDocCount + } + if a.shardMinDocCount != nil && *a.shardMinDocCount >= 0 { + opts["shard_min_doc_count"] = *a.shardMinDocCount + } + if a.showTermDocCountError != nil { + opts["show_term_doc_count_error"] = *a.showTermDocCountError + } + if a.collectionMode != "" { + opts["collect_mode"] = a.collectionMode + } + if len(a.order) > 0 { + var orderSlice []interface{} + for _, order := range a.order { + src, err := order.Source() + if err != nil { + return nil, err + } + orderSlice = append(orderSlice, src) + } + opts["order"] = orderSlice + } + + // AggregationBuilder (SubAggregations) + if len(a.subAggregations) > 0 { + aggsMap := make(map[string]interface{}) + source["aggregations"] = aggsMap + for name, aggregate := range a.subAggregations { + src, err := aggregate.Source() + if err != nil { + return nil, err + } + aggsMap[name] = src + } + } + + // Add Meta data if available + if len(a.meta) > 0 { + source["meta"] = a.meta + } + + return source, nil +} + +// MultiTermsOrder specifies a single order field for a multi terms aggregation. +type MultiTermsOrder struct { + Field string + Ascending bool +} + +// Source returns serializable JSON of the MultiTermsOrder. +func (order *MultiTermsOrder) Source() (interface{}, error) { + source := make(map[string]string) + if order.Ascending { + source[order.Field] = "asc" + } else { + source[order.Field] = "desc" + } + return source, nil +} + +// MultiTerm specifies a single term field for a multi terms aggregation. +type MultiTerm struct { + field string + missing interface{} +} + +// Source returns serializable JSON of the MultiTerm. +func (term *MultiTerm) Source() (interface{}, error) { + source := make(map[string]interface{}) + source["field"] = term.field + if term.missing != nil { + source["missing"] = term.missing + } + return source, nil +} + +// Missing configures the value to use when document miss a value +func (term *MultiTerm) Missing(missing interface{}) *MultiTerm { + term.missing = missing + return term +} + +func NewMultiTerm(field string) *MultiTerm { + return &MultiTerm{field: field} +} diff --git a/search_aggs_bucket_multi_terms_test.go b/search_aggs_bucket_multi_terms_test.go new file mode 100644 index 00000000..93a47f71 --- /dev/null +++ b/search_aggs_bucket_multi_terms_test.go @@ -0,0 +1,104 @@ +// Copyright 2012-present Oliver Eilhard. All rights reserved. +// Use of this source code is governed by a MIT-license. +// See http://olivere.mit-license.org/license.txt for details. + +package elastic + +import ( + "encoding/json" + "testing" +) + +func TestMultiTermsAggregation(t *testing.T) { + agg := NewMultiTermsAggregation().Terms(NewMultiTerm("genre"), NewMultiTerm("product")) + src, err := agg.Source() + if err != nil { + t.Fatal(err) + } + data, err := json.Marshal(src) + if err != nil { + t.Fatalf("marshaling to JSON failed: %v", err) + } + got := string(data) + expected := `{"multi_terms":{"terms":[{"field":"genre"},{"field":"product"}]}}` + if got != expected { + t.Errorf("expected\n%s\n,got:\n%s", expected, got) + } +} + +func TestMultiTermsAggregationWithSubAggregation(t *testing.T) { + subAgg := NewAvgAggregation().Field("height") + agg := NewMultiTermsAggregation().Terms(NewMultiTerm("genre"), NewMultiTerm("product")).Size(10). + OrderByAggregation("avg_height", false) + agg = agg.SubAggregation("avg_height", subAgg) + src, err := agg.Source() + if err != nil { + t.Fatal(err) + } + data, err := json.Marshal(src) + if err != nil { + t.Fatalf("marshaling to JSON failed: %v", err) + } + got := string(data) + expected := `{"aggregations":{"avg_height":{"avg":{"field":"height"}}},"multi_terms":{"order":[{"avg_height":"desc"}],"size":10,"terms":[{"field":"genre"},{"field":"product"}]}}` + if got != expected { + t.Errorf("expected\n%s\n,got:\n%s", expected, got) + } +} + +func TestMultiTermsAggregationWithMultipleSubAggregation(t *testing.T) { + subAgg1 := NewAvgAggregation().Field("height") + subAgg2 := NewAvgAggregation().Field("width") + agg := NewMultiTermsAggregation().Terms(NewMultiTerm("genre"), NewMultiTerm("product")).Size(10). + OrderByAggregation("avg_height", false) + agg = agg.SubAggregation("avg_height", subAgg1) + agg = agg.SubAggregation("avg_width", subAgg2) + src, err := agg.Source() + if err != nil { + t.Fatal(err) + } + data, err := json.Marshal(src) + if err != nil { + t.Fatalf("marshaling to JSON failed: %v", err) + } + got := string(data) + expected := `{"aggregations":{"avg_height":{"avg":{"field":"height"}},"avg_width":{"avg":{"field":"width"}}},"multi_terms":{"order":[{"avg_height":"desc"}],"size":10,"terms":[{"field":"genre"},{"field":"product"}]}}` + if got != expected { + t.Errorf("expected\n%s\n,got:\n%s", expected, got) + } +} + +func TestMultiTermsAggregationWithMetaData(t *testing.T) { + agg := NewMultiTermsAggregation().Terms(NewMultiTerm("genre"), NewMultiTerm("product")).Size(10).OrderByKeyDesc() + agg = agg.Meta(map[string]interface{}{"name": "Oliver"}) + src, err := agg.Source() + if err != nil { + t.Fatal(err) + } + data, err := json.Marshal(src) + if err != nil { + t.Fatalf("marshaling to JSON failed: %v", err) + } + got := string(data) + expected := `{"meta":{"name":"Oliver"},"multi_terms":{"order":[{"_key":"desc"}],"size":10,"terms":[{"field":"genre"},{"field":"product"}]}}` + if got != expected { + t.Errorf("expected\n%s\n,got:\n%s", expected, got) + } +} + +func TestMultiTermsAggregationWithMissing(t *testing.T) { + agg := NewMultiTermsAggregation().Terms(NewMultiTerm("genre"), NewMultiTerm("product").Missing("n/a")).Size(10) + src, err := agg.Source() + if err != nil { + t.Fatal(err) + } + data, err := json.Marshal(src) + if err != nil { + t.Fatalf("marshaling to JSON failed: %v", err) + } + got := string(data) + expected := `{"multi_terms":{"size":10,"terms":[{"field":"genre"},{"field":"product","missing":"n/a"}]}}` + if got != expected { + t.Errorf("expected\n%s\n,got:\n%s", expected, got) + } +}