Skip to content

Commit 1296258

Browse files
authored
[ES|QL] Add suggested_cast (#127139)
Adds a `suggested_cast` for unsupported fields based on `original_types` for specific cases: - when a field is mapped to `datetime` and `date_nanos`, suggests `date_nanos` - when a field is mapped to `aggregate_metric_double` and other numerics, suggests `aggregate_metric_double` - when a field is unsupported anywhere, suggests nothing - suggests `keyword` in all other cases For example, a field that is mapped to `aggregate_metric_double` in one index, but `double` in another will have a response like: ``` { "name" : "agg_metric", "type" : "unsupported", "original_types" : [ "aggregate_metric_double", "double" ], "suggested_cast" : "aggregate_metric_double" } ``` A field that is completely unsupported will have a response like: ``` { "name" : "binary_field", "type" : "unsupported", "original_types" : [ "binary" ] } ```
1 parent cd4fcbf commit 1296258

File tree

10 files changed

+333
-7
lines changed

10 files changed

+333
-7
lines changed

docs/changelog/127139.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 127139
2+
summary: Add `suggested_cast`
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java

+23
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,29 @@ public boolean isDate() {
708708
};
709709
}
710710

711+
public static DataType suggestedCast(Set<DataType> originalTypes) {
712+
if (originalTypes.isEmpty() || originalTypes.contains(UNSUPPORTED)) {
713+
return null;
714+
}
715+
if (originalTypes.contains(DATE_NANOS) && originalTypes.contains(DATETIME) && originalTypes.size() == 2) {
716+
return DATE_NANOS;
717+
}
718+
if (originalTypes.contains(AGGREGATE_METRIC_DOUBLE)) {
719+
boolean allNumeric = true;
720+
for (DataType type : originalTypes) {
721+
if (type.isNumeric() == false && type != AGGREGATE_METRIC_DOUBLE) {
722+
allNumeric = false;
723+
break;
724+
}
725+
}
726+
if (allNumeric) {
727+
return AGGREGATE_METRIC_DOUBLE;
728+
}
729+
}
730+
731+
return KEYWORD;
732+
}
733+
711734
/**
712735
* Named parameters with default values. It's just easier to do this with
713736
* a builder in java....

x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java

+118
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.elasticsearch.xcontent.XContentBuilder;
2828
import org.elasticsearch.xcontent.XContentType;
2929
import org.elasticsearch.xcontent.json.JsonXContent;
30+
import org.elasticsearch.xpack.esql.core.type.DataType;
3031
import org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase;
3132
import org.elasticsearch.xpack.esql.tools.ProfileParser;
3233
import org.hamcrest.Matchers;
@@ -40,11 +41,14 @@
4041
import java.nio.charset.StandardCharsets;
4142
import java.util.ArrayList;
4243
import java.util.Arrays;
44+
import java.util.Comparator;
4345
import java.util.HashSet;
4446
import java.util.List;
4547
import java.util.Locale;
4648
import java.util.Map;
4749
import java.util.Set;
50+
import java.util.stream.Collectors;
51+
import java.util.stream.Stream;
4852

4953
import static org.elasticsearch.test.ListMatcher.matchesList;
5054
import static org.elasticsearch.test.MapMatcher.assertMap;
@@ -648,6 +652,120 @@ public void testForceSleepsProfile() throws IOException {
648652
}
649653
}
650654

655+
public void testSuggestedCast() throws IOException {
656+
// TODO: Figure out how best to make sure we don't leave out new types
657+
Map<DataType, String> typesAndValues = Map.ofEntries(
658+
Map.entry(DataType.BOOLEAN, "\"true\""),
659+
Map.entry(DataType.LONG, "-1234567890234567"),
660+
Map.entry(DataType.INTEGER, "123"),
661+
Map.entry(DataType.UNSIGNED_LONG, "1234567890234567"),
662+
Map.entry(DataType.DOUBLE, "12.4"),
663+
Map.entry(DataType.KEYWORD, "\"keyword\""),
664+
Map.entry(DataType.TEXT, "\"some text\""),
665+
Map.entry(DataType.DATE_NANOS, "\"2015-01-01T12:10:30.123456789Z\""),
666+
Map.entry(DataType.DATETIME, "\"2015-01-01T12:10:30Z\""),
667+
Map.entry(DataType.IP, "\"192.168.30.1\""),
668+
Map.entry(DataType.VERSION, "\"8.19.0\""),
669+
Map.entry(DataType.GEO_POINT, "[-71.34, 41.12]"),
670+
Map.entry(DataType.GEO_SHAPE, """
671+
{
672+
"type": "Point",
673+
"coordinates": [-77.03653, 38.897676]
674+
}
675+
"""),
676+
Map.entry(DataType.AGGREGATE_METRIC_DOUBLE, """
677+
{
678+
"max": 14983.1
679+
}
680+
""")
681+
);
682+
Set<DataType> shouldBeSupported = Stream.of(DataType.values()).filter(DataType::isRepresentable).collect(Collectors.toSet());
683+
shouldBeSupported.remove(DataType.CARTESIAN_POINT);
684+
shouldBeSupported.remove(DataType.CARTESIAN_SHAPE);
685+
shouldBeSupported.remove(DataType.NULL);
686+
shouldBeSupported.remove(DataType.DOC_DATA_TYPE);
687+
shouldBeSupported.remove(DataType.TSID_DATA_TYPE);
688+
for (DataType type : shouldBeSupported) {
689+
assertTrue(typesAndValues.containsKey(type));
690+
}
691+
assertThat(typesAndValues.size(), equalTo(shouldBeSupported.size()));
692+
693+
for (DataType type : typesAndValues.keySet()) {
694+
String additionalProperties = "";
695+
if (type == DataType.AGGREGATE_METRIC_DOUBLE) {
696+
additionalProperties += """
697+
,
698+
"metrics": ["max"],
699+
"default_metric": "max"
700+
""";
701+
}
702+
createIndex("index-" + type.esType(), null, """
703+
"properties": {
704+
"my_field": {
705+
"type": "%s" %s
706+
}
707+
}
708+
""".formatted(type.esType(), additionalProperties));
709+
Request doc = new Request("PUT", "index-" + type.esType() + "/_doc/1");
710+
doc.setJsonEntity("{\"my_field\": " + typesAndValues.get(type) + "}");
711+
client().performRequest(doc);
712+
}
713+
714+
List<DataType> listOfTypes = new ArrayList<>(typesAndValues.keySet());
715+
listOfTypes.sort(Comparator.comparing(DataType::typeName));
716+
717+
for (int i = 0; i < listOfTypes.size(); i++) {
718+
for (int j = i + 1; j < listOfTypes.size(); j++) {
719+
String query = """
720+
{
721+
"query": "FROM index-%s,index-%s | LIMIT 100 | KEEP my_field"
722+
}
723+
""".formatted(listOfTypes.get(i).esType(), listOfTypes.get(j).esType());
724+
Request request = new Request("POST", "/_query");
725+
request.setJsonEntity(query);
726+
Response resp = client().performRequest(request);
727+
Map<String, Object> results = entityAsMap(resp);
728+
List<?> columns = (List<?>) results.get("columns");
729+
DataType suggestedCast = DataType.suggestedCast(Set.of(listOfTypes.get(i), listOfTypes.get(j)));
730+
assertThat(
731+
columns,
732+
equalTo(
733+
List.of(
734+
Map.ofEntries(
735+
Map.entry("name", "my_field"),
736+
Map.entry("type", "unsupported"),
737+
Map.entry("original_types", List.of(listOfTypes.get(i).typeName(), listOfTypes.get(j).typeName())),
738+
Map.entry("suggested_cast", suggestedCast.typeName())
739+
)
740+
)
741+
)
742+
);
743+
744+
String castedQuery = """
745+
{
746+
"query": "FROM index-%s,index-%s | LIMIT 100 | EVAL my_field = my_field::%s"
747+
}
748+
""".formatted(
749+
listOfTypes.get(i).esType(),
750+
listOfTypes.get(j).esType(),
751+
suggestedCast == DataType.KEYWORD ? "STRING" : suggestedCast.nameUpper()
752+
);
753+
Request castedRequest = new Request("POST", "/_query");
754+
castedRequest.setJsonEntity(castedQuery);
755+
Response castedResponse = client().performRequest(castedRequest);
756+
Map<String, Object> castedResults = entityAsMap(castedResponse);
757+
List<?> castedColumns = (List<?>) castedResults.get("columns");
758+
assertThat(
759+
castedColumns,
760+
equalTo(List.of(Map.ofEntries(Map.entry("name", "my_field"), Map.entry("type", suggestedCast.typeName()))))
761+
);
762+
}
763+
}
764+
for (DataType type : typesAndValues.keySet()) {
765+
deleteIndex("index-" + type.esType());
766+
}
767+
}
768+
651769
static MapMatcher commonProfile() {
652770
return matchesMap() //
653771
.entry("description", any(String.class))

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/FieldExtractorTestCase.java

+27-6
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.elasticsearch.xcontent.XContentType;
3333
import org.elasticsearch.xcontent.json.JsonXContent;
3434
import org.elasticsearch.xpack.esql.action.EsqlCapabilities;
35+
import org.elasticsearch.xpack.esql.core.type.DataType;
3536
import org.elasticsearch.xpack.esql.plugin.QueryPragmas;
3637
import org.hamcrest.Matcher;
3738
import org.junit.Before;
@@ -45,8 +46,10 @@
4546
import java.util.List;
4647
import java.util.Locale;
4748
import java.util.Map;
49+
import java.util.Objects;
4850
import java.util.TreeMap;
4951
import java.util.function.Function;
52+
import java.util.stream.Collectors;
5053

5154
import static org.elasticsearch.test.ListMatcher.matchesList;
5255
import static org.elasticsearch.test.MapMatcher.assertMap;
@@ -690,7 +693,7 @@ public void testByteFieldWithIntSubfieldTooBig() throws IOException {
690693
* </pre>.
691694
*/
692695
public void testIncompatibleTypes() throws IOException {
693-
assumeOriginalTypesReported();
696+
assumeSuggestedCastReported();
694697
keywordTest().createIndex("test1", "f");
695698
index("test1", """
696699
{"f": "f1"}""");
@@ -764,7 +767,7 @@ public void testDistinctInEachIndex() throws IOException {
764767
* </pre>.
765768
*/
766769
public void testMergeKeywordAndObject() throws IOException {
767-
assumeOriginalTypesReported();
770+
assumeSuggestedCastReported();
768771
keywordTest().createIndex("test1", "file");
769772
index("test1", """
770773
{"file": "f1"}""");
@@ -959,7 +962,7 @@ public void testIntegerDocValuesConflict() throws IOException {
959962
* In an ideal world we'd promote the {@code integer} to an {@code long} and just go.
960963
*/
961964
public void testLongIntegerConflict() throws IOException {
962-
assumeOriginalTypesReported();
965+
assumeSuggestedCastReported();
963966
longTest().sourceMode(SourceMode.DEFAULT).createIndex("test1", "emp_no");
964967
index("test1", """
965968
{"emp_no": 1}""");
@@ -1002,7 +1005,7 @@ public void testLongIntegerConflict() throws IOException {
10021005
* In an ideal world we'd promote the {@code short} to an {@code integer} and just go.
10031006
*/
10041007
public void testIntegerShortConflict() throws IOException {
1005-
assumeOriginalTypesReported();
1008+
assumeSuggestedCastReported();
10061009
intTest().sourceMode(SourceMode.DEFAULT).createIndex("test1", "emp_no");
10071010
index("test1", """
10081011
{"emp_no": 1}""");
@@ -1051,7 +1054,7 @@ public void testIntegerShortConflict() throws IOException {
10511054
* </pre>.
10521055
*/
10531056
public void testTypeConflictInObject() throws IOException {
1054-
assumeOriginalTypesReported();
1057+
assumeSuggestedCastReported();
10551058
createIndex("test1", empNoInObject("integer"));
10561059
index("test1", """
10571060
{"foo": {"emp_no": 1}}""");
@@ -1379,6 +1382,12 @@ private void assumeOriginalTypesReported() throws IOException {
13791382
assumeTrue("This test makes sense for versions that report original types", requiredClusterCapability);
13801383
}
13811384

1385+
private void assumeSuggestedCastReported() throws IOException {
1386+
var capsName = EsqlCapabilities.Cap.SUGGESTED_CAST.name().toLowerCase(Locale.ROOT);
1387+
boolean requiredClusterCapability = clusterHasCapability("POST", "/_query", List.of(), List.of(capsName)).orElse(false);
1388+
assumeTrue("This test makes sense for versions that report suggested casts", requiredClusterCapability);
1389+
}
1390+
13821391
private CheckedConsumer<XContentBuilder, IOException> empNoInObject(String empNoType) {
13831392
return index -> {
13841393
index.startObject("properties");
@@ -1715,7 +1724,19 @@ private static Map<String, Object> columnInfo(String name, String type) {
17151724
}
17161725

17171726
private static Map<String, Object> unsupportedColumnInfo(String name, String... originalTypes) {
1718-
return Map.of("name", name, "type", "unsupported", "original_types", List.of(originalTypes));
1727+
DataType suggested = DataType.suggestedCast(
1728+
List.of(originalTypes).stream().map(DataType::fromTypeName).filter(Objects::nonNull).collect(Collectors.toSet())
1729+
);
1730+
if (suggested == null) {
1731+
return Map.of("name", name, "type", "unsupported", "original_types", List.of(originalTypes));
1732+
} else {
1733+
return Map.ofEntries(
1734+
Map.entry("name", name),
1735+
Map.entry("type", "unsupported"),
1736+
Map.entry("original_types", List.of(originalTypes)),
1737+
Map.entry("suggested_cast", suggested.typeName())
1738+
);
1739+
}
17191740
}
17201741

17211742
private static void index(String name, String... docs) throws IOException {

x-pack/plugin/esql/qa/testFixtures/src/main/resources/convert.csv-spec

+20
Original file line numberDiff line numberDiff line change
@@ -481,3 +481,23 @@ x:integer | agg_metric:aggregate_metric_double
481481
[5032, 11111, 40814] | {"min":5032.0,"max":40814.0,"sum":56957.0,"value_count":3}
482482
//end::toAggregateMetricDoubleMv-result[]
483483
;
484+
485+
convertToAggregateMetricDoubleCastingOperatorFromDouble
486+
required_capability: suggested_cast
487+
ROW x = 29384.1256
488+
| EVAL agg_metric = x::aggregate_metric_double
489+
;
490+
491+
x:double | agg_metric:aggregate_metric_double
492+
29384.1256 | {"min":29384.1256,"max":29384.1256,"sum":29384.1256,"value_count":1}
493+
;
494+
495+
convertToAggregateMetricDoubleCastingOperatorFromInt
496+
required_capability: suggested_cast
497+
ROW x = 55555
498+
| EVAL agg_metric = x::aggregate_metric_double
499+
;
500+
501+
x:integer | agg_metric:aggregate_metric_double
502+
55555 | {"min":55555,"max":55555,"sum":55555,"value_count":1}
503+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ColumnInfoImpl.java

+19
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.io.IOException;
2424
import java.util.List;
2525
import java.util.Objects;
26+
import java.util.stream.Collectors;
2627

2728
import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
2829
import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
@@ -72,6 +73,9 @@ public static ColumnInfo fromXContent(XContentParser parser) {
7273
@Nullable
7374
private final List<String> originalTypes;
7475

76+
@Nullable
77+
private final DataType suggestedCast;
78+
7579
@ParserConstructor
7680
public ColumnInfoImpl(String name, String type, @Nullable List<String> originalTypes) {
7781
this(name, DataType.fromEs(type), originalTypes);
@@ -81,15 +85,27 @@ public ColumnInfoImpl(String name, DataType type, @Nullable List<String> origina
8185
this.name = name;
8286
this.type = type;
8387
this.originalTypes = originalTypes;
88+
this.suggestedCast = calculateSuggestedCast(this.originalTypes);
89+
}
90+
91+
private static DataType calculateSuggestedCast(List<String> originalTypes) {
92+
if (originalTypes == null) {
93+
return null;
94+
}
95+
return DataType.suggestedCast(
96+
originalTypes.stream().map(DataType::fromTypeName).filter(Objects::nonNull).collect(Collectors.toSet())
97+
);
8498
}
8599

86100
public ColumnInfoImpl(StreamInput in) throws IOException {
87101
this.name = in.readString();
88102
this.type = DataType.fromEs(in.readString());
89103
if (in.getTransportVersion().onOrAfter(TransportVersions.ESQL_REPORT_ORIGINAL_TYPES)) {
90104
this.originalTypes = in.readOptionalStringCollectionAsList();
105+
this.suggestedCast = calculateSuggestedCast(this.originalTypes);
91106
} else {
92107
this.originalTypes = null;
108+
this.suggestedCast = null;
93109
}
94110
}
95111

@@ -110,6 +126,9 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par
110126
if (originalTypes != null) {
111127
builder.field("original_types", originalTypes);
112128
}
129+
if (suggestedCast != null) {
130+
builder.field("suggested_cast", suggestedCast.typeName());
131+
}
113132
builder.endObject();
114133
return builder;
115134
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -1044,7 +1044,12 @@ public enum Cap {
10441044
/**
10451045
* Support for the SAMPLE command
10461046
*/
1047-
SAMPLE(Build.current().isSnapshot());
1047+
SAMPLE(Build.current().isSnapshot()),
1048+
1049+
/**
1050+
* The {@code _query} API now gives a cast recommendation if multiple types are found in certain instances.
1051+
*/
1052+
SUGGESTED_CAST;
10481053

10491054
private final boolean enabled;
10501055

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java

+3
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.elasticsearch.xpack.esql.core.util.NumericUtils;
3737
import org.elasticsearch.xpack.esql.core.util.StringUtils;
3838
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
39+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToAggregateMetricDouble;
3940
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToBoolean;
4041
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToCartesianPoint;
4142
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToCartesianShape;
@@ -70,6 +71,7 @@
7071
import java.util.function.Function;
7172

7273
import static java.util.Map.entry;
74+
import static org.elasticsearch.xpack.esql.core.type.DataType.AGGREGATE_METRIC_DOUBLE;
7375
import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN;
7476
import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT;
7577
import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE;
@@ -112,6 +114,7 @@ public class EsqlDataTypeConverter {
112114
public static final DateFormatter HOUR_MINUTE_SECOND = DateFormatter.forPattern("strict_hour_minute_second_fraction");
113115

114116
private static final Map<DataType, BiFunction<Source, Expression, AbstractConvertFunction>> TYPE_TO_CONVERTER_FUNCTION = Map.ofEntries(
117+
entry(AGGREGATE_METRIC_DOUBLE, ToAggregateMetricDouble::new),
115118
entry(BOOLEAN, ToBoolean::new),
116119
entry(CARTESIAN_POINT, ToCartesianPoint::new),
117120
entry(CARTESIAN_SHAPE, ToCartesianShape::new),

0 commit comments

Comments
 (0)