use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
the class HashJoinSegmentStorageAdapterTest method test_getColumnCapabilities_factToCountryNonexistentFactColumn.
/**
 * Asks the object returned by {@code makeFactToCountrySegment()} for the capabilities of a column named
 * "nonexistent" and expects {@code null} back.
 */
@Test
public void test_getColumnCapabilities_factToCountryNonexistentFactColumn() {
  // No capabilities object is expected for this column name.
  Assert.assertNull(makeFactToCountrySegment().getColumnCapabilities("nonexistent"));
}
use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
the class ExpressionPlannerTest method testScalarOutputMultiValueInput.
/**
 * Plans {@code array_to_string(array_append(...))} over a scalar string column, first with a literal and then
 * with a multi-valued column as the appended value, and checks the inferred capabilities, the applied and
 * fold-applied expression strings, and the reported output type.
 */
@Test
public void testScalarOutputMultiValueInput() {
  // Case 1: scalar string column with a literal appended.
  final String literalAppendStringified = "array_to_string(array_append(\"scalar_string\", 'x'), ',')";
  final ExpressionPlan literalAppendPlan = plan("array_to_string(array_append(scalar_string, 'x'), ',')");
  assertArrayInput(literalAppendPlan);

  final ColumnCapabilities caps = literalAppendPlan.inferColumnCapabilities(ColumnType.STRING);
  Assert.assertNotNull(caps);
  Assert.assertEquals(ValueType.STRING, caps.getType());
  Assert.assertTrue(caps.hasNulls().isTrue());
  Assert.assertFalse(caps.isDictionaryEncoded().isMaybeTrue());
  Assert.assertFalse(caps.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(caps.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
  Assert.assertFalse(caps.hasBitmapIndexes());
  Assert.assertFalse(caps.hasSpatialIndexes());

  // Both the applied and the fold-applied form are expected to stringify to the same expression.
  Assert.assertEquals(literalAppendStringified, literalAppendPlan.getAppliedExpression().stringify());
  Assert.assertEquals(literalAppendStringified, literalAppendPlan.getAppliedFoldExpression("__acc").stringify());
  Assert.assertEquals(ExpressionType.STRING, literalAppendPlan.getOutputType());

  // Case 2: the appended value is a multi-valued column.
  final ExpressionPlan multiValueAppendPlan =
      plan("array_to_string(array_append(scalar_string, multi_dictionary_string), ',')");
  assertArrayInput(multiValueAppendPlan);
  Assert.assertEquals(
      "array_to_string(map((\"multi_dictionary_string\") -> array_append(\"scalar_string\", \"multi_dictionary_string\"), \"multi_dictionary_string\"), ',')",
      multiValueAppendPlan.getAppliedExpression().stringify()
  );
  Assert.assertEquals(
      "array_to_string(fold((\"multi_dictionary_string\", \"scalar_string\") -> array_append(\"scalar_string\", \"multi_dictionary_string\"), \"multi_dictionary_string\", \"scalar_string\"), ',')",
      multiValueAppendPlan.getAppliedFoldExpression("scalar_string").stringify()
  );
  // The output type is still expected to be STRING for the multi-valued input.
  Assert.assertEquals(ExpressionType.STRING, multiValueAppendPlan.getOutputType());
}
use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
the class ExpressionPlannerTest method testMultiValueStringDictionaryEncoded.
/**
 * Plans several expressions that read multi-valued string columns and checks the plan traits, the output type,
 * the applied / fold-applied expression strings, and the inferred column capabilities.
 */
@Test
public void testMultiValueStringDictionaryEncoded() {
  // Trait combination that none of the three concat plans below is expected to match.
  final ExpressionPlan.Trait[] unexpectedConcatTraits = {
      ExpressionPlan.Trait.INCOMPLETE_INPUTS,
      ExpressionPlan.Trait.UNKNOWN_INPUTS,
      ExpressionPlan.Trait.NON_SCALAR_INPUTS,
      ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
      ExpressionPlan.Trait.VECTORIZABLE
  };

  // One multi-valued input combined with a literal.
  final ExpressionPlan singleInputPlan = plan("concat(multi_dictionary_string, 'x')");
  Assert.assertTrue(
      singleInputPlan.is(ExpressionPlan.Trait.NEEDS_APPLIED, ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE)
  );
  Assert.assertFalse(singleInputPlan.is(unexpectedConcatTraits));
  Assert.assertEquals(ExpressionType.STRING, singleInputPlan.getOutputType());

  final ColumnCapabilities singleInputCaps = singleInputPlan.inferColumnCapabilities(null);
  Assert.assertNotNull(singleInputCaps);
  Assert.assertEquals(ValueType.STRING, singleInputCaps.getType());
  Assert.assertTrue(singleInputCaps.hasNulls().isMaybeTrue());
  Assert.assertTrue(singleInputCaps.isDictionaryEncoded().isTrue());
  Assert.assertFalse(singleInputCaps.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(singleInputCaps.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertTrue(singleInputCaps.hasMultipleValues().isTrue());
  Assert.assertFalse(singleInputCaps.hasBitmapIndexes());
  Assert.assertFalse(singleInputCaps.hasSpatialIndexes());

  // A scalar column combined with a multi-valued column: expected to be applied via map / fold.
  final ExpressionPlan scalarWithMultiPlan = plan("concat(scalar_string, multi_dictionary_string_nonunique)");
  Assert.assertTrue(scalarWithMultiPlan.is(ExpressionPlan.Trait.NEEDS_APPLIED));
  Assert.assertFalse(scalarWithMultiPlan.is(unexpectedConcatTraits));
  Assert.assertEquals(
      "map((\"multi_dictionary_string_nonunique\") -> concat(\"scalar_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string_nonunique\")",
      scalarWithMultiPlan.getAppliedExpression().stringify()
  );
  Assert.assertEquals(
      "fold((\"multi_dictionary_string_nonunique\", \"scalar_string\") -> concat(\"scalar_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string_nonunique\", \"scalar_string\")",
      scalarWithMultiPlan.getAppliedFoldExpression("scalar_string").stringify()
  );
  Assert.assertEquals(ExpressionType.STRING, scalarWithMultiPlan.getOutputType());

  final ColumnCapabilities scalarWithMultiCaps = scalarWithMultiPlan.inferColumnCapabilities(null);
  Assert.assertNotNull(scalarWithMultiCaps);
  Assert.assertEquals(ValueType.STRING, scalarWithMultiCaps.getType());
  Assert.assertTrue(scalarWithMultiCaps.hasMultipleValues().isTrue());

  // Two multi-valued columns: expected to be applied via cartesian_map / cartesian_fold.
  final ExpressionPlan twoMultiPlan = plan("concat(multi_dictionary_string, multi_dictionary_string_nonunique)");
  Assert.assertTrue(twoMultiPlan.is(ExpressionPlan.Trait.NEEDS_APPLIED));
  Assert.assertFalse(twoMultiPlan.is(unexpectedConcatTraits));
  Assert.assertEquals(ExpressionType.STRING, twoMultiPlan.getOutputType());
  Assert.assertEquals(
      "cartesian_map((\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\") -> concat(\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string\", \"multi_dictionary_string_nonunique\")",
      twoMultiPlan.getAppliedExpression().stringify()
  );
  // The accumulator is bound but unused by the lambda body; odd-looking, but technically correct.
  Assert.assertEquals(
      "cartesian_fold((\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\", \"__acc\") -> concat(\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string\", \"multi_dictionary_string_nonunique\", \"__acc\")",
      twoMultiPlan.getAppliedFoldExpression("__acc").stringify()
  );

  final ColumnCapabilities twoMultiCaps = twoMultiPlan.inferColumnCapabilities(null);
  Assert.assertNotNull(twoMultiCaps);
  Assert.assertEquals(ValueType.STRING, twoMultiCaps.getType());
  Assert.assertTrue(twoMultiCaps.hasMultipleValues().isTrue());

  // An array function over the multi-valued column: non-scalar output, nothing to apply.
  final ExpressionPlan arrayAppendPlan = plan("array_append(multi_dictionary_string, 'foo')");
  Assert.assertTrue(arrayAppendPlan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT));
  Assert.assertFalse(
      arrayAppendPlan.is(
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.VECTORIZABLE
      )
  );
}
use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
the class ExpressionPlannerTest method testUnknown.
/**
 * Plans expressions whose input columns have no capabilities and checks which traits, output type and inferred
 * capabilities the plan reports, first with one unknown input and then with two.
 */
@Test
public void testUnknown() {
  // The input column has no capabilities. Under the vectorized query engine contract, missing column
  // capabilities indicate a nil column, so this plan is vectorizable. Non-vectorized processing will probably
  // end up with a selector that inspects inputs row by row to decide whether the expression needs to be
  // applied to multi-valued inputs.
  final String concatStringified = "concat(\"x\", 'x')";
  final ExpressionPlan oneUnknownPlan = plan("concat(x, 'x')");
  Assert.assertTrue(
      oneUnknownPlan.is(ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.VECTORIZABLE)
  );
  Assert.assertFalse(
      oneUnknownPlan.is(
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
          ExpressionPlan.Trait.CONSTANT
      )
  );

  // There are no "unapplied bindings" here, so both applied forms equal the plain expression.
  Assert.assertEquals(concatStringified, oneUnknownPlan.getAppliedExpression().stringify());
  Assert.assertEquals(concatStringified, oneUnknownPlan.getAppliedFoldExpression("__acc").stringify());
  Assert.assertEquals(ExpressionType.STRING, oneUnknownPlan.getOutputType());

  final ColumnCapabilities caps = oneUnknownPlan.inferColumnCapabilities(null);
  Assert.assertNotNull(caps);
  Assert.assertEquals(ValueType.STRING, caps.getType());
  Assert.assertTrue(caps.hasNulls().isTrue());
  Assert.assertFalse(caps.isDictionaryEncoded().isMaybeTrue());
  Assert.assertFalse(caps.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(caps.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
  Assert.assertFalse(caps.hasBitmapIndexes());
  Assert.assertFalse(caps.hasSpatialIndexes());

  // When both inputs are unknown, neither an output type nor capabilities can be determined.
  final String productStringified = "(\"x\" * \"y\")";
  final ExpressionPlan twoUnknownPlan = plan("x * y");
  Assert.assertTrue(twoUnknownPlan.is(ExpressionPlan.Trait.UNKNOWN_INPUTS));
  Assert.assertFalse(
      twoUnknownPlan.is(
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.VECTORIZABLE,
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
          ExpressionPlan.Trait.CONSTANT
      )
  );
  Assert.assertEquals(productStringified, twoUnknownPlan.getAppliedExpression().stringify());
  Assert.assertEquals(productStringified, twoUnknownPlan.getAppliedFoldExpression("__acc").stringify());
  Assert.assertNull(twoUnknownPlan.getOutputType());
  Assert.assertNull(twoUnknownPlan.inferColumnCapabilities(null));
}
use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
the class ExpressionPlannerTest method testArrayOutput.
/**
 * Plans {@code array_append(...)} over scalar, multi-valued, incomplete and array-typed inputs and checks the
 * plan traits, the output type, the applied / fold-applied expression strings, and the capabilities inferred
 * under both a STRING and a STRING_ARRAY type hint.
 */
@Test
public void testArrayOutput() {
  // It is ok to use scalar inputs to array expressions; string columns cannot help it if they are sometimes
  // single valued and sometimes multi-valued.
  ExpressionPlan thePlan = plan("array_append(scalar_string, 'x')");
  assertArrayInAndOut(thePlan);

  // With a STRING hint, the result should look like a multi-valued string.
  ColumnCapabilities inferred = thePlan.inferColumnCapabilities(ColumnType.STRING);
  Assert.assertNotNull(inferred);
  Assert.assertEquals(ValueType.STRING, inferred.getType());
  Assert.assertTrue(inferred.hasNulls().isMaybeTrue());
  Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue());
  Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertTrue(inferred.hasMultipleValues().isTrue());
  Assert.assertFalse(inferred.hasBitmapIndexes());
  Assert.assertFalse(inferred.hasSpatialIndexes());

  // With a STRING_ARRAY hint, the result stays an array and is not reported as multi-valued.
  inferred = thePlan.inferColumnCapabilities(ColumnType.STRING_ARRAY);
  Assert.assertNotNull(inferred);
  Assert.assertEquals(ColumnType.STRING_ARRAY, inferred.toColumnType());
  Assert.assertTrue(inferred.hasNulls().isMaybeTrue());
  Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue());
  Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
  Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
  Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue());
  Assert.assertFalse(inferred.hasBitmapIndexes());
  Assert.assertFalse(inferred.hasSpatialIndexes());

  Assert.assertEquals("array_append(\"scalar_string\", 'x')", thePlan.getAppliedExpression().stringify());
  Assert.assertEquals("array_append(\"scalar_string\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify());
  Assert.assertEquals(ExpressionType.STRING_ARRAY, thePlan.getOutputType());

  // Multi-valued inputs are fine too.
  thePlan = plan("array_append(multi_dictionary_string, 'x')");
  assertArrayInAndOut(thePlan);

  // Incomplete inputs used as the array argument are not reported as incomplete because they are treated as
  // arrays.
  thePlan = plan("array_append(string_unknown, 'x')");
  assertArrayInAndOut(thePlan);
  Assert.assertEquals(ExpressionType.STRING_ARRAY, thePlan.getOutputType());

  // When the incomplete input is the scalar argument instead, it is reported as incomplete.
  thePlan = plan("array_append(multi_dictionary_string, string_unknown)");
  Assert.assertTrue(
      thePlan.is(
          ExpressionPlan.Trait.NON_SCALAR_INPUTS,
          ExpressionPlan.Trait.INCOMPLETE_INPUTS,
          ExpressionPlan.Trait.NON_SCALAR_OUTPUT
      )
  );
  Assert.assertFalse(
      thePlan.is(
          ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
          ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
          ExpressionPlan.Trait.UNKNOWN_INPUTS,
          ExpressionPlan.Trait.NEEDS_APPLIED,
          ExpressionPlan.Trait.VECTORIZABLE
      )
  );
  // Incomplete and unknown plans skip the output type since it is not reliably known.
  Assert.assertNull(thePlan.getOutputType());

  // Array-typed columns are accepted as well.
  thePlan = plan("array_append(string_array_1, 'x')");
  assertArrayInAndOut(thePlan);
}
Aggregations