Example 61 with ColumnCapabilities

Use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

From class HashJoinSegmentStorageAdapterTest, method test_getColumnCapabilities_factToCountryNonexistentFactColumn.

@Test
public void test_getColumnCapabilities_factToCountryNonexistentFactColumn() {
    final ColumnCapabilities capabilities = makeFactToCountrySegment().getColumnCapabilities("nonexistent");
    Assert.assertNull(capabilities);
}
Also used: ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities), Test (org.junit.Test)
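
A short caller-side sketch of the contract this test pins down: a null ColumnCapabilities means the column does not exist in the segment. This is only an illustration, not Druid code; the class and method names are hypothetical, and only calls shown in the examples on this page are used.

import javax.annotation.Nullable;

import org.apache.druid.segment.column.ColumnCapabilities;

public class CapabilitiesGuards {
    // Hypothetical helper: a null ColumnCapabilities (as returned for "nonexistent" above)
    // means the column is absent; a non-null result may still describe a multi-valued column.
    public static boolean existsAndMightBeMultiValued(@Nullable ColumnCapabilities capabilities) {
        return capabilities != null && capabilities.hasMultipleValues().isMaybeTrue();
    }
}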

Example 62 with ColumnCapabilities

Use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

From class ExpressionPlannerTest, method testScalarOutputMultiValueInput.

@Test
public void testScalarOutputMultiValueInput() {
    ExpressionPlan thePlan = plan("array_to_string(array_append(scalar_string, 'x'), ',')");
    assertArrayInput(thePlan);
    ColumnCapabilities inferred = thePlan.inferColumnCapabilities(ColumnType.STRING);
    Assert.assertNotNull(inferred);
    Assert.assertEquals(ValueType.STRING, inferred.getType());
    Assert.assertTrue(inferred.hasNulls().isTrue());
    Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue());
    Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
    Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
    Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue());
    Assert.assertFalse(inferred.hasBitmapIndexes());
    Assert.assertFalse(inferred.hasSpatialIndexes());
    Assert.assertEquals("array_to_string(array_append(\"scalar_string\", 'x'), ',')", thePlan.getAppliedExpression().stringify());
    Assert.assertEquals("array_to_string(array_append(\"scalar_string\", 'x'), ',')", thePlan.getAppliedFoldExpression("__acc").stringify());
    Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType());
    // what about a multi-valued input
    thePlan = plan("array_to_string(array_append(scalar_string, multi_dictionary_string), ',')");
    assertArrayInput(thePlan);
    Assert.assertEquals("array_to_string(map((\"multi_dictionary_string\") -> array_append(\"scalar_string\", \"multi_dictionary_string\"), \"multi_dictionary_string\"), ',')", thePlan.getAppliedExpression().stringify());
    Assert.assertEquals("array_to_string(fold((\"multi_dictionary_string\", \"scalar_string\") -> array_append(\"scalar_string\", \"multi_dictionary_string\"), \"multi_dictionary_string\", \"scalar_string\"), ',')", thePlan.getAppliedFoldExpression("scalar_string").stringify());
    // why is this null
    Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType());
}
Also used: ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
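
The assertions above read the inferred capabilities through the tri-state accessors (isTrue / isMaybeTrue). A minimal sketch of how a consumer might branch on the same signals; the class, method, and returned labels are hypothetical, not Druid APIs.

import org.apache.druid.segment.column.ColumnCapabilities;

public class SelectorChoice {
    // Hypothetical branch over inferred capabilities, mirroring the checks asserted above.
    public static String choose(ColumnCapabilities inferred) {
        if (inferred.isDictionaryEncoded().isTrue() && inferred.areDictionaryValuesUnique().isTrue()) {
            return "dictionary";   // dictionary ids can be used directly
        }
        if (inferred.hasMultipleValues().isMaybeTrue()) {
            return "multi-value";  // each row may carry several values
        }
        return "plain";            // single-valued, non-dictionary path
    }
}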

Example 63 with ColumnCapabilities

Use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

From class ExpressionPlannerTest, method testMultiValueStringDictionaryEncoded.

@Test
public void testMultiValueStringDictionaryEncoded() {
    ExpressionPlan thePlan = plan("concat(multi_dictionary_string, 'x')");
    Assert.assertTrue(thePlan.is(ExpressionPlan.Trait.NEEDS_APPLIED, ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE));
    Assert.assertFalse(thePlan.is(ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.NON_SCALAR_INPUTS, ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.VECTORIZABLE));
    Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType());
    ColumnCapabilities inferred = thePlan.inferColumnCapabilities(null);
    Assert.assertNotNull(inferred);
    Assert.assertEquals(ValueType.STRING, inferred.getType());
    Assert.assertTrue(inferred.hasNulls().isMaybeTrue());
    Assert.assertTrue(inferred.isDictionaryEncoded().isTrue());
    Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
    Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
    Assert.assertTrue(inferred.hasMultipleValues().isTrue());
    Assert.assertFalse(inferred.hasBitmapIndexes());
    Assert.assertFalse(inferred.hasSpatialIndexes());
    thePlan = plan("concat(scalar_string, multi_dictionary_string_nonunique)");
    Assert.assertTrue(thePlan.is(ExpressionPlan.Trait.NEEDS_APPLIED));
    Assert.assertFalse(thePlan.is(ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.NON_SCALAR_INPUTS, ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.VECTORIZABLE));
    Assert.assertEquals("map((\"multi_dictionary_string_nonunique\") -> concat(\"scalar_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string_nonunique\")", thePlan.getAppliedExpression().stringify());
    Assert.assertEquals("fold((\"multi_dictionary_string_nonunique\", \"scalar_string\") -> concat(\"scalar_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string_nonunique\", \"scalar_string\")", thePlan.getAppliedFoldExpression("scalar_string").stringify());
    Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType());
    inferred = thePlan.inferColumnCapabilities(null);
    Assert.assertNotNull(inferred);
    Assert.assertEquals(ValueType.STRING, inferred.getType());
    Assert.assertTrue(inferred.hasMultipleValues().isTrue());
    thePlan = plan("concat(multi_dictionary_string, multi_dictionary_string_nonunique)");
    Assert.assertTrue(thePlan.is(ExpressionPlan.Trait.NEEDS_APPLIED));
    Assert.assertFalse(thePlan.is(ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.NON_SCALAR_INPUTS, ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.VECTORIZABLE));
    Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType());
    // whoa
    Assert.assertEquals("cartesian_map((\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\") -> concat(\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string\", \"multi_dictionary_string_nonunique\")", thePlan.getAppliedExpression().stringify());
    // sort of funny, but technically correct
    Assert.assertEquals("cartesian_fold((\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\", \"__acc\") -> concat(\"multi_dictionary_string\", \"multi_dictionary_string_nonunique\"), \"multi_dictionary_string\", \"multi_dictionary_string_nonunique\", \"__acc\")", thePlan.getAppliedFoldExpression("__acc").stringify());
    inferred = thePlan.inferColumnCapabilities(null);
    Assert.assertNotNull(inferred);
    Assert.assertEquals(ValueType.STRING, inferred.getType());
    Assert.assertTrue(inferred.hasMultipleValues().isTrue());
    thePlan = plan("array_append(multi_dictionary_string, 'foo')");
    Assert.assertTrue(thePlan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT));
    Assert.assertFalse(thePlan.is(ExpressionPlan.Trait.NEEDS_APPLIED, ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.NON_SCALAR_INPUTS, ExpressionPlan.Trait.VECTORIZABLE));
}
Also used: ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
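
The map/fold rewrites asserted above are what gets evaluated when the NEEDS_APPLIED trait is set. A hedged sketch of choosing between the two rewrites; the wrapper class is illustrative, and the import path for ExpressionPlan (org.apache.druid.segment.virtual) is an assumption, since it is not shown on this page.

import org.apache.druid.segment.virtual.ExpressionPlan;

public class PlanRewrites {
    // Hypothetical helper: pick the stringified rewrite to evaluate, either the plain applied
    // expression or the fold form that threads an accumulator binding ("__acc" here, as above).
    public static String rewriteFor(ExpressionPlan thePlan, boolean folding) {
        return folding
               ? thePlan.getAppliedFoldExpression("__acc").stringify()
               : thePlan.getAppliedExpression().stringify();
    }
}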

Example 64 with ColumnCapabilities

Use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

From class ExpressionPlannerTest, method testUnknown.

@Test
public void testUnknown() {
    // column has no capabilities
    // the vectorized query engine contract is such that the lack of column capabilities is indicative of a nil
    // column, so this plan is vectorizable
    // for non-vectorized expression processing, this will probably end up using a selector that examines inputs on a
    // row-by-row basis to determine whether the expression needs to be applied to multi-valued inputs
    ExpressionPlan thePlan = plan("concat(x, 'x')");
    Assert.assertTrue(thePlan.is(ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.VECTORIZABLE));
    Assert.assertFalse(thePlan.is(ExpressionPlan.Trait.NEEDS_APPLIED, ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.CONSTANT));
    // this expression has no "unapplied bindings", nothing to apply
    Assert.assertEquals("concat(\"x\", 'x')", thePlan.getAppliedExpression().stringify());
    Assert.assertEquals("concat(\"x\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify());
    Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType());
    ColumnCapabilities inferred = thePlan.inferColumnCapabilities(null);
    Assert.assertNotNull(inferred);
    Assert.assertEquals(ValueType.STRING, inferred.getType());
    Assert.assertTrue(inferred.hasNulls().isTrue());
    Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue());
    Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
    Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
    Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue());
    Assert.assertFalse(inferred.hasBitmapIndexes());
    Assert.assertFalse(inferred.hasSpatialIndexes());
    // what if both inputs are unknown, can we know things?
    thePlan = plan("x * y");
    Assert.assertTrue(thePlan.is(ExpressionPlan.Trait.UNKNOWN_INPUTS));
    Assert.assertFalse(thePlan.is(ExpressionPlan.Trait.NEEDS_APPLIED, ExpressionPlan.Trait.VECTORIZABLE, ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.CONSTANT));
    Assert.assertEquals("(\"x\" * \"y\")", thePlan.getAppliedExpression().stringify());
    Assert.assertEquals("(\"x\" * \"y\")", thePlan.getAppliedFoldExpression("__acc").stringify());
    Assert.assertNull(thePlan.getOutputType());
    Assert.assertNull(thePlan.inferColumnCapabilities(null));
    // no, we cannot
}
Also used: ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
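
The last two assertions show the degenerate case: with unknown inputs on both sides, neither an output type nor capabilities can be inferred. A tiny sketch of the check a caller would make before trusting the plan's output shape; the helper class is hypothetical and the ExpressionPlan import path is assumed.

import org.apache.druid.segment.virtual.ExpressionPlan;

public class PlanOutputCheck {
    // Hypothetical guard: true only when the plan knows its output type and can infer
    // capabilities for it (the "x * y" plan above fails both checks).
    public static boolean canDescribeOutput(ExpressionPlan thePlan) {
        return thePlan.getOutputType() != null && thePlan.inferColumnCapabilities(null) != null;
    }
}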

Example 65 with ColumnCapabilities

Use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

From class ExpressionPlannerTest, method testArrayOutput.

@Test
public void testArrayOutput() {
    // it's OK to use scalar inputs to array expressions; string columns can't help it if sometimes they are
    // single-valued and sometimes they are multi-valued
    ExpressionPlan thePlan = plan("array_append(scalar_string, 'x')");
    assertArrayInAndOut(thePlan);
    // with a string hint, it should look like a multi-valued string
    ColumnCapabilities inferred = thePlan.inferColumnCapabilities(ColumnType.STRING);
    Assert.assertNotNull(inferred);
    Assert.assertEquals(ValueType.STRING, inferred.getType());
    Assert.assertTrue(inferred.hasNulls().isMaybeTrue());
    Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue());
    Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
    Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
    Assert.assertTrue(inferred.hasMultipleValues().isTrue());
    Assert.assertFalse(inferred.hasBitmapIndexes());
    Assert.assertFalse(inferred.hasSpatialIndexes());
    // with an array type hint, though, let the array stay an array
    inferred = thePlan.inferColumnCapabilities(ColumnType.STRING_ARRAY);
    Assert.assertNotNull(inferred);
    Assert.assertEquals(ColumnType.STRING_ARRAY, inferred.toColumnType());
    Assert.assertTrue(inferred.hasNulls().isMaybeTrue());
    Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue());
    Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
    Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
    Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue());
    Assert.assertFalse(inferred.hasBitmapIndexes());
    Assert.assertFalse(inferred.hasSpatialIndexes());
    Assert.assertEquals("array_append(\"scalar_string\", 'x')", thePlan.getAppliedExpression().stringify());
    Assert.assertEquals("array_append(\"scalar_string\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify());
    Assert.assertEquals(ExpressionType.STRING_ARRAY, thePlan.getOutputType());
    // multi-valued are cool too
    thePlan = plan("array_append(multi_dictionary_string, 'x')");
    assertArrayInAndOut(thePlan);
    // what about incomplete inputs with arrays? they are not reported as incomplete because they are treated as arrays
    thePlan = plan("array_append(string_unknown, 'x')");
    assertArrayInAndOut(thePlan);
    Assert.assertEquals(ExpressionType.STRING_ARRAY, thePlan.getOutputType());
    // what if the unknown column is the scalar argument? then the incompleteness does show up
    thePlan = plan("array_append(multi_dictionary_string, string_unknown)");
    Assert.assertTrue(thePlan.is(ExpressionPlan.Trait.NON_SCALAR_INPUTS, ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.NON_SCALAR_OUTPUT));
    Assert.assertFalse(thePlan.is(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE, ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.NEEDS_APPLIED, ExpressionPlan.Trait.VECTORIZABLE));
    // incomplete and unknown inputs skip output type inference since we don't reliably know the type
    Assert.assertNull(thePlan.getOutputType());
    // array types are cool too
    thePlan = plan("array_append(string_array_1, 'x')");
    assertArrayInAndOut(thePlan);
    thePlan = plan("array_append(string_array_1, 'x')");
    assertArrayInAndOut(thePlan);
}
Also used: ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
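
The interesting knob in this test is the type hint passed to inferColumnCapabilities: a STRING hint makes the array expression look like a multi-valued string column, while a STRING_ARRAY hint keeps it an array. A small sketch comparing the two, using only the calls exercised above; the wrapper class is illustrative and the ExpressionPlan import path is assumed.

import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.virtual.ExpressionPlan;

public class HintComparison {
    // Hypothetical check: under a STRING hint the output reports multiple values, while
    // under a STRING_ARRAY hint it does not (it is a true array instead), as asserted above.
    public static boolean hintChangesShape(ExpressionPlan thePlan) {
        ColumnCapabilities asMvd = thePlan.inferColumnCapabilities(ColumnType.STRING);
        ColumnCapabilities asArray = thePlan.inferColumnCapabilities(ColumnType.STRING_ARRAY);
        return asMvd.hasMultipleValues().isTrue() && !asArray.hasMultipleValues().isMaybeTrue();
    }
}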

Aggregations

ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities): 170 usages
Test (org.junit.Test): 106 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 60 usages
ColumnCapabilitiesImpl (org.apache.druid.segment.column.ColumnCapabilitiesImpl): 18 usages
ArrayList (java.util.ArrayList): 12 usages
VectorValueSelector (org.apache.druid.segment.vector.VectorValueSelector): 12 usages
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 10 usages
ColumnHolder (org.apache.druid.segment.column.ColumnHolder): 10 usages
ValueType (org.apache.druid.segment.column.ValueType): 9 usages
Nullable (javax.annotation.Nullable): 8 usages
DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 8 usages
ColumnValueSelector (org.apache.druid.segment.ColumnValueSelector): 8 usages
ColumnType (org.apache.druid.segment.column.ColumnType): 8 usages
RowSignature (org.apache.druid.segment.column.RowSignature): 8 usages
BaseLongVectorValueSelector (org.apache.druid.segment.vector.BaseLongVectorValueSelector): 8 usages
File (java.io.File): 6 usages
List (java.util.List): 6 usages
TreeMap (java.util.TreeMap): 6 usages
NullHandlingTest (org.apache.druid.common.config.NullHandlingTest): 6 usages
Pair (org.apache.druid.java.util.common.Pair): 6 usages