
Example 6 with SchemaPath

Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

In the class TestCastFunctions, the method testCastBigInt:

//private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestSimpleFunctions.class);
// Cast to bigint.
@Test
public void testCastBigInt(@Injectable final DrillbitContext bitContext, @Injectable UserClientConnection connection) throws Throwable {
    mockDrillbitContext(bitContext);
    final PhysicalPlanReader reader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(CONFIG);
    final PhysicalPlan plan = reader.readPhysicalPlan(Files.toString(FileUtils.getResourceAsFile("/functions/cast/testCastBigInt.json"), Charsets.UTF_8));
    final FunctionImplementationRegistry registry = new FunctionImplementationRegistry(CONFIG);
    final FragmentContext context = new FragmentContext(bitContext, PlanFragment.getDefaultInstance(), connection, registry);
    final SimpleRootExec exec = new SimpleRootExec(ImplCreator.getExec(context, (FragmentRoot) plan.getSortedOperators(false).iterator().next()));
    while (exec.next()) {
        final BigIntVector c0 = exec.getValueVectorById(new SchemaPath("varchar_cast", ExpressionPosition.UNKNOWN), BigIntVector.class);
        final BigIntVector.Accessor a0 = c0.getAccessor();
        int count = 0;
        for (int i = 0; i < c0.getAccessor().getValueCount(); i++) {
            final BigIntHolder holder0 = new BigIntHolder();
            a0.get(i, holder0);
            assertEquals(1256, holder0.value);
            ++count;
        }
        assertEquals(5, count);
    }
    exec.close();
    context.close();
    if (context.getFailureCause() != null) {
        throw context.getFailureCause();
    }
    assertFalse(context.isFailed());
}
Also used : PhysicalPlan(org.apache.drill.exec.physical.PhysicalPlan) BigIntHolder(org.apache.drill.exec.expr.holders.BigIntHolder) FragmentContext(org.apache.drill.exec.ops.FragmentContext) SchemaPath(org.apache.drill.common.expression.SchemaPath) PhysicalPlanReader(org.apache.drill.exec.planner.PhysicalPlanReader) FragmentRoot(org.apache.drill.exec.physical.base.FragmentRoot) FunctionImplementationRegistry(org.apache.drill.exec.expr.fn.FunctionImplementationRegistry) BigIntVector(org.apache.drill.exec.vector.BigIntVector) Test(org.junit.Test)
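The holder-based read above can also be written against the accessor's direct getter. A minimal variant of the inner loop, reusing the vector c0 and accessor a0 from the example and assuming BigIntVector.Accessor exposes get(int) returning the stored long:

        for (int i = 0; i < a0.getValueCount(); i++) {
            // Direct read of the i-th value; no holder is needed when every value is expected to be set.
            assertEquals(1256L, a0.get(i));
        }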

Example 7 with SchemaPath

Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

In the class TestCastFunctions, the method testCastInt:

// Cast to int.
@Test
public void testCastInt(@Injectable final DrillbitContext bitContext, @Injectable UserClientConnection connection) throws Throwable {
    mockDrillbitContext(bitContext);
    final PhysicalPlanReader reader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(CONFIG);
    final PhysicalPlan plan = reader.readPhysicalPlan(Files.toString(FileUtils.getResourceAsFile("/functions/cast/testCastInt.json"), Charsets.UTF_8));
    final FunctionImplementationRegistry registry = new FunctionImplementationRegistry(CONFIG);
    final FragmentContext context = new FragmentContext(bitContext, PlanFragment.getDefaultInstance(), connection, registry);
    final SimpleRootExec exec = new SimpleRootExec(ImplCreator.getExec(context, (FragmentRoot) plan.getSortedOperators(false).iterator().next()));
    while (exec.next()) {
        final IntVector c0 = exec.getValueVectorById(new SchemaPath("varchar_cast", ExpressionPosition.UNKNOWN), IntVector.class);
        final IntVector.Accessor a0 = c0.getAccessor();
        int count = 0;
        for (int i = 0; i < c0.getAccessor().getValueCount(); i++) {
            final IntHolder holder0 = new IntHolder();
            a0.get(i, holder0);
            assertEquals(1256, holder0.value);
            ++count;
        }
        assertEquals(5, count);
    }
    exec.close();
    context.close();
    if (context.getFailureCause() != null) {
        throw context.getFailureCause();
    }
    assertFalse(context.isFailed());
}
Also used : PhysicalPlan(org.apache.drill.exec.physical.PhysicalPlan) FragmentContext(org.apache.drill.exec.ops.FragmentContext) BigIntVector(org.apache.drill.exec.vector.BigIntVector) IntVector(org.apache.drill.exec.vector.IntVector) SchemaPath(org.apache.drill.common.expression.SchemaPath) PhysicalPlanReader(org.apache.drill.exec.planner.PhysicalPlanReader) IntHolder(org.apache.drill.exec.expr.holders.IntHolder) BigIntHolder(org.apache.drill.exec.expr.holders.BigIntHolder) FragmentRoot(org.apache.drill.exec.physical.base.FragmentRoot) FunctionImplementationRegistry(org.apache.drill.exec.expr.fn.FunctionImplementationRegistry) Test(org.junit.Test)

Example 8 with SchemaPath

Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

In the class TestCastFunctions, the method testCastNested:

// Nested: the cast is nested in another cast or another function.
@Test
public void testCastNested(@Injectable final DrillbitContext bitContext, @Injectable UserClientConnection connection) throws Throwable {
    mockDrillbitContext(bitContext);
    final PhysicalPlanReader reader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(CONFIG);
    final PhysicalPlan plan = reader.readPhysicalPlan(Files.toString(FileUtils.getResourceAsFile("/functions/cast/testCastNested.json"), Charsets.UTF_8));
    final FunctionImplementationRegistry registry = new FunctionImplementationRegistry(CONFIG);
    final FragmentContext context = new FragmentContext(bitContext, PlanFragment.getDefaultInstance(), connection, registry);
    final SimpleRootExec exec = new SimpleRootExec(ImplCreator.getExec(context, (FragmentRoot) plan.getSortedOperators(false).iterator().next()));
    while (exec.next()) {
        final IntVector c0 = exec.getValueVectorById(new SchemaPath("add_cast", ExpressionPosition.UNKNOWN), IntVector.class);
        final IntVector.Accessor a0 = c0.getAccessor();
        int count = 0;
        for (int i = 0; i < c0.getAccessor().getValueCount(); i++) {
            final IntHolder holder0 = new IntHolder();
            a0.get(i, holder0);
            assertEquals(300, holder0.value);
            ++count;
        }
        assertEquals(5, count);
    }
    exec.close();
    context.close();
    if (context.getFailureCause() != null) {
        throw context.getFailureCause();
    }
    assertFalse(context.isFailed());
}
Also used : PhysicalPlan(org.apache.drill.exec.physical.PhysicalPlan) FragmentContext(org.apache.drill.exec.ops.FragmentContext) BigIntVector(org.apache.drill.exec.vector.BigIntVector) IntVector(org.apache.drill.exec.vector.IntVector) SchemaPath(org.apache.drill.common.expression.SchemaPath) PhysicalPlanReader(org.apache.drill.exec.planner.PhysicalPlanReader) IntHolder(org.apache.drill.exec.expr.holders.IntHolder) BigIntHolder(org.apache.drill.exec.expr.holders.BigIntHolder) FragmentRoot(org.apache.drill.exec.physical.base.FragmentRoot) FunctionImplementationRegistry(org.apache.drill.exec.expr.fn.FunctionImplementationRegistry) Test(org.junit.Test)

Example 9 with SchemaPath

Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

In the class TestCastFunctions, the method testCastVarChar:

// Cast to varchar(length).
@Test
public void testCastVarChar(@Injectable final DrillbitContext bitContext, @Injectable UserClientConnection connection) throws Throwable {
    mockDrillbitContext(bitContext);
    final PhysicalPlanReader reader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(CONFIG);
    final PhysicalPlan plan = reader.readPhysicalPlan(Files.toString(FileUtils.getResourceAsFile("/functions/cast/testCastVarChar.json"), Charsets.UTF_8));
    final FunctionImplementationRegistry registry = new FunctionImplementationRegistry(CONFIG);
    final FragmentContext context = new FragmentContext(bitContext, PlanFragment.getDefaultInstance(), connection, registry);
    final SimpleRootExec exec = new SimpleRootExec(ImplCreator.getExec(context, (FragmentRoot) plan.getSortedOperators(false).iterator().next()));
    while (exec.next()) {
        final VarCharVector c0 = exec.getValueVectorById(new SchemaPath("int_lit_cast", ExpressionPosition.UNKNOWN), VarCharVector.class);
        final VarCharVector.Accessor a0 = c0.getAccessor();
        int count = 0;
        for (int i = 0; i < c0.getAccessor().getValueCount(); i++) {
            final VarCharHolder holder0 = new VarCharHolder();
            a0.get(i, holder0);
            assertEquals("123", StringFunctionHelpers.toStringFromUTF8(holder0.start, holder0.end, holder0.buffer));
            ++count;
        }
        assertEquals(5, count);
    }
    exec.close();
    context.close();
    if (context.getFailureCause() != null) {
        throw context.getFailureCause();
    }
    assertFalse(context.isFailed());
}
Also used : PhysicalPlan(org.apache.drill.exec.physical.PhysicalPlan) FragmentContext(org.apache.drill.exec.ops.FragmentContext) SchemaPath(org.apache.drill.common.expression.SchemaPath) PhysicalPlanReader(org.apache.drill.exec.planner.PhysicalPlanReader) VarCharVector(org.apache.drill.exec.vector.VarCharVector) FragmentRoot(org.apache.drill.exec.physical.base.FragmentRoot) VarCharHolder(org.apache.drill.exec.expr.holders.VarCharHolder) FunctionImplementationRegistry(org.apache.drill.exec.expr.fn.FunctionImplementationRegistry) Test(org.junit.Test)
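For the varchar case, the bytes can also be decoded without StringFunctionHelpers. A minimal variant of the assertion, reusing the accessor a0 from the example and assuming VarCharVector.Accessor.get(int) returns the column value as a UTF-8 byte array:

        for (int i = 0; i < a0.getValueCount(); i++) {
            // get(i) is assumed to return the raw UTF-8 bytes of the i-th value.
            assertEquals("123", new String(a0.get(i), Charsets.UTF_8));
        }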

Example 10 with SchemaPath

Use of org.apache.drill.common.expression.SchemaPath in project drill by apache.

In the class HiveAbstractReader, the method init:

private void init() throws ExecutionSetupException {
    final JobConf job = new JobConf(hiveConf);
    // Get the configured default partition value.
    defaultPartitionValue = hiveConf.get(ConfVars.DEFAULTPARTITIONNAME.varname);
    Properties tableProperties;
    try {
        tableProperties = HiveUtilities.getTableMetadata(table);
        final Properties partitionProperties = (partition == null) ? tableProperties : HiveUtilities.getPartitionMetadata(partition, table);
        HiveUtilities.addConfToJob(job, partitionProperties);
        final SerDe tableSerDe = createSerDe(job, table.getSd().getSerdeInfo().getSerializationLib(), tableProperties);
        final StructObjectInspector tableOI = getStructOI(tableSerDe);
        if (partition != null) {
            partitionSerDe = createSerDe(job, partition.getSd().getSerdeInfo().getSerializationLib(), partitionProperties);
            partitionOI = getStructOI(partitionSerDe);
            finalOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
            partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, partition.getSd(), table));
        } else {
            // For non-partitioned tables, there is no need to create converter as there are no schema changes expected.
            partitionSerDe = tableSerDe;
            partitionOI = tableOI;
            partTblObjectInspectorConverter = null;
            finalOI = tableOI;
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, table.getSd(), table));
        }
        if (logger.isTraceEnabled()) {
            for (StructField field : finalOI.getAllStructFieldRefs()) {
                logger.trace("field in finalOI: {}", field.getClass().getName());
            }
            logger.trace("partitionSerDe class is {} {}", partitionSerDe.getClass().getName());
        }
        // Get list of partition column names
        final List<String> partitionNames = Lists.newArrayList();
        for (FieldSchema field : table.getPartitionKeys()) {
            partitionNames.add(field.getName());
        }
        // We should always get the columns names from ObjectInspector. For some of the tables (ex. avro) metastore
        // may not contain the schema, instead it is derived from other sources such as table properties or external file.
        // SerDe object knows how to get the schema with all the config and table properties passed in initialization.
        // ObjectInspector created from the SerDe object has the schema.
        final StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(finalOI);
        final List<String> tableColumnNames = sTypeInfo.getAllStructFieldNames();
        // Select list of columns for project pushdown into Hive SerDe readers.
        final List<Integer> columnIds = Lists.newArrayList();
        if (isStarQuery()) {
            selectedColumnNames = tableColumnNames;
            for (int i = 0; i < selectedColumnNames.size(); i++) {
                columnIds.add(i);
            }
            selectedPartitionNames = partitionNames;
        } else {
            selectedColumnNames = Lists.newArrayList();
            for (SchemaPath field : getColumns()) {
                String columnName = field.getRootSegment().getPath();
                if (partitionNames.contains(columnName)) {
                    selectedPartitionNames.add(columnName);
                } else {
                    columnIds.add(tableColumnNames.indexOf(columnName));
                    selectedColumnNames.add(columnName);
                }
            }
        }
        ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames);
        for (String columnName : selectedColumnNames) {
            StructField fieldRef = finalOI.getStructFieldRef(columnName);
            selectedStructFieldRefs.add(fieldRef);
            ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldOI.getTypeName());
            selectedColumnObjInspectors.add(fieldOI);
            selectedColumnTypes.add(typeInfo);
            selectedColumnFieldConverters.add(HiveFieldConverter.create(typeInfo, fragmentContext));
        }
        for (int i = 0; i < selectedColumnNames.size(); ++i) {
            logger.trace("inspector:typeName={}, className={}, TypeInfo: {}, converter:{}", selectedColumnObjInspectors.get(i).getTypeName(), selectedColumnObjInspectors.get(i).getClass().getName(), selectedColumnTypes.get(i).toString(), selectedColumnFieldConverters.get(i).getClass().getName());
        }
        for (int i = 0; i < table.getPartitionKeys().size(); i++) {
            FieldSchema field = table.getPartitionKeys().get(i);
            if (selectedPartitionNames.contains(field.getName())) {
                TypeInfo pType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
                selectedPartitionTypes.add(pType);
                if (partition != null) {
                    selectedPartitionValues.add(HiveUtilities.convertPartitionType(pType, partition.getValues().get(i), defaultPartitionValue));
                }
            }
        }
    } catch (Exception e) {
        throw new ExecutionSetupException("Failure while initializing Hive Reader " + this.getClass().getName(), e);
    }
    if (!empty) {
        try {
            reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat().getRecordReader(inputSplit, job, Reporter.NULL);
            logger.trace("hive reader created: {} for inputSplit {}", reader.getClass().getName(), inputSplit.toString());
        } catch (Exception e) {
            throw new ExecutionSetupException("Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
        }
        internalInit(tableProperties, reader);
    }
}
Also used : SerDe(org.apache.hadoop.hive.serde2.SerDe) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) Properties(java.util.Properties) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) ExecutionException(java.util.concurrent.ExecutionException) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) SchemaPath(org.apache.drill.common.expression.SchemaPath) JobConf(org.apache.hadoop.mapred.JobConf) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
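Across these examples, SchemaPath is used in two ways: the cast tests construct one to look up an output vector by name, and HiveAbstractReader.init() walks getColumns() and reads each path's root segment to separate partition columns from table columns. A minimal, self-contained sketch of both operations; SchemaPath.getSimplePath is assumed to be the usual single-name factory in this Drill version:

    import org.apache.drill.common.expression.ExpressionPosition;
    import org.apache.drill.common.expression.SchemaPath;

    public class SchemaPathSketch {
        public static void main(String[] args) {
            // Constructor form used by the cast tests above.
            final SchemaPath viaConstructor = new SchemaPath("varchar_cast", ExpressionPosition.UNKNOWN);
            // Factory form, assumed equivalent for a simple single-segment path.
            final SchemaPath viaFactory = SchemaPath.getSimplePath("varchar_cast");

            // The root segment carries the column name; HiveAbstractReader.init() reads it the same way
            // to decide whether a selected column is a partition column or a regular table column.
            System.out.println(viaConstructor.getRootSegment().getPath()); // varchar_cast
            System.out.println(viaFactory.getRootSegment().getPath()); // varchar_cast
        }
    }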

Aggregations

SchemaPath (org.apache.drill.common.expression.SchemaPath) 74
Test (org.junit.Test) 23
FunctionImplementationRegistry (org.apache.drill.exec.expr.fn.FunctionImplementationRegistry) 17
FragmentContext (org.apache.drill.exec.ops.FragmentContext) 16
PhysicalPlan (org.apache.drill.exec.physical.PhysicalPlan) 15
FragmentRoot (org.apache.drill.exec.physical.base.FragmentRoot) 15
PhysicalPlanReader (org.apache.drill.exec.planner.PhysicalPlanReader) 15
LogicalExpression (org.apache.drill.common.expression.LogicalExpression) 12
ExecTest (org.apache.drill.exec.ExecTest) 12
BigIntVector (org.apache.drill.exec.vector.BigIntVector) 9
ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException) 8
IntVector (org.apache.drill.exec.vector.IntVector) 8
Path (org.apache.hadoop.fs.Path) 8
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException) 7
ValueVector (org.apache.drill.exec.vector.ValueVector) 7
IOException (java.io.IOException) 6
ErrorCollector (org.apache.drill.common.expression.ErrorCollector) 6
TypedFieldId (org.apache.drill.exec.record.TypedFieldId) 6
ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl) 5
FieldReference (org.apache.drill.common.expression.FieldReference) 5