Search in sources :

Example 6 with StructField

use of org.apache.carbondata.core.metadata.datatype.StructField in project carbondata by apache.

In the class CarbonTable, the method buildFromDataFile.

/**
 * Builds a {@link CarbonTable} whose columns are taken from the header of a data file.
 *
 * <p>The file header at {@code filePath} is read, each column schema found in it is
 * converted to the wrapper {@link ColumnSchema}, and a {@link StructField} carrying that
 * column's name and data type is added to the table schema (with {@code false} as the
 * second argument of {@code addColumn}). The resulting table is registered under the
 * database name {@code "default"}.
 *
 * @param tableName name given to the generated table schema
 * @param tablePath path stored on the generated table info
 * @param filePath  path of the data file whose header is read
 * @return the table produced by {@code buildFromTableInfo} for the assembled table info
 * @throws IOException declared by the method; presumably raised when the header cannot be
 *                     read (the reader itself is not visible here)
 */
public static CarbonTable buildFromDataFile(String tableName, String tablePath, String filePath) throws IOException {
    // The same database name is used for the table info and for the unique table name.
    final String databaseName = "default";

    FileHeader header = new CarbonHeaderReader(filePath).readHeader();

    TableSchemaBuilder schemaBuilder = TableSchema.builder();
    ThriftWrapperSchemaConverterImpl converter = new ThriftWrapperSchemaConverterImpl();
    for (org.apache.carbondata.format.ColumnSchema thriftColumn : header.getColumn_schema()) {
        ColumnSchema wrapperColumn = converter.fromExternalToWrapperColumnSchema(thriftColumn);
        StructField field = new StructField(wrapperColumn.getColumnName(), wrapperColumn.getDataType());
        schemaBuilder.addColumn(field, false);
    }
    TableSchema factSchema = schemaBuilder.tableName(tableName).build();

    TableInfo info = new TableInfo();
    info.setFactTable(factSchema);
    info.setTablePath(tablePath);
    info.setDatabaseName(databaseName);
    String uniqueName = CarbonTable.buildUniqueName(databaseName, factSchema.getTableName());
    info.setTableUniqueName(uniqueName);
    return buildFromTableInfo(info);
}
Also used : CarbonHeaderReader(org.apache.carbondata.core.reader.CarbonHeaderReader) StructField(org.apache.carbondata.core.metadata.datatype.StructField) ThriftWrapperSchemaConverterImpl(org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) FileHeader(org.apache.carbondata.format.FileHeader)

Example 7 with StructField

use of org.apache.carbondata.core.metadata.datatype.StructField in project carbondata by apache.

In the class PrestoCarbonVectorizedRecordReader, the method initBatch.

/**
 * Allocates the column batch used by this reader and wraps it in a
 * {@link CarbonColumnarBatch}.
 *
 * <p>A {@link StructField} is created for every projected dimension and measure of the
 * query model and stored at the projection's ordinal. The field type is chosen as follows:
 * <ul>
 *   <li>dimension with {@code DIRECT_DICTIONARY} encoding: the return type of its
 *       direct-dictionary generator;</li>
 *   <li>dimension without {@code DICTIONARY} encoding, or a complex dimension: the
 *       dimension's own data type;</li>
 *   <li>any other dimension: {@code DataTypes.INT};</li>
 *   <li>measure of type BOOLEAN, SHORT, INT, LONG or decimal: the measure's own type;</li>
 *   <li>any other measure: {@code DataTypes.DOUBLE}.</li>
 * </ul>
 *
 * <p>The method returns nothing; its results are stored in the {@code columnarBatch} and
 * {@code carbonColumnarBatch} fields. All column wrappers share one filtered-rows array
 * sized to the batch capacity.
 */
private void initBatch() {
    List<ProjectionDimension> dimensions = queryModel.getProjectionDimensions();
    List<ProjectionMeasure> measures = queryModel.getProjectionMeasures();
    StructField[] schema = new StructField[dimensions.size() + measures.size()];

    for (ProjectionDimension dimension : dimensions) {
        DataType fieldType;
        if (dimension.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
            DirectDictionaryGenerator generator = DirectDictionaryKeyGeneratorFactory
                    .getDirectDictionaryGenerator(dimension.getDimension().getDataType());
            fieldType = generator.getReturnType();
        } else if (!dimension.getDimension().hasEncoding(Encoding.DICTIONARY)
                || dimension.getDimension().isComplex()) {
            // Non-dictionary and complex dimensions both keep their declared type.
            fieldType = dimension.getDimension().getDataType();
        } else {
            // Remaining dictionary-encoded dimensions are exposed as INT.
            fieldType = DataTypes.INT;
        }
        schema[dimension.getOrdinal()] = new StructField(dimension.getColumnName(), fieldType);
    }

    for (ProjectionMeasure measure : measures) {
        DataType measureType = measure.getMeasure().getDataType();
        boolean keepsOwnType = measureType == DataTypes.BOOLEAN
                || measureType == DataTypes.SHORT
                || measureType == DataTypes.INT
                || measureType == DataTypes.LONG
                || DataTypes.isDecimal(measureType);
        // Every other measure type falls back to DOUBLE.
        DataType fieldType = keepsOwnType ? measureType : DataTypes.DOUBLE;
        schema[measure.getOrdinal()] = new StructField(measure.getColumnName(), fieldType);
    }

    columnarBatch = CarbonVectorBatch.allocate(schema);
    int capacity = columnarBatch.capacity();
    boolean[] filteredRows = new boolean[capacity];
    CarbonColumnVector[] wrappers = new CarbonColumnVector[schema.length];
    for (int col = 0; col < wrappers.length; col++) {
        wrappers[col] = new CarbonColumnVectorWrapper(columnarBatch.column(col), filteredRows);
    }
    carbonColumnarBatch = new CarbonColumnarBatch(wrappers, capacity, filteredRows);
}
Also used : CarbonColumnarBatch(org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch) CarbonColumnVector(org.apache.carbondata.core.scan.result.vector.CarbonColumnVector) ProjectionDimension(org.apache.carbondata.core.scan.model.ProjectionDimension) StructField(org.apache.carbondata.core.metadata.datatype.StructField) ProjectionMeasure(org.apache.carbondata.core.scan.model.ProjectionMeasure) DataType(org.apache.carbondata.core.metadata.datatype.DataType) DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator)

Aggregations

StructField (org.apache.carbondata.core.metadata.datatype.StructField) 7, DataType (org.apache.carbondata.core.metadata.datatype.DataType) 2, ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) 2, Test (org.junit.Test) 2, ArrayList (java.util.ArrayList) 1, DirectDictionaryGenerator (org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator) 1, ThriftWrapperSchemaConverterImpl (org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl) 1, StructType (org.apache.carbondata.core.metadata.datatype.StructType) 1, CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable) 1, TableSchema (org.apache.carbondata.core.metadata.schema.table.TableSchema) 1, TableSchemaBuilder (org.apache.carbondata.core.metadata.schema.table.TableSchemaBuilder) 1, CarbonHeaderReader (org.apache.carbondata.core.reader.CarbonHeaderReader) 1, ProjectionDimension (org.apache.carbondata.core.scan.model.ProjectionDimension) 1, ProjectionMeasure (org.apache.carbondata.core.scan.model.ProjectionMeasure) 1, CarbonColumnVector (org.apache.carbondata.core.scan.result.vector.CarbonColumnVector) 1, CarbonColumnarBatch (org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch) 1, FileHeader (org.apache.carbondata.format.FileHeader) 1, ArrayType (org.apache.spark.sql.types.ArrayType) 1, BooleanType (org.apache.spark.sql.types.BooleanType) 1, DateType (org.apache.spark.sql.types.DateType) 1