Search in sources :

Example 11 with DirectDictionaryGenerator

use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.

In class RowLevelRangeLessThanEqualFilterExecuterImpl, method getFilteredIndexes:

/**
 * Computes the bit set of rows selected by this filter for one dimension column page.
 *
 * <p>First a default key is chosen from the first entry of {@code dimColEvaluatorInfoList}:
 * <ul>
 *   <li>direct-dictionary dimension: the surrogate key the generator yields for a {@code null}
 *       input, converted to bytes via {@code FilterUtil.getMaskKey} when the block dimension is
 *       a sort column and via {@code ByteUtil.toBytes} otherwise;</li>
 *   <li>any other dimension whose data type is not STRING: the empty byte array constant;</li>
 *   <li>STRING dimension: {@code null}.</li>
 * </ul>
 * The key is then handed to one of the two bit-set builders, picked by whether the page is
 * explicitly sorted.
 *
 * @param dimensionColumnPage the dimension page to evaluate
 * @param numerOfRows row count forwarded unchanged to the bit-set builders
 * @return the bit set produced by the selected builder
 */
private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage, int numerOfRows) {
    final byte[] nullKeyBytes;
    if (dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
        DirectDictionaryGenerator keyGenerator = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(
            dimColEvaluatorInfoList.get(0).getDimension().getDataType());
        int nullSurrogate = keyGenerator.generateDirectSurrogateKey(null);
        CarbonDimension blockDimension = segmentProperties.getDimensions().get(dimensionChunkIndex[0]);
        nullKeyBytes = blockDimension.isSortColumn()
            ? FilterUtil.getMaskKey(nullSurrogate, blockDimension, this.segmentProperties.getSortColumnsGenerator())
            : ByteUtil.toBytes(nullSurrogate);
    } else if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) {
        // STRING dimensions carry no default key.
        nullKeyBytes = null;
    } else {
        nullKeyBytes = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
    }

    BitSet selectedRows = dimensionColumnPage.isExplicitSorted()
        ? setFilterdIndexToBitSetWithColumnIndex(dimensionColumnPage, numerOfRows, nullKeyBytes)
        : setFilterdIndexToBitSet(dimensionColumnPage, numerOfRows, nullKeyBytes);

    if (dimensionColumnPage.isNoDicitionaryColumn()) {
        // NOTE(review): presumably clears the bits of rows holding the default (null) member — confirm in FilterUtil.
        FilterUtil.removeNullValues(dimensionColumnPage, selectedRows, CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY);
    }
    return selectedRows;
}
Also used : BitSet(java.util.BitSet) DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 12 with DirectDictionaryGenerator

use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.

In class RowLevelRangeLessThanFiterExecuterImpl, method getFilteredIndexes:

/**
 * Evaluates this filter against a dimension column page and returns the selected row indexes.
 *
 * <p>A default key is derived from the first entry of {@code dimColEvaluatorInfoList}: for a
 * direct-dictionary dimension it is the byte form of the surrogate key generated for a
 * {@code null} input (masked through {@code FilterUtil.getMaskKey} for sort columns, plain
 * {@code ByteUtil.toBytes} otherwise); for a non-STRING dimension it is the empty byte array
 * constant; for a STRING dimension it stays {@code null}.
 *
 * @param dimensionColumnPage the dimension page to evaluate
 * @param numerOfRows row count passed through to the bit-set builders
 * @return the bit set built for the page
 */
private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage, int numerOfRows) {
    final byte[] defaultKey;
    if (dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
        DirectDictionaryGenerator surrogateGenerator = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(
            dimColEvaluatorInfoList.get(0).getDimension().getDataType());
        int surrogateForNull = surrogateGenerator.generateDirectSurrogateKey(null);
        CarbonDimension dimensionInBlock = segmentProperties.getDimensions().get(dimensionChunkIndex[0]);
        defaultKey = dimensionInBlock.isSortColumn()
            ? FilterUtil.getMaskKey(surrogateForNull, dimensionInBlock, this.segmentProperties.getSortColumnsGenerator())
            : ByteUtil.toBytes(surrogateForNull);
    } else if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) {
        // No default key is used for STRING dimensions.
        defaultKey = null;
    } else {
        defaultKey = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
    }

    // Explicitly sorted pages go through the column-index-aware builder.
    final BitSet matches = dimensionColumnPage.isExplicitSorted()
        ? setFilterdIndexToBitSetWithColumnIndex(dimensionColumnPage, numerOfRows, defaultKey)
        : setFilterdIndexToBitSet(dimensionColumnPage, numerOfRows, defaultKey);

    if (dimensionColumnPage.isNoDicitionaryColumn()) {
        // NOTE(review): presumably drops rows whose value equals the default member — confirm in FilterUtil.
        FilterUtil.removeNullValues(dimensionColumnPage, matches, CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY);
    }
    return matches;
}
Also used : BitSet(java.util.BitSet) DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 13 with DirectDictionaryGenerator

use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.

In class CustomTypeDictionaryVisitor, method getDirectDictionaryValKeyMemberForFilter:

/**
 * Builds the filter info holding the sorted direct-dictionary surrogate keys for the given
 * filter values.
 *
 * @param columnExpression column whose dimension data type selects the direct-dictionary generator
 * @param evaluateResultListFinal filter values forwarded to {@code getSurrogateValuesForDictionary}
 * @param isIncludeFilter {@code true} to store the keys as the include list, {@code false} to
 *     store them as the exclude list
 * @param dataType data type forwarded to {@code getSurrogateValuesForDictionary}
 * @return the populated filter info, or {@code null} when no surrogate key was collected
 */
protected ColumnFilterInfo getDirectDictionaryValKeyMemberForFilter(ColumnExpression columnExpression, List<String> evaluateResultListFinal, boolean isIncludeFilter, DataType dataType) {
    DirectDictionaryGenerator keyGenerator = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(
        columnExpression.getDimension().getDataType());
    List<Integer> surrogateKeys = new ArrayList<Integer>(20);
    // The helper fills surrogateKeys in place.
    getSurrogateValuesForDictionary(evaluateResultListFinal, surrogateKeys, keyGenerator, dataType);
    if (surrogateKeys.isEmpty()) {
        return null;
    }
    Collections.sort(surrogateKeys);

    ColumnFilterInfo filterInfo = new ColumnFilterInfo();
    filterInfo.setIncludeFilter(isIncludeFilter);
    if (isIncludeFilter) {
        filterInfo.setFilterList(surrogateKeys);
    } else {
        filterInfo.setExcludeFilterList(surrogateKeys);
    }
    return filterInfo;
}
Also used : ArrayList(java.util.ArrayList) DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator) ColumnFilterInfo(org.apache.carbondata.core.scan.filter.ColumnFilterInfo)

Example 14 with DirectDictionaryGenerator

use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.

In class PrestoCarbonVectorizedRecordReader, method initBatch:

/**
 * Builds the projection schema from the query model and allocates the batch objects stored in
 * {@code columnarBatch} and {@code carbonColumnarBatch}.
 *
 * <p>Each projected dimension and measure contributes one {@code StructField}, placed at the
 * index given by its ordinal. Every column vector of the allocated batch is wrapped together
 * with one shared {@code filteredRows} array.
 *
 * <p>NOTE(review): the previous comment stated that this enables the vectorized reader and must
 * be called before any nextKeyValue/nextBatch call — not verifiable from this method alone;
 * confirm against the callers.
 */
private void initBatch() {
    List<ProjectionDimension> projectedDimensions = queryModel.getProjectionDimensions();
    List<ProjectionMeasure> projectedMeasures = queryModel.getProjectionMeasures();
    StructField[] fields = new StructField[projectedDimensions.size() + projectedMeasures.size()];

    for (ProjectionDimension dim : projectedDimensions) {
        if (dim.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
            // Direct-dictionary columns are exposed with the generator's return type.
            DirectDictionaryGenerator keyGenerator = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(
                dim.getDimension().getDataType());
            fields[dim.getOrdinal()] = new StructField(dim.getColumnName(), keyGenerator.getReturnType());
        } else if (!dim.getDimension().hasEncoding(Encoding.DICTIONARY) || dim.getDimension().isComplex()) {
            // Columns without dictionary encoding, and complex columns, keep their own data type.
            fields[dim.getOrdinal()] = new StructField(dim.getColumnName(), dim.getDimension().getDataType());
        } else {
            // Remaining dictionary-encoded columns are exposed as INT.
            fields[dim.getOrdinal()] = new StructField(dim.getColumnName(), DataTypes.INT);
        }
    }

    for (ProjectionMeasure msr : projectedMeasures) {
        DataType measureType = msr.getMeasure().getDataType();
        boolean keepsOwnType = measureType == DataTypes.BOOLEAN
            || measureType == DataTypes.SHORT
            || measureType == DataTypes.INT
            || measureType == DataTypes.LONG
            || DataTypes.isDecimal(measureType);
        if (keepsOwnType) {
            fields[msr.getOrdinal()] = new StructField(msr.getColumnName(), msr.getMeasure().getDataType());
        } else {
            // Every other measure type is exposed as DOUBLE.
            fields[msr.getOrdinal()] = new StructField(msr.getColumnName(), DataTypes.DOUBLE);
        }
    }

    columnarBatch = CarbonVectorBatch.allocate(fields);
    boolean[] filteredRows = new boolean[columnarBatch.capacity()];
    CarbonColumnVector[] wrappedVectors = new CarbonColumnVector[fields.length];
    for (int col = 0; col < wrappedVectors.length; col++) {
        wrappedVectors[col] = new CarbonColumnVectorWrapper(columnarBatch.column(col), filteredRows);
    }
    carbonColumnarBatch = new CarbonColumnarBatch(wrappedVectors, columnarBatch.capacity(), filteredRows);
}
Also used : CarbonColumnarBatch(org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch) CarbonColumnVector(org.apache.carbondata.core.scan.result.vector.CarbonColumnVector) ProjectionDimension(org.apache.carbondata.core.scan.model.ProjectionDimension) StructField(org.apache.carbondata.core.metadata.datatype.StructField) ProjectionMeasure(org.apache.carbondata.core.scan.model.ProjectionMeasure) DataType(org.apache.carbondata.core.metadata.datatype.DataType) DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator)

Example 15 with DirectDictionaryGenerator

use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.

In class SparkDataTypeConverterImpl, method convertCarbonSchemaToSparkSchema:

/**
 * Converts an array of CarbonData columns into an array of Spark {@code StructField}s of the
 * same length and order.
 *
 * <p>Dimensions map to the direct-dictionary generator's return type, to their own data type
 * (when not dictionary encoded, or when complex), or to INT. Measures keep BOOLEAN, SHORT, INT
 * and LONG, become a {@code DecimalType} with the measure's precision and scale when decimal,
 * and become DOUBLE otherwise. A column that is neither a dimension nor a measure leaves its
 * slot {@code null}.
 *
 * @param carbonColumns the CarbonData columns to convert
 * @return the Spark fields, one slot per input column
 */
@Override
public Object[] convertCarbonSchemaToSparkSchema(CarbonColumn[] carbonColumns) {
    StructField[] sparkFields = new StructField[carbonColumns.length];
    for (int idx = 0; idx < carbonColumns.length; idx++) {
        CarbonColumn column = carbonColumns[idx];

        if (column.isDimension()) {
            if (column.hasEncoding(Encoding.DIRECT_DICTIONARY)) {
                DirectDictionaryGenerator keyGenerator =
                    DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(column.getDataType());
                sparkFields[idx] = new StructField(column.getColName(),
                    convertCarbonToSparkDataType(keyGenerator.getReturnType()), true, null);
            } else if (!column.hasEncoding(Encoding.DICTIONARY) || column.isComplex()) {
                // Not dictionary encoded, or complex: the column's own data type is used.
                sparkFields[idx] = new StructField(column.getColName(),
                    convertCarbonToSparkDataType(column.getDataType()), true, null);
            } else {
                sparkFields[idx] = new StructField(column.getColName(),
                    convertCarbonToSparkDataType(org.apache.carbondata.core.metadata.datatype.DataTypes.INT), true, null);
            }
            continue;
        }

        if (!column.isMeasure()) {
            // Neither dimension nor measure: the slot keeps its default null.
            continue;
        }

        DataType measureType = column.getDataType();
        if (measureType == org.apache.carbondata.core.metadata.datatype.DataTypes.BOOLEAN
            || measureType == org.apache.carbondata.core.metadata.datatype.DataTypes.SHORT
            || measureType == org.apache.carbondata.core.metadata.datatype.DataTypes.INT
            || measureType == org.apache.carbondata.core.metadata.datatype.DataTypes.LONG) {
            sparkFields[idx] = new StructField(column.getColName(),
                convertCarbonToSparkDataType(measureType), true, null);
        } else if (org.apache.carbondata.core.metadata.datatype.DataTypes.isDecimal(measureType)) {
            CarbonMeasure decimalMeasure = (CarbonMeasure) column;
            sparkFields[idx] = new StructField(column.getColName(),
                new DecimalType(decimalMeasure.getPrecision(), decimalMeasure.getScale()), true, null);
        } else {
            sparkFields[idx] = new StructField(column.getColName(),
                convertCarbonToSparkDataType(org.apache.carbondata.core.metadata.datatype.DataTypes.DOUBLE), true, null);
        }
    }
    return sparkFields;
}
Also used : StructField(org.apache.spark.sql.types.StructField) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) DataType(org.apache.carbondata.core.metadata.datatype.DataType) DecimalType(org.apache.spark.sql.types.DecimalType) DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator)

Aggregations

DirectDictionaryGenerator (org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator): 15; CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension): 5; DataType (org.apache.carbondata.core.metadata.datatype.DataType): 4; BitSet (java.util.BitSet): 3; ArrayList (java.util.ArrayList): 2; Bits (org.apache.carbondata.core.keygenerator.mdkey.Bits): 2; ProjectionDimension (org.apache.carbondata.core.scan.model.ProjectionDimension): 2; ProjectionMeasure (org.apache.carbondata.core.scan.model.ProjectionMeasure): 2; CarbonColumnVector (org.apache.carbondata.core.scan.result.vector.CarbonColumnVector): 2; CarbonColumnarBatch (org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch): 2; DecimalType (org.apache.spark.sql.types.DecimalType): 2; StructField (org.apache.spark.sql.types.StructField): 2; BigDecimal (java.math.BigDecimal): 1; ParseException (java.text.ParseException): 1; Date (java.util.Date): 1; StructField (org.apache.carbondata.core.metadata.datatype.StructField): 1; CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn): 1; CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure): 1; ExpressionResult (org.apache.carbondata.core.scan.expression.ExpressionResult): 1; BinaryConditionalExpression (org.apache.carbondata.core.scan.expression.conditional.BinaryConditionalExpression): 1