Search in sources :

Example 6 with DirectDictionaryGenerator

use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.

the class DataTypeUtil method convertDataToBytesBasedOnDataType.

/**
 * Converts the textual form of a column value into a byte[], picking the
 * conversion from the data type declared on the column schema.
 *
 * <p>INT, SHORT, DOUBLE and LONG values are parsed and re-rendered as text;
 * DATE values and direct-dictionary TIMESTAMP values are replaced by their
 * direct surrogate key rendered as text. In all of these cases the text is
 * encoded with {@code CarbonCommonConstants.DEFAULT_CHARSET}. A TIMESTAMP
 * without direct-dictionary encoding is parsed by the thread's timestamp
 * formatter and its epoch time is written via {@code ByteUtil.toBytes}.
 * Decimals go through {@code bigDecimalToByte}; every other type is handed
 * to the configured data type converter.
 *
 * @param data         textual value to convert, may be null
 * @param columnSchema schema of the column the value belongs to
 * @return the converted bytes, or null when {@code data} is null, equals the
 *         carbon member default value, or cannot be parsed
 */
public static byte[] convertDataToBytesBasedOnDataType(String data, ColumnSchema columnSchema) {
    if (data == null) {
        return null;
    }
    if (CarbonCommonConstants.MEMBER_DEFAULT_VAL.equals(data)) {
        LOGGER.error("Default value should not be carbon specific null value : " + data);
        return null;
    }
    try {
        final DataType columnType = columnSchema.getDataType();
        // Text form of the parsed value; only set by the branches that end in a charset encode.
        final String canonicalText;
        if (columnType == DataTypes.INT) {
            canonicalText = String.valueOf((long) Integer.parseInt(data));
        } else if (columnType == DataTypes.SHORT) {
            canonicalText = String.valueOf((long) Short.parseShort(data));
        } else if (columnType == DataTypes.DOUBLE) {
            canonicalText = String.valueOf(Double.parseDouble(data));
        } else if (columnType == DataTypes.LONG) {
            canonicalText = String.valueOf(Long.parseLong(data));
        } else if (columnType == DataTypes.DATE) {
            DirectDictionaryGenerator dateKeyGenerator = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(columnType);
            int dateSurrogate = dateKeyGenerator.generateDirectSurrogateKey(data);
            canonicalText = String.valueOf(dateSurrogate);
        } else if (columnType == DataTypes.TIMESTAMP && columnSchema.hasEncoding(Encoding.DIRECT_DICTIONARY)) {
            DirectDictionaryGenerator timestampKeyGenerator = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(columnType);
            int timestampSurrogate = timestampKeyGenerator.generateDirectSurrogateKey(data);
            canonicalText = String.valueOf(timestampSurrogate);
        } else if (columnType == DataTypes.TIMESTAMP) {
            // Timestamp stored without direct dictionary: keep the raw epoch time.
            try {
                Date parsedTimestamp = timeStampformatter.get().parse(data);
                return ByteUtil.toBytes(parsedTimestamp.getTime());
            } catch (ParseException e) {
                LOGGER.error("Cannot convert value to Time/Long type value. Value is considered as null" + e.getMessage());
                return null;
            }
        } else if (DataTypes.isDecimal(columnType)) {
            String decimalText = parseStringToBigDecimal(data, columnSchema);
            if (decimalText == null) {
                return null;
            }
            return bigDecimalToByte(new java.math.BigDecimal(decimalText));
        } else {
            return getDataTypeConverter().convertFromStringToByte(data);
        }
        return canonicalText.getBytes(Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET));
    } catch (NumberFormatException ex) {
        LOGGER.error("Problem while converting data type" + data);
        return null;
    }
}
Also used : BigDecimal(java.math.BigDecimal) DataType(org.apache.carbondata.core.metadata.datatype.DataType) DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator) ParseException(java.text.ParseException) Date(java.util.Date) BigDecimal(java.math.BigDecimal)

Example 7 with DirectDictionaryGenerator

use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.

the class RowLevelFilterExecuterImpl method getFilterActualValueFromDirectDictionaryValue.

/**
 * Looks up the direct dictionary generator for the data type of the filter
 * dimension and asks it for the value behind the given surrogate.
 *
 * @param dimColumnEvaluatorInfo resolved filter info supplying the dimension
 *                               whose data type selects the generator
 * @param dictionaryValue        direct dictionary surrogate to resolve
 * @return the value returned by the generator for the surrogate, or null
 *         when no generator is available for the data type
 */
private Object getFilterActualValueFromDirectDictionaryValue(DimColumnResolvedFilterInfo dimColumnEvaluatorInfo, int dictionaryValue) {
    DirectDictionaryGenerator generator = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(dimColumnEvaluatorInfo.getDimension().getDataType());
    if (generator == null) {
        // No generator for this data type: nothing to resolve.
        return null;
    }
    return generator.getValueFromSurrogate(dictionaryValue);
}
Also used : DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator)

Example 8 with DirectDictionaryGenerator

use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.

the class RowLevelRangeLessThanEqualFilterExecuterImpl method getFilteredIndexes.

/**
 * Builds the bit set of rows selected by this filter for one dimension chunk.
 *
 * <p>For a direct-dictionary dimension a masked default key is derived from
 * the surrogate generated for a null member plus one; otherwise the default
 * key stays null. The chunk is then evaluated through the column-index aware
 * variant when it is explicitly sorted, and through the plain variant
 * otherwise.
 *
 * @param dimensionColumnDataChunk chunk of the dimension column to evaluate
 * @param numerOfRows              number of rows in the chunk
 * @return bit set produced by the matching setFilterdIndexToBitSet variant
 */
private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk, int numerOfRows) {
    byte[] maskedDefaultKey = null;
    boolean directDictionaryEncoded = dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY);
    if (directDictionaryEncoded) {
        int nullMemberSurrogate = DirectDictionaryKeyGeneratorFactory
            .getDirectDictionaryGenerator(dimColEvaluatorInfoList.get(0).getDimension().getDataType())
            .generateDirectSurrogateKey(null);
        CarbonDimension blockDimension = segmentProperties.getDimensions().get(dimensionBlocksIndex[0]);
        maskedDefaultKey = FilterUtil.getMaskKey(nullMemberSurrogate + 1, blockDimension, this.segmentProperties.getSortColumnsGenerator());
    }
    return dimensionColumnDataChunk.isExplicitSorted()
        ? setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numerOfRows, maskedDefaultKey)
        : setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows, maskedDefaultKey);
}
Also used : DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 9 with DirectDictionaryGenerator

use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.

the class RowLevelRangeLessThanFiterExecuterImpl method getFilteredIndexes.

/**
 * Computes the rows of one dimension chunk that pass this filter.
 *
 * <p>When the filter dimension is direct-dictionary encoded, the surrogate
 * generated for a null member, incremented by one, is masked into the default
 * key handed to the evaluation; for any other encoding the default key is
 * null. Explicitly sorted chunks are evaluated with the column-index aware
 * variant, all others with the plain variant.
 *
 * @param dimensionColumnDataChunk chunk of the dimension column to evaluate
 * @param numerOfRows              number of rows in the chunk
 * @return bit set produced by the matching setFilterdIndexToBitSet variant
 */
private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk, int numerOfRows) {
    byte[] maskedDefaultKey = null;
    boolean directDictionaryEncoded = dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY);
    if (directDictionaryEncoded) {
        int nullMemberSurrogate = DirectDictionaryKeyGeneratorFactory
            .getDirectDictionaryGenerator(dimColEvaluatorInfoList.get(0).getDimension().getDataType())
            .generateDirectSurrogateKey(null);
        CarbonDimension blockDimension = segmentProperties.getDimensions().get(dimensionBlocksIndex[0]);
        maskedDefaultKey = FilterUtil.getMaskKey(nullMemberSurrogate + 1, blockDimension, this.segmentProperties.getSortColumnsGenerator());
    }
    return dimensionColumnDataChunk.isExplicitSorted()
        ? setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numerOfRows, maskedDefaultKey)
        : setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows, maskedDefaultKey);
}
Also used : DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 10 with DirectDictionaryGenerator

use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.

the class RangeValueFilterExecuterImpl method setFilterdIndexToBitSet.

/**
 * Scans one dimension column page and returns the rows whose value lies
 * inside the filter range held in {@code filterRangesValues}
 * (index 0 = lower range value, index 1 = upper range value).
 *
 * <p>Two strategies are used:
 * <ul>
 *   <li>Explicitly sorted page: the first and last matching positions are
 *       located with a binary search for each range value and every position
 *       in between is set in the result.</li>
 *   <li>Any other page: the lower and upper range filters are evaluated
 *       separately over the whole page, the two results are AND-ed, and
 *       {@code FilterUtil.removeNullValues} is then applied with the default
 *       value computed for the column.</li>
 * </ul>
 *
 * <p>The original note said this method is called when the column is sorted
 * by default, so a column index mapping is present for accessing the members
 * of the block.
 *
 * @param dimensionColumnPage page of the dimension column to scan
 * @param numerOfRows         number of rows in the page
 * @return BitSet with one bit set per selected row position
 */
private BitSet setFilterdIndexToBitSet(DimensionColumnPage dimensionColumnPage, int numerOfRows) {
    BitSet bitSet = new BitSet(numerOfRows);
    // if (dimensionColumnPage instanceof FixedLengthDimensionColumnPage) {
    byte[][] filterValues = this.filterRangesValues;
    if (dimensionColumnPage.isExplicitSorted()) {
        // 'start' is a scratch variable reused for both searches;
        // startMin / endMax hold the final inclusive bounds of the matching run.
        int start = 0;
        int startMin = 0;
        int endMax = 0;
        int startIndex = 0;
        // Lower bound: search for it only when the flag says the range does not
        // simply begin at the first row (NOTE(review): flag semantics inferred
        // from its name - confirm where it is assigned).
        if (!startBlockMinIsDefaultStart) {
            start = CarbonUtil.getFirstIndexUsingBinarySearch(dimensionColumnPage, startIndex, numerOfRows - 1, filterValues[0], greaterThanExp);
            // Strict "greater than" with an exact hit: move on to the next greater value.
            if (greaterThanExp && start >= 0) {
                start = CarbonUtil.nextGreaterValueToTarget(start, dimensionColumnPage, filterValues[0], numerOfRows);
            }
            if (start < 0) {
                // Negative result: presumably encoded as -(insertionPoint + 1), like
                // java.util.Arrays.binarySearch - TODO confirm against CarbonUtil.
                start = -(start + 1);
                // Clamp to the last row so the getChunkData call below stays in range.
                if (start == numerOfRows) {
                    start = start - 1;
                }
                // If the lower range value is still greater than the value at this
                // position, the position is outside the range: begin one row later.
                if ((ByteUtil.compare(filterValues[0], dimensionColumnPage.getChunkData(start))) > 0) {
                    start = start + 1;
                }
            }
            startMin = start;
        } else {
            startMin = startIndex;
        }
        // Upper bound: mirror image of the lower-bound search above.
        if (!endBlockMaxisDefaultEnd) {
            start = CarbonUtil.getFirstIndexUsingBinarySearch(dimensionColumnPage, startIndex, numerOfRows - 1, filterValues[1], lessThanEqualExp);
            // Strict "less than" with an exact hit: step back to the next lesser value.
            if (lessThanExp && start >= 0) {
                start = CarbonUtil.nextLesserValueToTarget(start, dimensionColumnPage, filterValues[1]);
            }
            if (start < 0) {
                // Same negative-result decoding and clamping as for the lower bound.
                start = -(start + 1);
                if (start == numerOfRows) {
                    start = start - 1;
                }
                // If the upper range value is smaller than the value at this position,
                // the position is past the range: move to the previous one.
                if ((ByteUtil.compare(filterValues[1], dimensionColumnPage.getChunkData(start)) < 0)) {
                    start = start - 1;
                }
            }
            endMax = start;
        } else {
            endMax = numerOfRows - 1;
        }
        // Mark every position between the two bounds, inclusive. When startMin > endMax
        // the loop body never runs and the result stays empty.
        for (int j = startMin; j <= endMax; j++) {
            bitSet.set(j);
        }
        // No-dictionary columns get an extra adjustment pass over the marked range
        // (details live in updateForNoDictionaryColumn, not visible here).
        if (dimensionColumnPage.isNoDicitionaryColumn()) {
            updateForNoDictionaryColumn(startMin, endMax, dimensionColumnPage, bitSet);
        }
    } else {
        // Work out the default value handed to FilterUtil.removeNullValues below
        // (presumably the byte form of a null member for this column - confirm).
        byte[] defaultValue = null;
        if (dimColEvaluatorInfo.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
            DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(dimColEvaluatorInfo.getDimension().getDataType());
            // Surrogate key the generator produces for a null member.
            int key = directDictionaryGenerator.generateDirectSurrogateKey(null);
            CarbonDimension currentBlockDimension = segmentProperties.getDimensions().get(dimensionChunkIndex);
            // Sort columns use the masked key form; other columns use the plain bytes of the key.
            if (currentBlockDimension.isSortColumn()) {
                defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension, this.segmentProperties.getSortColumnsGenerator());
            } else {
                defaultValue = ByteUtil.toBytes(key);
            }
        } else {
            // Not direct-dictionary: STRING columns use the member-default constant,
            // every other type an empty byte array.
            if (dimColEvaluatorInfo.getDimension().getDataType() == DataTypes.STRING) {
                defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
            } else {
                defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
            }
        }
        // evaluate result for lower range value first and then perform and operation in the
        // upper range value in order to compute the final result
        // (this replaces the BitSet allocated at the top of the method)
        bitSet = evaluateGreaterThanFilterForUnsortedColumn(dimensionColumnPage, filterValues[0], numerOfRows);
        BitSet upperRangeBitSet = evaluateLessThanFilterForUnsortedColumn(dimensionColumnPage, filterValues[1], numerOfRows);
        bitSet.and(upperRangeBitSet);
        FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue);
    }
    return bitSet;
}
Also used : BitSet(java.util.BitSet) DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Aggregations

DirectDictionaryGenerator (org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator)15 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)5 DataType (org.apache.carbondata.core.metadata.datatype.DataType)4 BitSet (java.util.BitSet)3 ArrayList (java.util.ArrayList)2 Bits (org.apache.carbondata.core.keygenerator.mdkey.Bits)2 ProjectionDimension (org.apache.carbondata.core.scan.model.ProjectionDimension)2 ProjectionMeasure (org.apache.carbondata.core.scan.model.ProjectionMeasure)2 CarbonColumnVector (org.apache.carbondata.core.scan.result.vector.CarbonColumnVector)2 CarbonColumnarBatch (org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch)2 DecimalType (org.apache.spark.sql.types.DecimalType)2 StructField (org.apache.spark.sql.types.StructField)2 BigDecimal (java.math.BigDecimal)1 ParseException (java.text.ParseException)1 Date (java.util.Date)1 StructField (org.apache.carbondata.core.metadata.datatype.StructField)1 CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)1 CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure)1 ExpressionResult (org.apache.carbondata.core.scan.expression.ExpressionResult)1 BinaryConditionalExpression (org.apache.carbondata.core.scan.expression.conditional.BinaryConditionalExpression)1