use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.
the class DataTypeUtil method convertDataToBytesBasedOnDataType.
/**
 * Converts the string form of a column value into a byte[] chosen by the column's data type.
 *
 * <ul>
 *   <li>INT, SHORT, LONG and DOUBLE: the value is parsed and its {@code String.valueOf} form is
 *       encoded with {@code CarbonCommonConstants.DEFAULT_CHARSET}.</li>
 *   <li>DATE, and TIMESTAMP on a column with {@code Encoding.DIRECT_DICTIONARY}: the value is
 *       replaced by the key returned from the direct dictionary generator, then encoded the same
 *       way.</li>
 *   <li>Any other TIMESTAMP: the value is parsed with {@code timeStampformatter.get()} and the
 *       resulting {@code Date.getTime()} is passed to {@code ByteUtil.toBytes}.</li>
 *   <li>Decimal: the value goes through {@code parseStringToBigDecimal} and
 *       {@code bigDecimalToByte}.</li>
 *   <li>Everything else is delegated to {@code getDataTypeConverter().convertFromStringToByte}.</li>
 * </ul>
 *
 * @param data         value to convert, may be null
 * @param columnSchema schema of the column the value belongs to
 * @return the converted bytes; null when {@code data} is null, equals
 *         {@code CarbonCommonConstants.MEMBER_DEFAULT_VAL}, or fails to parse
 */
public static byte[] convertDataToBytesBasedOnDataType(String data, ColumnSchema columnSchema) {
  if (data == null) {
    return null;
  }
  if (CarbonCommonConstants.MEMBER_DEFAULT_VAL.equals(data)) {
    LOGGER.error("Default value should not be carbon specific null value : " + data);
    return null;
  }
  try {
    final DataType type = columnSchema.getDataType();
    // Branches that produce a textual representation assign it here; it is encoded once below.
    final String text;
    if (type == DataTypes.INT) {
      text = String.valueOf((long) Integer.parseInt(data));
    } else if (type == DataTypes.SHORT) {
      text = String.valueOf((long) Short.parseShort(data));
    } else if (type == DataTypes.DOUBLE) {
      text = String.valueOf(Double.parseDouble(data));
    } else if (type == DataTypes.LONG) {
      text = String.valueOf(Long.parseLong(data));
    } else if (type == DataTypes.DATE
        || (type == DataTypes.TIMESTAMP && columnSchema.hasEncoding(Encoding.DIRECT_DICTIONARY))) {
      // DATE always uses the direct dictionary generator; TIMESTAMP only when so encoded.
      DirectDictionaryGenerator generator =
          DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(
              columnSchema.getDataType());
      int surrogateKey = generator.generateDirectSurrogateKey(data);
      text = String.valueOf(surrogateKey);
    } else if (type == DataTypes.TIMESTAMP) {
      try {
        Date parsed = timeStampformatter.get().parse(data);
        return ByteUtil.toBytes(parsed.getTime());
      } catch (ParseException e) {
        LOGGER.error("Cannot convert value to Time/Long type value. Value is considered as null" + e.getMessage());
        return null;
      }
    } else if (DataTypes.isDecimal(type)) {
      String decimalText = parseStringToBigDecimal(data, columnSchema);
      if (decimalText == null) {
        return null;
      }
      return bigDecimalToByte(new java.math.BigDecimal(decimalText));
    } else {
      return getDataTypeConverter().convertFromStringToByte(data);
    }
    return text.getBytes(Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET));
  } catch (NumberFormatException ex) {
    LOGGER.error("Problem while converting data type" + data);
    return null;
  }
}
use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.
the class RowLevelFilterExecuterImpl method getFilterActualValueFromDirectDictionaryValue.
/**
 * Looks up the value behind a direct dictionary surrogate key.
 *
 * <p>The generator is selected by the data type of the dimension carried in
 * {@code dimColumnEvaluatorInfo}.
 *
 * @param dimColumnEvaluatorInfo filter info whose dimension supplies the data type
 * @param dictionaryValue        surrogate key to resolve
 * @return the result of {@code getValueFromSurrogate(dictionaryValue)}, or null when the factory
 *         returns no generator for the data type
 */
private Object getFilterActualValueFromDirectDictionaryValue(DimColumnResolvedFilterInfo dimColumnEvaluatorInfo, int dictionaryValue) {
  DirectDictionaryGenerator generator =
      DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(
          dimColumnEvaluatorInfo.getDimension().getDataType());
  if (generator == null) {
    return null;
  }
  return generator.getValueFromSurrogate(dictionaryValue);
}
use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.
the class RowLevelRangeLessThanEqualFilterExecuterImpl method getFilteredIndexes.
/**
 * Computes the filtered row indexes for the given dimension chunk.
 *
 * <p>When the first evaluated dimension is direct-dictionary encoded, a default value is built as
 * the mask key of (surrogate key generated for a null member + 1); otherwise the default value
 * stays null. The chunk is then handed to the column-index variant when it is explicitly sorted,
 * and to the plain variant otherwise.
 *
 * @param dimensionColumnDataChunk chunk of the dimension column to filter
 * @param numerOfRows              row count passed through to the bit set helpers
 * @return bit set produced by the selected helper
 */
private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk, int numerOfRows) {
  byte[] maskedDefault = null;
  final boolean directDictionary =
      dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY);
  if (directDictionary) {
    DirectDictionaryGenerator generator =
        DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(
            dimColEvaluatorInfoList.get(0).getDimension().getDataType());
    int keyAfterNull = generator.generateDirectSurrogateKey(null) + 1;
    CarbonDimension blockDimension =
        segmentProperties.getDimensions().get(dimensionBlocksIndex[0]);
    maskedDefault = FilterUtil.getMaskKey(
        keyAfterNull, blockDimension, this.segmentProperties.getSortColumnsGenerator());
  }
  return dimensionColumnDataChunk.isExplicitSorted()
      ? setFilterdIndexToBitSetWithColumnIndex(dimensionColumnDataChunk, numerOfRows, maskedDefault)
      : setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows, maskedDefault);
}
use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.
the class RowLevelRangeLessThanFiterExecuterImpl method getFilteredIndexes.
/**
 * Selects the rows of the given dimension chunk that pass this filter.
 *
 * <p>For a direct-dictionary encoded filter dimension the default value is the mask key of the
 * surrogate key generated for a null member, incremented by one; for any other dimension it is
 * null. Explicitly sorted chunks are evaluated through the column-index helper, all other chunks
 * through the plain helper.
 *
 * @param dimensionColumnDataChunk chunk of the dimension column to filter
 * @param numerOfRows              row count passed through to the bit set helpers
 * @return bit set produced by the selected helper
 */
private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk, int numerOfRows) {
  byte[] defaultMaskKey = null;
  if (dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
    DirectDictionaryGenerator keyGenerator =
        DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(
            dimColEvaluatorInfoList.get(0).getDimension().getDataType());
    int surrogateAfterNull = keyGenerator.generateDirectSurrogateKey(null) + 1;
    CarbonDimension dimensionOfBlock =
        segmentProperties.getDimensions().get(dimensionBlocksIndex[0]);
    defaultMaskKey = FilterUtil.getMaskKey(
        surrogateAfterNull, dimensionOfBlock, this.segmentProperties.getSortColumnsGenerator());
  }
  final BitSet filtered;
  if (dimensionColumnDataChunk.isExplicitSorted()) {
    filtered = setFilterdIndexToBitSetWithColumnIndex(
        dimensionColumnDataChunk, numerOfRows, defaultMaskKey);
  } else {
    filtered = setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows, defaultMaskKey);
  }
  return filtered;
}
use of org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator in project carbondata by apache.
the class RangeValueFilterExecuterImpl method setFilterdIndexToBitSet.
/**
 * Builds the bit set of row positions in the page that fall inside the filter range held in
 * {@code filterRangesValues}: element 0 is used as the lower bound and element 1 as the upper
 * bound.
 *
 * When the page reports {@code isExplicitSorted()}, the first and last selected positions are
 * located with binary searches and every position between them (inclusive) is set. Otherwise
 * the lower and upper bounds are evaluated separately over the page, the two results are
 * AND-ed, and {@code FilterUtil.removeNullValues} is applied with the column's default value.
 *
 * @param dimensionColumnPage page of the dimension column to scan
 * @param numerOfRows row count used as the search and scan bound
 * @return BitSet with a bit set for each selected row position
 */
private BitSet setFilterdIndexToBitSet(DimensionColumnPage dimensionColumnPage, int numerOfRows) {
BitSet bitSet = new BitSet(numerOfRows);
// if (dimensionColumnPage instanceof FixedLengthDimensionColumnPage) {
byte[][] filterValues = this.filterRangesValues;
if (dimensionColumnPage.isExplicitSorted()) {
// start is the scratch result of each binary search; startMin/endMax are the first and last
// positions (inclusive) that will be set in the bit set.
int start = 0;
int startMin = 0;
int endMax = 0;
int startIndex = 0;
// Lower bound: search for filterValues[0] unless startBlockMinIsDefaultStart is set, in which
// case the range simply begins at startIndex (0).
if (!startBlockMinIsDefaultStart) {
start = CarbonUtil.getFirstIndexUsingBinarySearch(dimensionColumnPage, startIndex, numerOfRows - 1, filterValues[0], greaterThanExp);
// Bound value found and the comparison is a strict greater-than: let the helper move the
// position (presumably past the entries equal to the bound -- confirm in CarbonUtil).
if (greaterThanExp && start >= 0) {
start = CarbonUtil.nextGreaterValueToTarget(start, dimensionColumnPage, filterValues[0], numerOfRows);
}
// A negative result is decoded with -(start + 1) (presumably the insertion point of the
// missing value -- confirm in CarbonUtil) and clamped to the last row.
if (start < 0) {
start = -(start + 1);
if (start == numerOfRows) {
start = start - 1;
}
// If the lower bound is still greater than the value at this position, the position itself
// must be excluded, so the range starts one further on.
if ((ByteUtil.compare(filterValues[0], dimensionColumnPage.getChunkData(start))) > 0) {
start = start + 1;
}
}
startMin = start;
} else {
startMin = startIndex;
}
// Upper bound: search for filterValues[1] unless endBlockMaxisDefaultEnd is set, in which case
// the range simply ends at the last row.
if (!endBlockMaxisDefaultEnd) {
start = CarbonUtil.getFirstIndexUsingBinarySearch(dimensionColumnPage, startIndex, numerOfRows - 1, filterValues[1], lessThanEqualExp);
// Bound value found and the comparison is a strict less-than: let the helper move the
// position (presumably before the entries equal to the bound -- confirm in CarbonUtil).
if (lessThanExp && start >= 0) {
start = CarbonUtil.nextLesserValueToTarget(start, dimensionColumnPage, filterValues[1]);
}
// Same decoding and clamping of a negative search result as for the lower bound.
if (start < 0) {
start = -(start + 1);
if (start == numerOfRows) {
start = start - 1;
}
// If the upper bound is less than the value at this position, the position holds a value
// above the searched key, so move to the previous one.
if ((ByteUtil.compare(filterValues[1], dimensionColumnPage.getChunkData(start)) < 0)) {
start = start - 1;
}
}
endMax = start;
} else {
endMax = numerOfRows - 1;
}
// Select every position in [startMin, endMax]; nothing is set when startMin > endMax.
for (int j = startMin; j <= endMax; j++) {
bitSet.set(j);
}
// No-dictionary columns get an extra pass over the selected range; what it adjusts is defined
// in updateForNoDictionaryColumn, which is not visible here.
if (dimensionColumnPage.isNoDicitionaryColumn()) {
updateForNoDictionaryColumn(startMin, endMax, dimensionColumnPage, bitSet);
}
} else {
// Not explicitly sorted: work out the column's default value first; it is passed to
// FilterUtil.removeNullValues after the range has been evaluated.
byte[] defaultValue = null;
if (dimColEvaluatorInfo.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(dimColEvaluatorInfo.getDimension().getDataType());
// Surrogate key the generator produces for a null member.
int key = directDictionaryGenerator.generateDirectSurrogateKey(null);
CarbonDimension currentBlockDimension = segmentProperties.getDimensions().get(dimensionChunkIndex);
// Sort columns use the mask key form of the surrogate; other columns use its plain bytes.
if (currentBlockDimension.isSortColumn()) {
defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension, this.segmentProperties.getSortColumnsGenerator());
} else {
defaultValue = ByteUtil.toBytes(key);
}
} else {
// Without direct dictionary encoding: STRING columns use MEMBER_DEFAULT_VAL_ARRAY, every other
// type uses an empty byte array.
if (dimColEvaluatorInfo.getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
} else {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
}
}
// evaluate result for lower range value first and then perform and operation in the
// upper range value in order to compute the final result
bitSet = evaluateGreaterThanFilterForUnsortedColumn(dimensionColumnPage, filterValues[0], numerOfRows);
BitSet upperRangeBitSet = evaluateLessThanFilterForUnsortedColumn(dimensionColumnPage, filterValues[1], numerOfRows);
bitSet.and(upperRangeBitSet);
// Drop the rows that hold the default value computed above.
FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue);
}
return bitSet;
}
Aggregations