use of org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator in project carbondata by apache.
the class TestLocalDictionaryGenerator method testColumnLocalDictionaryGeneratorForNullValueIsPresentWithoutAddingAnyData.
/**
 * A freshly created generator, with no values added, must already resolve
 * dictionary key 1 to the default member value prefixed by its 2-byte length.
 */
@Test
public void testColumnLocalDictionaryGeneratorForNullValueIsPresentWithoutAddingAnyData() {
  LocalDictionaryGenerator generator = new ColumnLocalDictionaryGenerator(1000, 2);
  int memberLength = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length;
  // expected layout: [short length][default member bytes]
  byte[] expectedEntry = ByteBuffer.allocate(2 + memberLength)
      .putShort((short) memberLength)
      .put(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY)
      .array();
  byte[] actualEntry = generator.getDictionaryKeyBasedOnValue(1);
  Assert.assertTrue(Arrays.equals(actualEntry, expectedEntry));
}
use of org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator in project carbondata by apache.
the class ComplexColumnPage method initialize.
/**
 * Creates one column page, with its stats collector, for every slot of
 * {@code columnPages} of this complex column.
 *
 * <p>When a local dictionary generator is registered for the column, a local
 * dictionary page over {@code BYTE_ARRAY} is created. Otherwise the page is
 * created either with the column's own data type or as {@code BYTE_ARRAY},
 * depending on {@code isColumnPageBasedOnDataType}.
 *
 * @param columnToDictMap local dictionary generators, looked up by the column
 *                        name reported for each complex column
 * @param pageSize number of records
 * @param columnCompressor compressor name passed to every page's encoder meta
 */
public void initialize(Map<String, LocalDictionaryGenerator> columnToDictMap, int pageSize,
    String columnCompressor) {
  for (int pageIndex = 0; pageIndex < this.columnPages.length; pageIndex++) {
    LocalDictionaryGenerator dictGenerator =
        columnToDictMap.get(complexColumnInfoList.get(pageIndex).getColumnNames());
    TableSpec.ColumnSpec columnSpec = getColumnSpec(pageIndex, dictGenerator);

    if (dictGenerator != null) {
      // column has a local dictionary: store values in a local dictionary page
      this.columnPages[pageIndex] = ColumnPage.newLocalDictPage(
          new ColumnPageEncoderMeta(columnSpec, DataTypes.BYTE_ARRAY, columnCompressor),
          pageSize, dictGenerator, true);
      this.columnPages[pageIndex].setStatsCollector(new DummyStatsCollector());
      continue;
    }

    DataType dataType = complexColumnInfoList.get(pageIndex).getColumnDataTypes();
    if (isColumnPageBasedOnDataType(pageIndex)) {
      // no dictionary primitive types need adaptive encoding,
      // so they are kept as actual values rather than byte arrays
      this.columnPages[pageIndex] = ColumnPage.newPage(
          new ColumnPageEncoderMeta(columnSpec, dataType, columnCompressor), pageSize);
      this.columnPages[pageIndex].setStatsCollector(
          PrimitivePageStatsCollector.newInstance(dataType));
    } else {
      this.columnPages[pageIndex] = ColumnPage.newPage(
          new ColumnPageEncoderMeta(columnSpec, DataTypes.BYTE_ARRAY, columnCompressor),
          pageSize);
      this.columnPages[pageIndex].setStatsCollector(new DummyStatsCollector());
    }
  }
}
use of org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator in project carbondata by apache.
the class TestPageLevelDictionary method testPageLevelDictionaryContainsOnlyUsedDictionaryValuesWhenMultiplePagesUseSameDictionary.
/**
 * Two page-level dictionaries share one generator and are filled alternately.
 * The dictionary chunk produced for the first page must then contain exactly
 * the ten values that page wrote, in insertion order.
 */
@Test
public void testPageLevelDictionaryContainsOnlyUsedDictionaryValuesWhenMultiplePagesUseSameDictionary() {
  LocalDictionaryGenerator sharedGenerator = new ColumnLocalDictionaryGenerator(1000, 2);
  String column = "column1";
  PageLevelDictionary firstPage =
      new PageLevelDictionary(sharedGenerator, column, DataTypes.STRING, false, compressorName);
  // values written to the first page; slot n - 1 holds "vishal" + n
  byte[][] expectedValues = new byte[10][];

  // first page, values 1..5
  try {
    for (int n = 1; n <= 5; n++) {
      byte[] value = ("vishal" + n).getBytes();
      expectedValues[n - 1] = value;
      firstPage.getDictionaryValue(
          ByteBuffer.allocate(value.length + 2).putShort((short) value.length).put(value).array());
    }
  } catch (DictionaryThresholdReachedException e) {
    Assert.assertTrue(false);
  }

  PageLevelDictionary secondPage =
      new PageLevelDictionary(sharedGenerator, column, DataTypes.STRING, false, compressorName);

  // second page, values 1..5
  try {
    for (int n = 1; n <= 5; n++) {
      byte[] value = ("vikas" + n).getBytes();
      secondPage.getDictionaryValue(
          ByteBuffer.allocate(value.length + 2).putShort((short) value.length).put(value).array());
    }
  } catch (DictionaryThresholdReachedException e) {
    Assert.assertTrue(false);
  }

  // first page again, values 6..10
  try {
    for (int n = 6; n <= 10; n++) {
      byte[] value = ("vishal" + n).getBytes();
      expectedValues[n - 1] = value;
      firstPage.getDictionaryValue(
          ByteBuffer.allocate(value.length + 2).putShort((short) value.length).put(value).array());
    }
  } catch (DictionaryThresholdReachedException e) {
    Assert.assertTrue(false);
  }

  // second page again, values 6..10
  try {
    for (int n = 6; n <= 10; n++) {
      byte[] value = ("vikas" + n).getBytes();
      secondPage.getDictionaryValue(
          ByteBuffer.allocate(value.length + 2).putShort((short) value.length).put(value).array());
    }
  } catch (DictionaryThresholdReachedException e) {
    Assert.assertTrue(false);
  }

  // decode the first page's dictionary chunk and compare it with what was written
  try {
    LocalDictionaryChunk chunk = firstPage.getLocalDictionaryChunkForBlocklet();
    List<Encoding> encoders = chunk.getDictionary_meta().getEncoders();
    List<ByteBuffer> encoderMetaList = chunk.getDictionary_meta().getEncoder_meta();
    EncodingFactory factory = DefaultEncodingFactory.getInstance();
    ColumnPageDecoder pageDecoder = factory.createDecoder(encoders, encoderMetaList, compressorName);
    byte[] dictionaryData = chunk.getDictionary_data();
    ColumnPage decodedPage = pageDecoder.decode(dictionaryData, 0, dictionaryData.length);
    BitSet usedKeys = BitSet.valueOf(
        CompressorFactory.getInstance().getCompressor(compressorName)
            .unCompressByte(chunk.getDictionary_values()));
    Assert.assertTrue(usedKeys.cardinality() == expectedValues.length);
    for (int row = 0; row < expectedValues.length; row++) {
      Assert.assertTrue(Arrays.equals(decodedPage.getBytes(row), expectedValues[row]));
    }
  } catch (IOException e) {
    Assert.assertTrue(false);
  }
}
use of org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator in project carbondata by apache.
the class TestPageLevelDictionary method testPageLevelDictionaryGenerateDataIsGenertingProperDictionaryValues.
/**
 * Adds the decimal strings "1" to "1000" to a page-level dictionary and checks
 * that the value added in iteration {@code i} is assigned key {@code i + 1}.
 */
@Test
public void testPageLevelDictionaryGenerateDataIsGenertingProperDictionaryValues() {
  LocalDictionaryGenerator dictGenerator = new ColumnLocalDictionaryGenerator(1000, 2);
  String column = "column1";
  PageLevelDictionary dictionary =
      new PageLevelDictionary(dictGenerator, column, DataTypes.STRING, false, compressorName);
  try {
    int value = 1;
    while (value <= 1000) {
      byte[] rawValue = String.valueOf(value).getBytes();
      Assert.assertTrue(dictionary.getDictionaryValue(rawValue) == (value + 1));
      value++;
    }
  } catch (DictionaryThresholdReachedException e) {
    Assert.assertTrue(false);
  }
}
use of org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator in project carbondata by apache.
the class TestPageLevelDictionary method testPageLevelDictionaryContainsOnlyUsedDictionaryValues.
/**
 * Fills two page-level dictionaries that share one generator with 500 distinct
 * values each, then decodes the dictionary chunk of the first page and checks
 * that its first 500 entries are exactly the values the first page wrote, in
 * insertion order.
 */
@Test
public void testPageLevelDictionaryContainsOnlyUsedDictionaryValues() {
  LocalDictionaryGenerator generator = new ColumnLocalDictionaryGenerator(1000, 2);
  String columnName = "column1";
  PageLevelDictionary pageLevelDictionary1 =
      new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
  // values written to the first page; slot i - 1 holds "vishal" + i
  byte[][] validateData = new byte[500][];
  try {
    for (int i = 1; i <= 500; i++) {
      byte[] data = ("vishal" + i).getBytes();
      // dictionary input layout: [short length][value bytes]
      ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
      byteBuffer.putShort((short) data.length);
      byteBuffer.put(data);
      validateData[i - 1] = data;
      pageLevelDictionary1.getDictionaryValue(byteBuffer.array());
    }
  } catch (DictionaryThresholdReachedException e) {
    Assert.fail("dictionary threshold reached while filling the first page: " + e.getMessage());
  }
  PageLevelDictionary pageLevelDictionary2 =
      new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
  try {
    for (int i = 1; i <= 500; i++) {
      byte[] data = ("vikas" + i).getBytes();
      ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
      byteBuffer.putShort((short) data.length);
      byteBuffer.put(data);
      pageLevelDictionary2.getDictionaryValue(byteBuffer.array());
    }
  } catch (DictionaryThresholdReachedException e) {
    Assert.fail("dictionary threshold reached while filling the second page: " + e.getMessage());
  }
  try {
    LocalDictionaryChunk localDictionaryChunkForBlocklet =
        pageLevelDictionary1.getLocalDictionaryChunkForBlocklet();
    List<Encoding> encodings = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoders();
    EncodingFactory encodingFactory = DefaultEncodingFactory.getInstance();
    List<ByteBuffer> encoderMetas =
        localDictionaryChunkForBlocklet.getDictionary_meta().getEncoder_meta();
    ColumnPageDecoder decoder =
        encodingFactory.createDecoder(encodings, encoderMetas, compressorName);
    ColumnPage decode = decoder.decode(localDictionaryChunkForBlocklet.getDictionary_data(), 0,
        localDictionaryChunkForBlocklet.getDictionary_data().length);
    for (int i = 0; i < validateData.length; i++) {
      // the comparison must be asserted; a bare Arrays.equals call verifies nothing
      Assert.assertTrue(Arrays.equals(decode.getBytes(i), validateData[i]));
    }
  } catch (IOException e) {
    Assert.fail("failed to decode the local dictionary chunk: " + e.getMessage());
  }
}
Aggregations