Use of org.apache.carbondata.format.Encoding in project carbondata by apache.
The class CompressedDimensionChunkFileBasedReaderV3, method decodeDimensionByMeta.
private ColumnPage decodeDimensionByMeta(DataChunk2 pageMetadata, ByteBuffer pageData, int offset)
    throws IOException, MemoryException {
  // The page header carries the list of encodings applied to this page and
  // the metadata blob each encoder serialized at write time.
  List<Encoding> encodings = pageMetadata.getEncoders();
  List<ByteBuffer> encoderMetas = pageMetadata.getEncoder_meta();
  // Build the matching decoder, then materialize the column page from the raw bytes.
  ColumnPageDecoder decoder = encodingFactory.createDecoder(encodings, encoderMetas);
  return decoder.decode(pageData.array(), offset, pageMetadata.data_page_length);
}
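The factory call above hides the actual decoder selection. Below is a minimal, self-contained sketch of that dispatch pattern; EncodingKind, PageDecoder, and DecoderFactory are hypothetical stand-ins for illustration, not CarbonData classes.

import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

enum EncodingKind { DIRECT_COMPRESS, RLE }

interface PageDecoder {
  int[] decode(byte[] data, int offset, int length);
}

class DecoderFactory {
  private final Map<EncodingKind, Function<byte[], PageDecoder>> registry =
      new EnumMap<>(EncodingKind.class);

  DecoderFactory() {
    // Each entry turns the serialized encoder metadata into a configured decoder.
    // The bodies are placeholders; a real codec would parse the meta bytes.
    registry.put(EncodingKind.DIRECT_COMPRESS,
        meta -> (data, offset, length) -> new int[length]);
    registry.put(EncodingKind.RLE,
        meta -> (data, offset, length) -> new int[length]);
  }

  PageDecoder createDecoder(List<EncodingKind> encodings, List<byte[]> metas) {
    // Same contract as the CarbonData factory: exactly one encoding per page.
    if (encodings.size() != 1 || metas.size() != 1) {
      throw new IllegalArgumentException("exactly one encoding expected");
    }
    return registry.get(encodings.get(0)).apply(metas.get(0));
  }
}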
Use of org.apache.carbondata.format.Encoding in project carbondata by apache.
The class DataFileFooterConverterTest, method testGetIndexInfo.
@Test
public void testGetIndexInfo() throws Exception {
  DataFileFooterConverter dataFileFooterConverter = new DataFileFooterConverter();
  final ThriftReader thriftReader = new ThriftReader("file");
  List<Encoding> encoders = new ArrayList<>();
  encoders.add(Encoding.INVERTED_INDEX);
  encoders.add(Encoding.BIT_PACKED);
  encoders.add(Encoding.DELTA);
  encoders.add(Encoding.DICTIONARY);
  encoders.add(Encoding.DIRECT_DICTIONARY);
  encoders.add(Encoding.RLE);
  // One column schema per data type, all sharing the same encoder list.
  ColumnSchema columnSchema = new ColumnSchema(DataType.INT, "column", "3", true, encoders, true);
  ColumnSchema columnSchema1 = new ColumnSchema(DataType.ARRAY, "column", "3", true, encoders, true);
  ColumnSchema columnSchema2 = new ColumnSchema(DataType.DECIMAL, "column", "3", true, encoders, true);
  ColumnSchema columnSchema3 = new ColumnSchema(DataType.DOUBLE, "column", "3", true, encoders, true);
  ColumnSchema columnSchema4 = new ColumnSchema(DataType.LONG, "column", "3", true, encoders, true);
  ColumnSchema columnSchema5 = new ColumnSchema(DataType.SHORT, "column", "3", true, encoders, true);
  ColumnSchema columnSchema6 = new ColumnSchema(DataType.STRUCT, "column", "3", true, encoders, true);
  ColumnSchema columnSchema7 = new ColumnSchema(DataType.STRING, "column", "3", true, encoders, true);
  final List<ColumnSchema> columnSchemas = new ArrayList<>();
  columnSchemas.add(columnSchema);
  columnSchemas.add(columnSchema1);
  columnSchemas.add(columnSchema2);
  columnSchemas.add(columnSchema3);
  columnSchemas.add(columnSchema4);
  columnSchemas.add(columnSchema5);
  columnSchemas.add(columnSchema6);
  columnSchemas.add(columnSchema7);
  // Build the blocklet index (B-tree start/end keys plus min/max values)
  // that the mocked reader will hand back.
  final BlockIndex blockIndex = new BlockIndex();
  org.apache.carbondata.format.BlockletIndex blockletIndex1 = new org.apache.carbondata.format.BlockletIndex();
  BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex();
  blockletBTreeIndex.setStart_key("1".getBytes());
  blockletBTreeIndex.setEnd_key("3".getBytes());
  blockletIndex1.setB_tree_index(blockletBTreeIndex);
  BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
  blockletMinMaxIndex.setMax_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 2)));
  blockletMinMaxIndex.setMin_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 1)));
  blockletIndex1.setMin_max_index(blockletMinMaxIndex);
  blockIndex.setBlock_index(blockletIndex1);
  List<Integer> column_cardinalities = new ArrayList<>();
  column_cardinalities.add(1);
  final org.apache.carbondata.format.SegmentInfo segmentInfo1 =
      new org.apache.carbondata.format.SegmentInfo(3, column_cardinalities);
  // Stub the index-file reader: hasNext() reports exactly one block, and the
  // prepared header and block index are returned without touching disk.
  new MockUp<CarbonIndexFileReader>() {
    boolean mockedHasNextStatus = true;

    @SuppressWarnings("unused")
    @Mock
    public boolean hasNext() throws IOException {
      boolean temp = mockedHasNextStatus;
      mockedHasNextStatus = false;
      return temp;
    }

    @SuppressWarnings("unused")
    @Mock
    public void openThriftReader(String filePath) throws IOException {
      thriftReader.open();
    }

    @SuppressWarnings("unused")
    @Mock
    public IndexHeader readIndexHeader() throws IOException {
      return new IndexHeader(1, columnSchemas, segmentInfo1);
    }

    @SuppressWarnings("unused")
    @Mock
    public BlockIndex readBlockIndexInfo() throws IOException {
      return blockIndex;
    }

    @SuppressWarnings("unused")
    @Mock
    public void closeThriftReader() {
      thriftReader.close();
    }
  };
  new MockUp<IndexHeader>() {
    @SuppressWarnings("unused")
    @Mock
    public List<ColumnSchema> getTable_columns() {
      return columnSchemas;
    }
  };
  ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream("1".getBytes());
  final DataInputStream dataInputStream = new DataInputStream(byteArrayInputStream);
  new MockUp<FileFactory>() {
    @SuppressWarnings("unused")
    @Mock
    public DataInputStream getDataInputStream(String path, FileFactory.FileType fileType, int bufferSize) {
      return dataInputStream;
    }
  };
  String[] arr = { "a", "b", "c" };
  String fileName = "/part-0-0_batchno0-0-1495074251740.carbondata";
  TableBlockInfo tableBlockInfo = new TableBlockInfo(fileName, 3, "id", arr, 3, ColumnarFormatVersion.V1, null);
  tableBlockInfo.getBlockletInfos().setNoOfBlockLets(3);
  List<TableBlockInfo> tableBlockInfoList = new ArrayList<>();
  tableBlockInfoList.add(tableBlockInfo);
  String idxFileName = "0_batchno0-0-1495074251740.carbonindex";
  List<DataFileFooter> dataFileFooterList = dataFileFooterConverter.getIndexInfo(idxFileName, tableBlockInfoList);
  // The converted footer must carry the start key written into the mocked index.
  byte[] actual = dataFileFooterList.get(0).getBlockletIndex().getBtreeIndex().getStartKey();
  byte[] expected = "1".getBytes();
  for (int i = 0; i < actual.length; i++) {
    assertEquals(actual[i], expected[i]);
  }
}
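The test isolates itself from real file I/O with JMockit's MockUp/@Mock fakes: instantiating a MockUp<T> redefines the listed methods for every instance of T created afterwards. A minimal sketch of the pattern, assuming JMockit (the mockit package) is on the classpath and the JVM runs with its agent; Reader here is a hypothetical stand-in, not a CarbonData class.

import mockit.Mock;
import mockit.MockUp;

// Hypothetical collaborator standing in for CarbonIndexFileReader.
class Reader {
  public String read() { return "real"; }
}

public class MockUpDemo {
  public static void main(String[] args) {
    // Applying a MockUp redefines read() for every Reader instance
    // created for the remainder of the run.
    new MockUp<Reader>() {
      @Mock
      public String read() { return "stubbed"; }
    };
    System.out.println(new Reader().read()); // prints "stubbed"
  }
}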
Use of org.apache.carbondata.format.Encoding in project carbondata by apache.
The class CarbonMetadataUtilTest, method setUp.
@BeforeClass
public static void setUp() {
  // Six max/min value pairs, one per measure column.
  objMaxArr = new Long[] { 111111L, 121111L, 131111L, 141111L, 151111L, 161111L };
  objMinArr = new Long[] { 119L, 121L, 131L, 141L, 151L, 161L };
  objDecimal = new int[] { 0, 0, 0, 0, 0, 0 };
  columnSchemaList = new ArrayList<>();
  List<Encoding> encodingList = new ArrayList<>();
  encodingList.add(Encoding.BIT_PACKED);
  encodingList.add(Encoding.DELTA);
  encodingList.add(Encoding.INVERTED_INDEX);
  encodingList.add(Encoding.DIRECT_DICTIONARY);
  // Wrap three byte arrays as the encoder metadata buffers of a data chunk.
  byteArr = "412111".getBytes();
  byte[] byteArr1 = "321".getBytes();
  byte[] byteArr2 = "356".getBytes();
  byteBufferList = new ArrayList<>();
  ByteBuffer bb = ByteBuffer.allocate(byteArr.length);
  bb.put(byteArr);
  ByteBuffer bb1 = ByteBuffer.allocate(byteArr1.length);
  bb1.put(byteArr1);
  ByteBuffer bb2 = ByteBuffer.allocate(byteArr2.length);
  bb2.put(byteArr2);
  byteBufferList.add(bb);
  byteBufferList.add(bb1);
  byteBufferList.add(bb2);
  DataChunk dataChunk = new DataChunk();
  dataChunk.setEncoders(encodingList);
  dataChunk.setEncoder_meta(byteBufferList);
  List<DataChunk> dataChunkList = new ArrayList<>();
  dataChunkList.add(dataChunk);
  dataChunkList.add(dataChunk);
  BlockletInfo blockletInfo = new BlockletInfo();
  blockletInfo.setColumn_data_chunks(dataChunkList);
  blockletInfoList = new ArrayList<>();
  blockletInfoList.add(blockletInfo);
  blockletInfoList.add(blockletInfo);
  ValueEncoderMeta meta = CarbonTestUtil.createValueEncoderMeta();
  meta.setDecimal(5);
  meta.setMinValue(objMinArr);
  meta.setMaxValue(objMaxArr);
  meta.setType(org.apache.carbondata.core.metadata.datatype.DataType.DOUBLE_MEASURE_CHAR);
  List<Encoding> encoders = new ArrayList<>();
  encoders.add(Encoding.INVERTED_INDEX);
  encoders.add(Encoding.BIT_PACKED);
  encoders.add(Encoding.DELTA);
  encoders.add(Encoding.DICTIONARY);
  encoders.add(Encoding.DIRECT_DICTIONARY);
  encoders.add(Encoding.RLE);
  // One column schema per data type, all sharing the same encoder list.
  ColumnSchema columnSchema = new ColumnSchema(DataType.INT, "column", "3", true, encoders, true);
  ColumnSchema columnSchema1 = new ColumnSchema(DataType.ARRAY, "column", "3", true, encoders, true);
  ColumnSchema columnSchema2 = new ColumnSchema(DataType.DECIMAL, "column", "3", true, encoders, true);
  ColumnSchema columnSchema3 = new ColumnSchema(DataType.DOUBLE, "column", "3", true, encoders, true);
  ColumnSchema columnSchema4 = new ColumnSchema(DataType.LONG, "column", "3", true, encoders, true);
  ColumnSchema columnSchema5 = new ColumnSchema(DataType.SHORT, "column", "3", true, encoders, true);
  ColumnSchema columnSchema6 = new ColumnSchema(DataType.STRUCT, "column", "3", true, encoders, true);
  ColumnSchema columnSchema7 = new ColumnSchema(DataType.STRING, "column", "3", true, encoders, true);
  columnSchemas = new ArrayList<>();
  columnSchemas.add(columnSchema);
  columnSchemas.add(columnSchema1);
  columnSchemas.add(columnSchema2);
  columnSchemas.add(columnSchema3);
  columnSchemas.add(columnSchema4);
  columnSchemas.add(columnSchema5);
  columnSchemas.add(columnSchema6);
  columnSchemas.add(columnSchema7);
}
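One detail worth noting above: each ByteBuffer is filled with put() but never flipped, so its position is left at the limit. That is harmless for consumers that go through array(), which exposes the whole backing array, but any relative get() would see zero remaining bytes. A small JDK-only illustration:

import java.nio.ByteBuffer;

public class BufferFlipDemo {
  public static void main(String[] args) {
    byte[] src = "412111".getBytes();
    ByteBuffer bb = ByteBuffer.allocate(src.length);
    bb.put(src);                            // position == limit after the put
    System.out.println(bb.remaining());     // 0 -> a relative get() would fail
    System.out.println(bb.array().length);  // 6 -> array() still sees all bytes
    bb.flip();                              // reset position for reading
    System.out.println(bb.remaining());     // 6 -> now readable via get()
  }
}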
Use of org.apache.carbondata.format.Encoding in project carbondata by apache.
The class EncodingFactory, method createDecoder.
/**
 * Return a new decoder based on the encoder metadata read from the file.
 */
public ColumnPageDecoder createDecoder(List<Encoding> encodings, List<ByteBuffer> encoderMetas)
    throws IOException {
  // V3 writes exactly one encoding and one metadata blob per page.
  assert (encodings.size() == 1);
  assert (encoderMetas.size() == 1);
  Encoding encoding = encodings.get(0);
  byte[] encoderMeta = encoderMetas.get(0).array();
  ByteArrayInputStream stream = new ByteArrayInputStream(encoderMeta);
  DataInputStream in = new DataInputStream(stream);
  if (encoding == DIRECT_COMPRESS) {
    ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
    metadata.readFields(in);
    return new DirectCompressCodec(metadata.getStoreDataType()).createDecoder(metadata);
  } else if (encoding == ADAPTIVE_INTEGRAL) {
    ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
    metadata.readFields(in);
    SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
    return new AdaptiveIntegralCodec(metadata.getSchemaDataType(), metadata.getStoreDataType(), stats).createDecoder(metadata);
  } else if (encoding == ADAPTIVE_DELTA_INTEGRAL) {
    ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
    metadata.readFields(in);
    SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
    return new AdaptiveDeltaIntegralCodec(metadata.getSchemaDataType(), metadata.getStoreDataType(), stats).createDecoder(metadata);
  } else if (encoding == ADAPTIVE_FLOATING) {
    ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
    metadata.readFields(in);
    SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
    return new AdaptiveFloatingCodec(metadata.getSchemaDataType(), metadata.getStoreDataType(), stats).createDecoder(metadata);
  } else if (encoding == ADAPTIVE_DELTA_FLOATING) {
    ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
    metadata.readFields(in);
    SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
    return new AdaptiveDeltaFloatingCodec(metadata.getSchemaDataType(), metadata.getStoreDataType(), stats).createDecoder(metadata);
  } else if (encoding == RLE_INTEGRAL) {
    RLEEncoderMeta metadata = new RLEEncoderMeta();
    metadata.readFields(in);
    return new RLECodec().createDecoder(metadata);
  } else if (encoding == BOOL_BYTE) {
    ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
    metadata.readFields(in);
    return new DirectCompressCodec(metadata.getStoreDataType()).createDecoder(metadata);
  } else {
    // For backward compatibility: legacy pages carry a serialized ValueEncoderMeta.
    ValueEncoderMeta metadata = CarbonUtil.deserializeEncoderMetaV3(encoderMeta);
    return createDecoderLegacy(metadata);
  }
}
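Every branch follows the same convention: the metadata object deserializes itself from the raw bytes via readFields(DataInputStream), mirroring the write made at encode time. A self-contained round-trip sketch of that convention; Meta here is a hypothetical stand-in for ColumnPageEncoderMeta.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Hypothetical metadata class following the same read/write convention.
class Meta {
  int scale;
  long min;
  long max;

  void writeFields(DataOutputStream out) throws IOException {
    out.writeInt(scale);
    out.writeLong(min);
    out.writeLong(max);
  }

  void readFields(DataInputStream in) throws IOException {
    scale = in.readInt();
    min = in.readLong();
    max = in.readLong();
  }
}

public class MetaRoundTrip {
  public static void main(String[] args) throws IOException {
    Meta written = new Meta();
    written.scale = 2;
    written.min = 119L;
    written.max = 161111L;

    // Serialize as an encoder would when writing the page metadata.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    written.writeFields(new DataOutputStream(bytes));

    // Deserialize as createDecoder does from the encoder meta buffer.
    Meta read = new Meta();
    read.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println(read.min + ".." + read.max); // 119..161111
  }
}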
Use of org.apache.carbondata.format.Encoding in project carbondata by apache.
The class AdaptiveFloatingCodec, method createEncoder.
@Override
public ColumnPageEncoder createEncoder(Map<String, String> parameter) {
  final Compressor compressor = CompressorFactory.getInstance().getCompressor();
  return new ColumnPageEncoder() {
    @Override
    protected byte[] encodeData(ColumnPage input) throws MemoryException, IOException {
      // Each encoder instance may be used only once.
      if (encodedPage != null) {
        throw new IllegalStateException("already encoded");
      }
      encodedPage = ColumnPage.newPage(input.getColumnSpec(), targetDataType, input.getPageSize());
      // Convert every value to the adaptive target type, then compress the page.
      input.convertValue(converter);
      byte[] result = encodedPage.compress(compressor);
      encodedPage.freeMemory();
      return result;
    }

    @Override
    protected List<Encoding> getEncodingList() {
      List<Encoding> encodings = new ArrayList<Encoding>();
      encodings.add(Encoding.ADAPTIVE_FLOATING);
      return encodings;
    }

    @Override
    protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) {
      return new ColumnPageEncoderMeta(inputPage.getColumnSpec(), targetDataType, stats, compressor.getName());
    }
  };
}
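The codec hands back an anonymous subclass that carries its own single-use state (encodedPage) and advertises the encoding it applied. A stripped-down, JDK-only sketch of the same template-method pattern; Encoder here is a hypothetical stand-in, not the real ColumnPageEncoder.

import java.util.ArrayList;
import java.util.List;

// Hypothetical template: subclasses supply encodeData and the encodings
// they advertise; the base class enforces the single-use guard.
abstract class Encoder {
  private boolean encoded;

  final byte[] encode(int[] page) {
    // Same single-use guard as the anonymous encoder above.
    if (encoded) {
      throw new IllegalStateException("already encoded");
    }
    encoded = true;
    return encodeData(page);
  }

  protected abstract byte[] encodeData(int[] page);

  protected abstract List<String> getEncodingList();
}

public class EncoderDemo {
  public static void main(String[] args) {
    Encoder adaptive = new Encoder() {
      @Override
      protected byte[] encodeData(int[] page) {
        byte[] out = new byte[page.length];
        for (int i = 0; i < page.length; i++) {
          out[i] = (byte) page[i]; // stand-in for the real conversion/compression
        }
        return out;
      }

      @Override
      protected List<String> getEncodingList() {
        List<String> encodings = new ArrayList<>();
        encodings.add("ADAPTIVE_FLOATING");
        return encodings;
      }
    };
    System.out.println(adaptive.encode(new int[] { 1, 2, 3 }).length); // 3
  }
}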