Use of org.apache.carbondata.format.Encoding in project carbondata by apache.
The class CarbonMetadataUtil, method getDatachunk2.
/**
* Below method will be used to get the data chunk object for all the columns
*
* @param blockletInfoColumnar blocklet info
* @param columnSchema list of columns
* @param segmentProperties segment properties
* @return list of data chunks
* @throws IOException
*/
public static List<DataChunk2> getDatachunk2(BlockletInfoColumnar blockletInfoColumnar,
    List<ColumnSchema> columnSchema, SegmentProperties segmentProperties) throws IOException {
  List<DataChunk2> colDataChunks = new ArrayList<DataChunk2>();
  int rowIdIndex = 0;
  int aggregateIndex = 0;
  boolean[] isSortedKeyColumn = blockletInfoColumnar.getIsSortedKeyColumn();
  boolean[] aggKeyBlock = blockletInfoColumnar.getAggKeyBlock();
  boolean[] colGrpblock = blockletInfoColumnar.getColGrpBlocks();
  for (int i = 0; i < blockletInfoColumnar.getKeyLengths().length; i++) {
    DataChunk2 dataChunk = new DataChunk2();
    dataChunk.setChunk_meta(getChunkCompressionMeta());
    List<Encoding> encodings = new ArrayList<Encoding>();
    if (containsEncoding(i, Encoding.DICTIONARY, columnSchema, segmentProperties)) {
      encodings.add(Encoding.DICTIONARY);
    }
    if (containsEncoding(i, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) {
      encodings.add(Encoding.DIRECT_DICTIONARY);
    }
    dataChunk.setRowMajor(colGrpblock[i]);
    // TODO: once the schema PR is merged, this information needs to be passed here.
    dataChunk.setData_page_length(blockletInfoColumnar.getKeyLengths()[i]);
    if (aggKeyBlock[i]) {
      dataChunk.setRle_page_length(blockletInfoColumnar.getDataIndexMapLength()[aggregateIndex]);
      encodings.add(Encoding.RLE);
      aggregateIndex++;
    }
    dataChunk.setSort_state(isSortedKeyColumn[i] ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE);
    if (!isSortedKeyColumn[i]) {
      dataChunk.setRowid_page_length(blockletInfoColumnar.getKeyBlockIndexLength()[rowIdIndex]);
      encodings.add(Encoding.INVERTED_INDEX);
      rowIdIndex++;
    }
    // TODO: right now the encodings are decided at runtime; change as per these encoders.
    dataChunk.setEncoders(encodings);
    colDataChunks.add(dataChunk);
  }
  for (int i = 0; i < blockletInfoColumnar.getMeasureLength().length; i++) {
    DataChunk2 dataChunk = new DataChunk2();
    dataChunk.setChunk_meta(getChunkCompressionMeta());
    dataChunk.setRowMajor(false);
    // TODO: once the schema PR is merged, this information needs to be passed here.
    dataChunk.setData_page_length(blockletInfoColumnar.getMeasureLength()[i]);
    // TODO: right now the encodings are decided at runtime; change as per these encoders.
    List<Encoding> encodings = new ArrayList<Encoding>();
    encodings.add(Encoding.DELTA);
    dataChunk.setEncoders(encodings);
    // TODO: writing dummy presence meta; need to set the actual presence meta.
    PresenceMeta presenceMeta = new PresenceMeta();
    presenceMeta.setPresent_bit_streamIsSet(true);
    presenceMeta.setPresent_bit_stream(CompressorFactory.getInstance().getCompressor()
        .compressByte(blockletInfoColumnar.getMeasureNullValueIndex()[i].toByteArray()));
    dataChunk.setPresence(presenceMeta);
    // TODO: need to write the ValueCompression meta here.
    List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>();
    encoderMetaList.add(ByteBuffer.wrap(
        serializeEncoderMeta(createValueEncoderMeta(blockletInfoColumnar.getCompressionModel(), i))));
    dataChunk.setEncoder_meta(encoderMetaList);
    colDataChunks.add(dataChunk);
  }
  return colDataChunks;
}
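For orientation, here is a minimal caller-side sketch of how the returned chunk metadata might be inspected. It assumes the thrift-generated getters getEncoders() and getData_page_length() on DataChunk2; the three input objects are placeholders that would come from the write path:

List<DataChunk2> chunks =
    CarbonMetadataUtil.getDatachunk2(blockletInfoColumnar, columnSchema, segmentProperties);
for (int i = 0; i < chunks.size(); i++) {
  // Each chunk records which encodings were chosen for that column above.
  System.out.println("column " + i + " encodings: " + chunks.get(i).getEncoders()
      + ", data page length: " + chunks.get(i).getData_page_length());
}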
Use of org.apache.carbondata.format.Encoding in project carbondata by apache.
The class DataFileFooterConverterTest, method testReadDataFileFooter.
@Test
public void testReadDataFileFooter() throws Exception {
  DataFileFooterConverter dataFileFooterConverter = new DataFileFooterConverter();
  DataFileFooter dataFileFooter = new DataFileFooter();
  List<Integer> column_cardinalities = new ArrayList<>();
  column_cardinalities.add(new Integer("1"));
  column_cardinalities.add(new Integer("2"));
  column_cardinalities.add(new Integer("3"));
  org.apache.carbondata.format.SegmentInfo segmentInfo1 =
      new org.apache.carbondata.format.SegmentInfo(3, column_cardinalities);
  List<Encoding> encoders = new ArrayList<>();
  encoders.add(Encoding.INVERTED_INDEX);
  encoders.add(Encoding.BIT_PACKED);
  encoders.add(Encoding.DELTA);
  encoders.add(Encoding.DICTIONARY);
  encoders.add(Encoding.DIRECT_DICTIONARY);
  encoders.add(Encoding.RLE);
  ColumnSchema columnSchema = new ColumnSchema(DataType.INT, "column", "3", true, encoders, true);
  ColumnSchema columnSchema1 = new ColumnSchema(DataType.ARRAY, "column", "3", true, encoders, true);
  ColumnSchema columnSchema2 = new ColumnSchema(DataType.DECIMAL, "column", "3", true, encoders, true);
  ColumnSchema columnSchema3 = new ColumnSchema(DataType.DOUBLE, "column", "3", true, encoders, true);
  ColumnSchema columnSchema4 = new ColumnSchema(DataType.LONG, "column", "3", true, encoders, true);
  ColumnSchema columnSchema5 = new ColumnSchema(DataType.SHORT, "column", "3", true, encoders, true);
  ColumnSchema columnSchema6 = new ColumnSchema(DataType.STRUCT, "column", "3", true, encoders, true);
  ColumnSchema columnSchema7 = new ColumnSchema(DataType.STRING, "column", "3", true, encoders, true);
  final List<ColumnSchema> columnSchemas = new ArrayList<>();
  columnSchemas.add(columnSchema);
  columnSchemas.add(columnSchema1);
  columnSchemas.add(columnSchema2);
  columnSchemas.add(columnSchema3);
  columnSchemas.add(columnSchema4);
  columnSchemas.add(columnSchema5);
  columnSchemas.add(columnSchema6);
  columnSchemas.add(columnSchema7);
  org.apache.carbondata.format.BlockletIndex blockletIndex1 = new org.apache.carbondata.format.BlockletIndex();
  List<org.apache.carbondata.format.BlockletIndex> blockletIndexArrayList = new ArrayList<>();
  blockletIndexArrayList.add(blockletIndex1);
  org.apache.carbondata.format.BlockletInfo blockletInfo = new org.apache.carbondata.format.BlockletInfo();
  List<org.apache.carbondata.format.BlockletInfo> blockletInfoArrayList = new ArrayList<>();
  blockletInfoArrayList.add(blockletInfo);
  final FileFooter fileFooter = new FileFooter(1, 3, columnSchemas, segmentInfo1, blockletIndexArrayList);
  fileFooter.setBlocklet_info_list(blockletInfoArrayList);
  BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex();
  blockletBTreeIndex.setStart_key("1".getBytes());
  blockletBTreeIndex.setEnd_key("3".getBytes());
  blockletIndex1.setB_tree_index(blockletBTreeIndex);
  BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
  blockletMinMaxIndex.setMax_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 2)));
  blockletMinMaxIndex.setMin_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 1)));
  blockletIndex1.setMin_max_index(blockletMinMaxIndex);
  new MockUp<FileFactory>() {

    @SuppressWarnings("unused")
    @Mock
    public FileFactory.FileType getFileType(String path) {
      return FileFactory.FileType.LOCAL;
    }

    @SuppressWarnings("unused")
    @Mock
    public FileReader getFileHolder(FileFactory.FileType fileType) {
      return new FileReaderImpl();
    }
  };
  new MockUp<FileReaderImpl>() {

    @SuppressWarnings("unused")
    @Mock
    public long readLong(String filePath, long offset) {
      return 1;
    }
  };
  new MockUp<CarbonFooterReader>() {

    @SuppressWarnings("unused")
    @Mock
    public FileFooter readFooter() throws IOException {
      return fileFooter;
    }
  };
  SegmentInfo segmentInfo = new SegmentInfo();
  int[] arr = { 1, 2, 3 };
  segmentInfo.setColumnCardinality(arr);
  dataFileFooter.setNumberOfRows(3);
  dataFileFooter.setSegmentInfo(segmentInfo);
  TableBlockInfo info =
      new TableBlockInfo("/file.carbondata", 1, "0", new String[0], 1, ColumnarFormatVersion.V1, null);
  DataFileFooter result = dataFileFooterConverter.readDataFileFooter(info);
  assertEquals(result.getNumberOfRows(), 3);
}
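The test above isolates the footer-reading logic with JMockit's MockUp pattern, visible in the snippet: constructing an anonymous MockUp<T> subclass redefines every @Mock-annotated method on T for the rest of the test. A minimal standalone sketch of the same pattern, with a hypothetical Config class and assuming the JMockit agent is attached to the JVM:

import mockit.Mock;
import mockit.MockUp;

public class MockUpSketch {
  static class Config {
    static String endpoint() { return "https://prod.example.com"; }
  }

  public static void main(String[] args) {
    new MockUp<Config>() {
      @Mock
      String endpoint() { return "https://localhost:8080"; }  // replaces the real method
    };
    System.out.println(Config.endpoint());  // prints the mocked value
  }
}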
Use of org.apache.carbondata.format.Encoding in project carbondata by apache.
The class AdaptiveDeltaFloatingCodec, method createEncoder.
@Override
public ColumnPageEncoder createEncoder(Map<String, String> parameter) {
  final Compressor compressor = CompressorFactory.getInstance().getCompressor();
  return new ColumnPageEncoder() {

    @Override
    protected byte[] encodeData(ColumnPage input) throws MemoryException, IOException {
      if (encodedPage != null) {
        throw new IllegalStateException("already encoded");
      }
      encodedPage = ColumnPage.newPage(input.getColumnSpec(), targetDataType, input.getPageSize());
      input.convertValue(converter);
      byte[] result = encodedPage.compress(compressor);
      encodedPage.freeMemory();
      return result;
    }

    @Override
    protected List<Encoding> getEncodingList() {
      List<Encoding> encodings = new ArrayList<Encoding>();
      encodings.add(Encoding.ADAPTIVE_DELTA_FLOATING);
      return encodings;
    }

    @Override
    protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) {
      return new ColumnPageEncoderMeta(inputPage.getColumnSpec(), targetDataType, stats, compressor.getName());
    }
  };
}
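For background, the technique behind this codec can be shown in a self-contained sketch (illustrative Java, not CarbonData code): scale the doubles by a power of ten so they become exact integers, then store each value as its delta from the scaled maximum so the deltas fit a narrower integral type. The factor of 10 and the byte target type are assumptions chosen for this toy page:

public class AdaptiveDeltaFloatingSketch {
  public static void main(String[] args) {
    double[] page = { 10.5, 10.6, 10.7, 10.8 };
    long factor = 10;  // assumed: large enough to make every value an exact integer
    long max = Math.round(10.8 * factor);  // taken from page statistics
    byte[] encoded = new byte[page.length];
    for (int i = 0; i < page.length; i++) {
      // the delta from the scaled max is small and non-negative, so a byte suffices here
      encoded[i] = (byte) (max - Math.round(page[i] * factor));
    }
    // decoding reverses both steps: value = (max - delta) / factor
    System.out.println((max - encoded[0]) / (double) factor);  // prints 10.5
  }
}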
Use of org.apache.carbondata.format.Encoding in project carbondata by apache.
The class AdaptiveIntegralCodec, method createEncoder.
@Override
public ColumnPageEncoder createEncoder(Map<String, String> parameter) {
  final Compressor compressor = CompressorFactory.getInstance().getCompressor();
  return new ColumnPageEncoder() {

    @Override
    protected byte[] encodeData(ColumnPage input) throws MemoryException, IOException {
      if (encodedPage != null) {
        throw new IllegalStateException("already encoded");
      }
      encodedPage = ColumnPage.newPage(input.getColumnSpec(), targetDataType, input.getPageSize());
      input.convertValue(converter);
      byte[] result = encodedPage.compress(compressor);
      encodedPage.freeMemory();
      return result;
    }

    @Override
    protected List<Encoding> getEncodingList() {
      List<Encoding> encodings = new ArrayList<Encoding>();
      encodings.add(Encoding.ADAPTIVE_INTEGRAL);
      return encodings;
    }

    @Override
    protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) {
      return new ColumnPageEncoderMeta(inputPage.getColumnSpec(), targetDataType, stats, compressor.getName());
    }
  };
}
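The adaptive integral idea is analogous for integer pages: using the page's min/max statistics, pick the narrowest integral type that can hold every value and store the page in that type. A self-contained sketch of the width selection (illustrative, not CarbonData code):

public class AdaptiveIntegralSketch {
  // Pick the smallest Java integral width (in bytes) that covers [min, max].
  static int targetWidthInBytes(long min, long max) {
    if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) return 1;
    if (min >= Short.MIN_VALUE && max <= Short.MAX_VALUE) return 2;
    if (min >= Integer.MIN_VALUE && max <= Integer.MAX_VALUE) return 4;
    return 8;
  }

  public static void main(String[] args) {
    // every value fits in a byte, so the page shrinks from 8 bytes per value to 1
    System.out.println(targetWidthInBytes(100, 127));  // prints 1
  }
}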
Use of org.apache.carbondata.format.Encoding in project carbondata by apache.
The class ComplexDimensionIndexCodec, method createEncoder.
@Override
public ColumnPageEncoder createEncoder(Map<String, String> parameter) {
  return new IndexStorageEncoder() {

    @Override
    void encodeIndexStorage(ColumnPage inputPage) {
      IndexStorage indexStorage =
          new BlockIndexerStorageForShort(inputPage.getByteArrayPage(), false, false, false);
      byte[] flattened = ByteUtil.flatten(indexStorage.getDataPage());
      byte[] compressed = compressor.compressByte(flattened);
      super.indexStorage = indexStorage;
      super.compressedDataPage = compressed;
    }

    @Override
    protected List<Encoding> getEncodingList() {
      List<Encoding> encodings = new ArrayList<>();
      encodings.add(Encoding.DICTIONARY);
      encodings.add(Encoding.INVERTED_INDEX);
      return encodings;
    }
  };
}
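The inverted index produced here pairs the sorted data page with a row-id mapping back to the original positions. A self-contained sketch of that mapping (illustrative, not CarbonData code; the real codec compares raw bytes rather than strings):

import java.util.Arrays;
import java.util.Comparator;

public class InvertedIndexSketch {
  public static void main(String[] args) {
    byte[][] values = { "b".getBytes(), "a".getBytes(), "c".getBytes() };
    Integer[] rowIds = { 0, 1, 2 };
    // sort row ids by their values; the sorted page plus this mapping is the inverted index
    Arrays.sort(rowIds, Comparator.comparing((Integer i) -> new String(values[i])));
    System.out.println(Arrays.toString(rowIds));  // prints [1, 0, 2]
  }
}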