Use of org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage in project carbondata by apache.
The class CarbonFactDataWriterImplV3, method writeBlockletToFile.
/**
 * Write one blocklet's data into the file.
 * File format:
 * <Column1 Data ChunkV3><Column1<Page1><Page2><Page3><Page4>>
 * <Column2 Data ChunkV3><Column2<Page1><Page2><Page3><Page4>>
 * <Column3 Data ChunkV3><Column3<Page1><Page2><Page3><Page4>>
 * <Column4 Data ChunkV3><Column4<Page1><Page2><Page3><Page4>>
 */
private void writeBlockletToFile(byte[][] dataChunkBytes) throws IOException {
  long offset = currentOffsetInFile;
  // to maintain the offset of each data chunk in blocklet
  List<Long> currentDataChunksOffset = new ArrayList<>();
  // to maintain the length of each data chunk in blocklet
  List<Integer> currentDataChunksLength = new ArrayList<>();
  List<EncodedTablePage> encodedTablePages = blockletDataHolder.getEncodedTablePages();
  int numberOfDimension = encodedTablePages.get(0).getNumDimensions();
  int numberOfMeasures = encodedTablePages.get(0).getNumMeasures();
  ByteBuffer buffer = null;
  long dimensionOffset = 0;
  long measureOffset = 0;
  int numberOfRows = 0;
  // calculate the total number of rows in this blocklet (sum of all page sizes)
  for (EncodedTablePage encodedTablePage : encodedTablePages) {
    numberOfRows += encodedTablePage.getPageSize();
  }
  for (int i = 0; i < numberOfDimension; i++) {
    currentDataChunksOffset.add(offset);
    currentDataChunksLength.add(dataChunkBytes[i].length);
    buffer = ByteBuffer.wrap(dataChunkBytes[i]);
    currentOffsetInFile += fileChannel.write(buffer);
    offset += dataChunkBytes[i].length;
    for (EncodedTablePage encodedTablePage : encodedTablePages) {
      EncodedColumnPage dimension = encodedTablePage.getDimension(i);
      buffer = dimension.getEncodedData();
      int bufferSize = buffer.limit();
      currentOffsetInFile += fileChannel.write(buffer);
      offset += bufferSize;
    }
  }
  dimensionOffset = offset;
  int dataChunkStartIndex = encodedTablePages.get(0).getNumDimensions();
  for (int i = 0; i < numberOfMeasures; i++) {
    currentDataChunksOffset.add(offset);
    currentDataChunksLength.add(dataChunkBytes[dataChunkStartIndex].length);
    buffer = ByteBuffer.wrap(dataChunkBytes[dataChunkStartIndex]);
    currentOffsetInFile += fileChannel.write(buffer);
    offset += dataChunkBytes[dataChunkStartIndex].length;
    dataChunkStartIndex++;
    for (EncodedTablePage encodedTablePage : encodedTablePages) {
      EncodedColumnPage measure = encodedTablePage.getMeasure(i);
      buffer = measure.getEncodedData();
      int bufferSize = buffer.limit();
      currentOffsetInFile += fileChannel.write(buffer);
      offset += bufferSize;
    }
  }
  measureOffset = offset;
  blockletIndex.add(
      CarbonMetadataUtil.getBlockletIndex(encodedTablePages, model.getSegmentProperties().getMeasures()));
  BlockletInfo3 blockletInfo3 =
      new BlockletInfo3(numberOfRows, currentDataChunksOffset, currentDataChunksLength,
          dimensionOffset, measureOffset, blockletDataHolder.getEncodedTablePages().size());
  blockletMetadata.add(blockletInfo3);
}
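The method above interleaves two concerns: serializing the blocklet (each column's chunk header followed by that column's pages, column by column) and recording the offset and length of every chunk header for the blocklet metadata. The following is a minimal, self-contained sketch of just that bookkeeping pattern; the class name ChunkOffsetSketch, the stand-in byte arrays, and the temp file are hypothetical illustration, not CarbonData API.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;

public class ChunkOffsetSketch {

  public static void main(String[] args) throws IOException {
    Path file = Files.createTempFile("blocklet-sketch", ".bin");
    // analogous to currentDataChunksOffset / currentDataChunksLength above
    List<Long> chunkOffsets = new ArrayList<>();
    List<Integer> chunkLengths = new ArrayList<>();
    // stand-ins for dataChunkBytes (serialized chunk headers) and the encoded page bytes
    byte[][] headers = { { 1, 2, 3 }, { 4, 5 } };
    byte[][][] pages = { { { 9, 9 }, { 8 } }, { { 7, 7, 7 } } };
    try (FileChannel channel = FileChannel.open(file, StandardOpenOption.WRITE)) {
      long offset = 0;
      for (int col = 0; col < headers.length; col++) {
        // record where this column chunk starts and how long its header is
        chunkOffsets.add(offset);
        chunkLengths.add(headers[col].length);
        offset += channel.write(ByteBuffer.wrap(headers[col]));
        // the chunk header is followed immediately by that column's page buffers
        for (byte[] page : pages[col]) {
          offset += channel.write(ByteBuffer.wrap(page));
        }
      }
    }
    System.out.println("chunk offsets: " + chunkOffsets + ", header lengths: " + chunkLengths);
  }
}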
Use of org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage in project carbondata by apache.
The class TablePage, method encodeAndCompressDimensions.
// apply and compress each dimension, set encoded data in `encodedData`
private EncodedColumnPage[] encodeAndCompressDimensions() throws KeyGenException, IOException, MemoryException {
  List<EncodedColumnPage> encodedDimensions = new ArrayList<>();
  List<EncodedColumnPage> encodedComplexDimenions = new ArrayList<>();
  TableSpec tableSpec = model.getTableSpec();
  int dictIndex = 0;
  int noDictIndex = 0;
  int complexDimIndex = 0;
  int numDimensions = tableSpec.getNumDimensions();
  for (int i = 0; i < numDimensions; i++) {
    ColumnPageEncoder columnPageEncoder;
    EncodedColumnPage encodedPage;
    TableSpec.DimensionSpec spec = tableSpec.getDimensionSpec(i);
    switch (spec.getColumnType()) {
      case GLOBAL_DICTIONARY:
      case DIRECT_DICTIONARY:
        columnPageEncoder = encodingFactory.createEncoder(spec, dictDimensionPages[dictIndex]);
        encodedPage = columnPageEncoder.encode(dictDimensionPages[dictIndex++]);
        encodedDimensions.add(encodedPage);
        break;
      case PLAIN_VALUE:
        columnPageEncoder = encodingFactory.createEncoder(spec, noDictDimensionPages[noDictIndex]);
        encodedPage = columnPageEncoder.encode(noDictDimensionPages[noDictIndex++]);
        encodedDimensions.add(encodedPage);
        break;
      case COMPLEX:
        EncodedColumnPage[] encodedPages =
            ColumnPageEncoder.encodeComplexColumn(complexDimensionPages[complexDimIndex++]);
        encodedComplexDimenions.addAll(Arrays.asList(encodedPages));
        break;
      default:
        throw new IllegalArgumentException("unsupported dimension type:" + spec.getColumnType());
    }
  }
  encodedDimensions.addAll(encodedComplexDimenions);
  return encodedDimensions.toArray(new EncodedColumnPage[encodedDimensions.size()]);
}
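A note on the ordering this method establishes: dictionary and plain-value dimensions are appended to encodedDimensions in schema order as they are encoded, while a complex column expands via ColumnPageEncoder.encodeComplexColumn into several EncodedColumnPage instances (presumably one per flattened child page). Those are collected separately and only appended after the loop, so the returned array holds all primitive dimension pages first, followed by all complex child pages.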
Use of org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage in project carbondata by apache.
The class TablePage, method encodeAndCompressMeasures.
// encode and compress each measure column page
private EncodedColumnPage[] encodeAndCompressMeasures() throws MemoryException, IOException {
  EncodedColumnPage[] encodedMeasures = new EncodedColumnPage[measurePages.length];
  for (int i = 0; i < measurePages.length; i++) {
    ColumnPageEncoder encoder =
        encodingFactory.createEncoder(model.getTableSpec().getMeasureSpec(i), measurePages[i]);
    encodedMeasures[i] = encoder.encode(measurePages[i]);
  }
  return encodedMeasures;
}
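As a small usage sketch, the pages returned by the two encoding methods above expose their serialized form through getEncodedData(), the same call the V3 writer uses when it computes page sizes and file offsets. The helper name totalEncodedSize below is hypothetical, not part of CarbonData:

// Illustrative helper (not CarbonData API): total serialized size of a set of encoded pages.
private static long totalEncodedSize(EncodedColumnPage[] encodedPages) {
  long total = 0;
  for (EncodedColumnPage page : encodedPages) {
    // getEncodedData() returns the ByteBuffer that is later written to the file channel
    total += page.getEncodedData().limit();
  }
  return total;
}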
Use of org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage in project carbondata by apache.
The class CarbonMetadataUtilTest, method testConvertFileFooter.
@Test
public void testConvertFileFooter() throws Exception {
  int[] cardinality = { 1, 2, 3, 4, 5 };
  org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema colSchema = new org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema();
  org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema colSchema1 = new org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema();
  List<org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema> columnSchemaList = new ArrayList<>();
  columnSchemaList.add(colSchema);
  columnSchemaList.add(colSchema1);
  SegmentProperties segmentProperties = new SegmentProperties(columnSchemaList, cardinality);
  final EncodedColumnPage measure = new EncodedColumnPage(new DataChunk2(), new byte[] { 0, 1 },
      PrimitivePageStatsCollector.newInstance(org.apache.carbondata.core.metadata.datatype.DataTypes.BYTE));
  new MockUp<EncodedTablePage>() {

    @SuppressWarnings("unused")
    @Mock
    public EncodedColumnPage getMeasure(int measureIndex) {
      return measure;
    }
  };
  new MockUp<TablePageKey>() {

    @SuppressWarnings("unused")
    @Mock
    public byte[] serializeStartKey() {
      return new byte[] { 1, 2 };
    }

    @SuppressWarnings("unused")
    @Mock
    public byte[] serializeEndKey() {
      return new byte[] { 1, 2 };
    }
  };
  TablePageKey key = new TablePageKey(3, segmentProperties, false);
  EncodedTablePage encodedTablePage =
      EncodedTablePage.newInstance(3, new EncodedColumnPage[0], new EncodedColumnPage[0], key);
  List<EncodedTablePage> encodedTablePageList = new ArrayList<>();
  encodedTablePageList.add(encodedTablePage);
  BlockletInfo3 blockletInfoColumnar1 = new BlockletInfo3();
  List<BlockletInfo3> blockletInfoColumnarList = new ArrayList<>();
  blockletInfoColumnarList.add(blockletInfoColumnar1);
  byte[] byteMaxArr = "1".getBytes();
  byte[] byteMinArr = "2".getBytes();
  BlockletIndex index = getBlockletIndex(encodedTablePageList, segmentProperties.getMeasures());
  List<BlockletIndex> indexList = new ArrayList<>();
  indexList.add(index);
  BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
  blockletMinMaxIndex.addToMax_values(ByteBuffer.wrap(byteMaxArr));
  blockletMinMaxIndex.addToMin_values(ByteBuffer.wrap(byteMinArr));
  FileFooter3 footer = convertFileFooterVersion3(blockletInfoColumnarList, indexList, cardinality, 2);
  assertEquals(footer.getBlocklet_index_list(), indexList);
}