use of org.apache.carbondata.core.datastore.page.EncodedTablePage in project carbondata by apache.
the class CarbonMetadataUtil method getBlockletIndex.
public static BlockletIndex getBlockletIndex(List<EncodedTablePage> encodedTablePageList, List<CarbonMeasure> carbonMeasureList) {
  BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
  // calculate blocklet-level min/max for each dimension column across all pages
  TablePageStatistics stats = new TablePageStatistics(encodedTablePageList.get(0).getDimensions(), encodedTablePageList.get(0).getMeasures());
  byte[][] minCol = stats.getDimensionMinValue().clone();
  byte[][] maxCol = stats.getDimensionMaxValue().clone();
  for (EncodedTablePage encodedTablePage : encodedTablePageList) {
    stats = new TablePageStatistics(encodedTablePage.getDimensions(), encodedTablePage.getMeasures());
    byte[][] columnMaxData = stats.getDimensionMaxValue();
    byte[][] columnMinData = stats.getDimensionMinValue();
    for (int i = 0; i < maxCol.length; i++) {
      if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(columnMaxData[i], maxCol[i]) > 0) {
        maxCol[i] = columnMaxData[i];
      }
      if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(columnMinData[i], minCol[i]) < 0) {
        minCol[i] = columnMinData[i];
      }
    }
  }
  // add the dimension min/max to the thrift min/max index
  for (byte[] max : maxCol) {
    blockletMinMaxIndex.addToMax_values(ByteBuffer.wrap(max));
  }
  for (byte[] min : minCol) {
    blockletMinMaxIndex.addToMin_values(ByteBuffer.wrap(min));
  }
  // calculate blocklet-level min/max for each measure column across all pages
  stats = new TablePageStatistics(encodedTablePageList.get(0).getDimensions(), encodedTablePageList.get(0).getMeasures());
  byte[][] measureMaxValue = stats.getMeasureMaxValue().clone();
  byte[][] measureMinValue = stats.getMeasureMinValue().clone();
  byte[] minVal = null;
  byte[] maxVal = null;
  for (int i = 1; i < encodedTablePageList.size(); i++) {
    for (int j = 0; j < measureMinValue.length; j++) {
      stats = new TablePageStatistics(encodedTablePageList.get(i).getDimensions(), encodedTablePageList.get(i).getMeasures());
      minVal = stats.getMeasureMinValue()[j];
      maxVal = stats.getMeasureMaxValue()[j];
      if (compareMeasureData(measureMaxValue[j], maxVal, carbonMeasureList.get(j).getDataType()) < 0) {
        measureMaxValue[j] = maxVal.clone();
      }
      if (compareMeasureData(measureMinValue[j], minVal, carbonMeasureList.get(j).getDataType()) > 0) {
        measureMinValue[j] = minVal.clone();
      }
    }
  }
  // add the measure min/max to the thrift min/max index
  for (byte[] max : measureMaxValue) {
    blockletMinMaxIndex.addToMax_values(ByteBuffer.wrap(max));
  }
  for (byte[] min : measureMinValue) {
    blockletMinMaxIndex.addToMin_values(ByteBuffer.wrap(min));
  }
  // the B-tree index uses the start key of the first page and the end key of the last page
  BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex();
  byte[] startKey = encodedTablePageList.get(0).getPageKey().serializeStartKey();
  blockletBTreeIndex.setStart_key(startKey);
  byte[] endKey = encodedTablePageList.get(encodedTablePageList.size() - 1).getPageKey().serializeEndKey();
  blockletBTreeIndex.setEnd_key(endKey);
  BlockletIndex blockletIndex = new BlockletIndex();
  blockletIndex.setMin_max_index(blockletMinMaxIndex);
  blockletIndex.setB_tree_index(blockletBTreeIndex);
  return blockletIndex;
}
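The heart of this method is folding per-page min/max statistics into a single blocklet-level min/max per column. A minimal standalone sketch of that merge, where pageMins/pageMaxs are hypothetical inputs (one byte[] per column for every page) and Arrays.compareUnsigned (Java 9+) stands in for ByteUtil.UnsafeComparer:

import java.util.Arrays;
import java.util.List;

// Standalone sketch of the per-column min/max merge; not the CarbonData implementation.
public class MinMaxMergeSketch {
  public static byte[][][] merge(List<byte[][]> pageMins, List<byte[][]> pageMaxs) {
    byte[][] min = pageMins.get(0).clone();
    byte[][] max = pageMaxs.get(0).clone();
    for (int page = 1; page < pageMins.size(); page++) {
      for (int col = 0; col < min.length; col++) {
        if (Arrays.compareUnsigned(pageMins.get(page)[col], min[col]) < 0) {
          min[col] = pageMins.get(page)[col];
        }
        if (Arrays.compareUnsigned(pageMaxs.get(page)[col], max[col]) > 0) {
          max[col] = pageMaxs.get(page)[col];
        }
      }
    }
    // index 0 holds the merged minimums, index 1 the merged maximums
    return new byte[][][] { min, max };
  }
}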
use of org.apache.carbondata.core.datastore.page.EncodedTablePage in project carbondata by apache.
the class CarbonFactDataWriterImplV3 method writeBlockletToFile.
/**
 * Write the collected blocklet data (blockletDataHolder) to file
 */
private void writeBlockletToFile() {
  // get the list of all encoded table pages
  List<EncodedTablePage> encodedTablePageList = blockletDataHolder.getEncodedTablePages();
  int numDimensions = encodedTablePageList.get(0).getNumDimensions();
  int numMeasures = encodedTablePageList.get(0).getNumMeasures();
  // get data chunks for all the columns
  byte[][] dataChunkBytes = new byte[numDimensions + numMeasures][];
  long metadataSize = fillDataChunk(encodedTablePageList, dataChunkBytes);
  // calculate the total size of data to be written
  long blockletSize = blockletDataHolder.getSize() + metadataSize;
  // if the data size would exceed the block size, create a new file
  createNewFileIfReachThreshold(blockletSize);
  // write data to file
  try {
    if (currentOffsetInFile == 0) {
      // write the header if the file is empty
      writeHeaderToFile();
    }
    writeBlockletToFile(dataChunkBytes);
    if (listener != null) {
      listener.onBlockletEnd(blockletId++);
    }
    pageId = 0;
  } catch (IOException e) {
    LOGGER.error(e, "Problem while writing file");
    throw new CarbonDataWriterException("Problem while writing file", e);
  } finally {
    // clear the data holder
    blockletDataHolder.clear();
  }
}
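createNewFileIfReachThreshold is not shown in this snippet; its job is to keep a CarbonData file within the configured block size before the blocklet is written. A hedged sketch of that threshold logic, where BlockRolloverSketch and its fields are illustrative names rather than the actual implementation:

// Hedged sketch, not the actual CarbonData implementation: decide whether the
// current file can hold the next blocklet or a new file must be opened first.
class BlockRolloverSketch {
  private final long blockSizeInBytes; // configured table block size (assumption)
  private long currentFileSize;        // bytes already assigned to the current file

  BlockRolloverSketch(long blockSizeInBytes) {
    this.blockSizeInBytes = blockSizeInBytes;
  }

  // Returns true when the caller should close the current file and open a new one
  // before writing a blocklet of the given size.
  boolean shouldRollOver(long blockletSize) {
    boolean rollOver = currentFileSize != 0 && currentFileSize + blockletSize >= blockSizeInBytes;
    if (rollOver) {
      currentFileSize = 0; // the blocklet will start a fresh file
    }
    currentFileSize += blockletSize;
    return rollOver;
  }
}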
use of org.apache.carbondata.core.datastore.page.EncodedTablePage in project carbondata by apache.
the class CarbonFactDataWriterImplV3 method writeBlockletToFile.
/**
 * Write one blocklet's data into the file
 * File format:
 * <Column1 Data ChunkV3><Column1<Page1><Page2><Page3><Page4>>
 * <Column2 Data ChunkV3><Column2<Page1><Page2><Page3><Page4>>
 * <Column3 Data ChunkV3><Column3<Page1><Page2><Page3><Page4>>
 * <Column4 Data ChunkV3><Column4<Page1><Page2><Page3><Page4>>
 */
private void writeBlockletToFile(byte[][] dataChunkBytes) throws IOException {
  long offset = currentOffsetInFile;
  // to maintain the offset of each data chunk in the blocklet
  List<Long> currentDataChunksOffset = new ArrayList<>();
  // to maintain the length of each data chunk in the blocklet
  List<Integer> currentDataChunksLength = new ArrayList<>();
  List<EncodedTablePage> encodedTablePages = blockletDataHolder.getEncodedTablePages();
  int numberOfDimension = encodedTablePages.get(0).getNumDimensions();
  int numberOfMeasures = encodedTablePages.get(0).getNumMeasures();
  ByteBuffer buffer = null;
  long dimensionOffset = 0;
  long measureOffset = 0;
  int numberOfRows = 0;
  // calculate the number of rows in the blocklet
  for (EncodedTablePage encodedTablePage : encodedTablePages) {
    numberOfRows += encodedTablePage.getPageSize();
  }
  // for each dimension column, write its data chunk header followed by all of its pages
  for (int i = 0; i < numberOfDimension; i++) {
    currentDataChunksOffset.add(offset);
    currentDataChunksLength.add(dataChunkBytes[i].length);
    buffer = ByteBuffer.wrap(dataChunkBytes[i]);
    currentOffsetInFile += fileChannel.write(buffer);
    offset += dataChunkBytes[i].length;
    for (EncodedTablePage encodedTablePage : encodedTablePages) {
      EncodedColumnPage dimension = encodedTablePage.getDimension(i);
      buffer = dimension.getEncodedData();
      int bufferSize = buffer.limit();
      currentOffsetInFile += fileChannel.write(buffer);
      offset += bufferSize;
    }
  }
  dimensionOffset = offset;
  int dataChunkStartIndex = encodedTablePages.get(0).getNumDimensions();
  // for each measure column, write its data chunk header followed by all of its pages
  for (int i = 0; i < numberOfMeasures; i++) {
    currentDataChunksOffset.add(offset);
    currentDataChunksLength.add(dataChunkBytes[dataChunkStartIndex].length);
    buffer = ByteBuffer.wrap(dataChunkBytes[dataChunkStartIndex]);
    currentOffsetInFile += fileChannel.write(buffer);
    offset += dataChunkBytes[dataChunkStartIndex].length;
    dataChunkStartIndex++;
    for (EncodedTablePage encodedTablePage : encodedTablePages) {
      EncodedColumnPage measure = encodedTablePage.getMeasure(i);
      buffer = measure.getEncodedData();
      int bufferSize = buffer.limit();
      currentOffsetInFile += fileChannel.write(buffer);
      offset += bufferSize;
    }
  }
  measureOffset = offset;
  // build the blocklet index (min/max and B-tree) and the blocklet metadata entry
  blockletIndex.add(CarbonMetadataUtil.getBlockletIndex(encodedTablePages, model.getSegmentProperties().getMeasures()));
  BlockletInfo3 blockletInfo3 = new BlockletInfo3(numberOfRows, currentDataChunksOffset, currentDataChunksLength, dimensionOffset, measureOffset, blockletDataHolder.getEncodedTablePages().size());
  blockletMetadata.add(blockletInfo3);
}
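The offset/length pair recorded per column (currentDataChunksOffset / currentDataChunksLength) points at that column's serialized data chunk metadata, which is what lets a reader seek straight to a single column. A hedged reader-side sketch; ColumnChunkReaderSketch and its parameters are illustrative, not the CarbonData reader API:

import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.List;

// Hedged sketch: read back one column's serialized data chunk metadata using the
// offset and length recorded while writing the blocklet.
class ColumnChunkReaderSketch {
  static byte[] readColumnChunkMetadata(String filePath, List<Long> chunkOffsets,
      List<Integer> chunkLengths, int columnIndex) throws IOException {
    try (RandomAccessFile file = new RandomAccessFile(filePath, "r")) {
      byte[] chunk = new byte[chunkLengths.get(columnIndex)];
      file.seek(chunkOffsets.get(columnIndex));
      file.readFully(chunk);
      // the column's encoded pages follow this metadata in the file
      return chunk;
    }
  }
}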
use of org.apache.carbondata.core.datastore.page.EncodedTablePage in project carbondata by apache.
the class BlockletDataHolder method addPage.
public void addPage(TablePage rawTablePage) {
  EncodedTablePage encodedTablePage = rawTablePage.getEncodedTablePage();
  this.encodedTablePage.add(encodedTablePage);
  currentSize += encodedTablePage.getEncodedSize();
}
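addPage only accumulates encoded pages and their total encoded size; the writer keeps adding pages until that size crosses the blocklet limit and then flushes via writeBlockletToFile. A hedged sketch of the accumulate-then-flush pattern, with PageHolderSketch being an illustrative class rather than CarbonData API:

import java.util.ArrayList;
import java.util.List;

// Hedged sketch of the accumulate-then-flush pattern behind BlockletDataHolder.
class PageHolderSketch {
  private final List<byte[]> encodedPages = new ArrayList<>();
  private final long blockletSizeLimit;
  private long currentSize;

  PageHolderSketch(long blockletSizeLimit) {
    this.blockletSizeLimit = blockletSizeLimit;
  }

  // Returns true once the holder is full and the caller should flush a blocklet.
  boolean addPage(byte[] encodedPage) {
    encodedPages.add(encodedPage);
    currentSize += encodedPage.length;
    return currentSize >= blockletSizeLimit;
  }

  // Hands the buffered pages to the caller and resets the holder.
  List<byte[]> drain() {
    List<byte[]> pages = new ArrayList<>(encodedPages);
    encodedPages.clear();
    currentSize = 0;
    return pages;
  }
}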
use of org.apache.carbondata.core.datastore.page.EncodedTablePage in project carbondata by apache.
the class CarbonMetadataUtilTest method testConvertFileFooter.
@Test
public void testConvertFileFooter() throws Exception {
  int[] cardinality = { 1, 2, 3, 4, 5 };
  org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema colSchema = new org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema();
  org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema colSchema1 = new org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema();
  List<org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema> columnSchemaList = new ArrayList<>();
  columnSchemaList.add(colSchema);
  columnSchemaList.add(colSchema1);
  SegmentProperties segmentProperties = new SegmentProperties(columnSchemaList, cardinality);
  final EncodedColumnPage measure = new EncodedColumnPage(new DataChunk2(), new byte[] { 0, 1 }, PrimitivePageStatsCollector.newInstance(org.apache.carbondata.core.metadata.datatype.DataTypes.BYTE));
  new MockUp<EncodedTablePage>() {
    @SuppressWarnings("unused")
    @Mock
    public EncodedColumnPage getMeasure(int measureIndex) {
      return measure;
    }
  };
  new MockUp<TablePageKey>() {
    @SuppressWarnings("unused")
    @Mock
    public byte[] serializeStartKey() {
      return new byte[] { 1, 2 };
    }

    @SuppressWarnings("unused")
    @Mock
    public byte[] serializeEndKey() {
      return new byte[] { 1, 2 };
    }
  };
  TablePageKey key = new TablePageKey(3, segmentProperties, false);
  EncodedTablePage encodedTablePage = EncodedTablePage.newInstance(3, new EncodedColumnPage[0], new EncodedColumnPage[0], key);
  List<EncodedTablePage> encodedTablePageList = new ArrayList<>();
  encodedTablePageList.add(encodedTablePage);
  BlockletInfo3 blockletInfoColumnar1 = new BlockletInfo3();
  List<BlockletInfo3> blockletInfoColumnarList = new ArrayList<>();
  blockletInfoColumnarList.add(blockletInfoColumnar1);
  byte[] byteMaxArr = "1".getBytes();
  byte[] byteMinArr = "2".getBytes();
  BlockletIndex index = getBlockletIndex(encodedTablePageList, segmentProperties.getMeasures());
  List<BlockletIndex> indexList = new ArrayList<>();
  indexList.add(index);
  BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
  blockletMinMaxIndex.addToMax_values(ByteBuffer.wrap(byteMaxArr));
  blockletMinMaxIndex.addToMin_values(ByteBuffer.wrap(byteMinArr));
  FileFooter3 footer = convertFileFooterVersion3(blockletInfoColumnarList, indexList, cardinality, 2);
  assertEquals(footer.getBlocklet_index_list(), indexList);
}
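Given the mocked TablePageKey, a natural extra check is that the generated index carries the stubbed start/end keys. This is a hedged addition (not part of the original test) and assumes the usual Thrift-generated accessors getB_tree_index(), getStart_key() and getEnd_key(), plus a static import of org.junit.Assert.assertArrayEquals:

// Hedged addition: verify the B-tree index picked up the keys stubbed by the
// TablePageKey MockUp (Thrift accessor names assumed).
assertArrayEquals(new byte[] { 1, 2 }, index.getB_tree_index().getStart_key());
assertArrayEquals(new byte[] { 1, 2 }, index.getB_tree_index().getEnd_key());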