use of org.apache.carbondata.core.datastore.FileReader in project carbondata by apache.
the class CompressedDimensionChunkFileBasedReaderV1 method decodeColumnPage.
@Override
public DimensionColumnPage decodeColumnPage(DimensionRawColumnChunk dimensionRawColumnChunk,
    int pageNumber) throws IOException {
  int blockIndex = dimensionRawColumnChunk.getColumnIndex();
  byte[] dataPage = null;
  int[] invertedIndexes = null;
  int[] invertedIndexesReverse = null;
  int[] rlePage = null;
  FileReader fileReader = dimensionRawColumnChunk.getFileReader();
  ByteBuffer rawData = dimensionRawColumnChunk.getRawData();
  dataPage = COMPRESSOR.unCompressByte(rawData.array(), (int) dimensionRawColumnChunk.getOffSet(),
      dimensionRawColumnChunk.getLength());
  DataChunk dataChunk = dimensionColumnChunk.get(blockIndex);
  // if the row id block is present, read the row id chunk and uncompress it
  if (CarbonUtil.hasEncoding(dataChunk.getEncodingList(), Encoding.INVERTED_INDEX)) {
    byte[] columnIndexData;
    synchronized (fileReader) {
      columnIndexData = fileReader.readByteArray(filePath, dataChunk.getRowIdPageOffset(),
          dataChunk.getRowIdPageLength());
    }
    invertedIndexes = CarbonUtil.getUnCompressColumnIndex(dataChunk.getRowIdPageLength(),
        columnIndexData, numberComressor, 0);
    // get the reverse index
    invertedIndexesReverse = getInvertedReverseIndex(invertedIndexes);
  }
  // if an RLE block is present, uncompress the actual data using the RLE indexes
  if (CarbonUtil.hasEncoding(dataChunk.getEncodingList(), Encoding.RLE)) {
    // read and uncompress the RLE block
    byte[] key;
    synchronized (fileReader) {
      key = fileReader.readByteArray(filePath, dataChunk.getRlePageOffset(),
          dataChunk.getRlePageLength());
    }
    rlePage = numberComressor.unCompress(key, 0, dataChunk.getRlePageLength());
    // uncompress the data with the RLE indexes
    dataPage = UnBlockIndexer.uncompressData(dataPage, rlePage, eachColumnValueSize[blockIndex]);
    rlePage = null;
  }
  // fill the chunk attributes and set them on a data chunk instance
  DimensionColumnPage columnDataChunk = null;
  if (dataChunk.isRowMajor()) {
    // to store row-major (column group) chunk values
    columnDataChunk = new ColumnGroupDimensionColumnPage(dataPage,
        eachColumnValueSize[blockIndex], numberOfRows);
  } else if (!CarbonUtil.hasEncoding(dataChunk.getEncodingList(), Encoding.DICTIONARY)) {
    // to store variable length column chunk values
    columnDataChunk = new VariableLengthDimensionColumnPage(dataPage, invertedIndexes,
        invertedIndexesReverse, numberOfRows);
  } else {
    // to store fixed length column chunk values
    columnDataChunk = new FixedLengthDimensionColumnPage(dataPage, invertedIndexes,
        invertedIndexesReverse, numberOfRows, eachColumnValueSize[blockIndex]);
  }
  return columnDataChunk;
}
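Callers do not normally invoke this V1 reader directly. In the scanner shown later in this section, page decoding goes through the raw chunk, which delegates to the reader attached to it; for V1-format files that reader is expected to be the class above. A minimal sketch of the call site:

// the raw chunk delegates page decoding to its chunk reader
DimensionColumnPage page = dimensionRawColumnChunk.decodeColumnPage(pageId);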
use of org.apache.carbondata.core.datastore.FileReader in project carbondata by apache.
the class CarbonUtil method calculateMetaSize.
/**
 * Calculates the size of the B-Tree metadata stored in a block: the number of bytes
 * between the start of the footer and the 8-byte footer pointer at the end of the file.
 *
 * @param tableBlockInfo block whose metadata size is to be calculated
 * @return size of the footer metadata in bytes
 */
public static long calculateMetaSize(TableBlockInfo tableBlockInfo) throws IOException {
  FileReader fileReader = null;
  try {
    long completeBlockLength = tableBlockInfo.getBlockLength();
    long footerPointer = completeBlockLength - 8;
    String filePath = tableBlockInfo.getFilePath();
    fileReader = FileFactory.getFileHolder(FileFactory.getFileType(filePath));
    long actualFooterOffset = fileReader.readLong(filePath, footerPointer);
    return footerPointer - actualFooterOffset;
  } finally {
    if (null != fileReader) {
      try {
        fileReader.finish();
      } catch (IOException e) {
        // ignore the exception as there is nothing we can do about it
        fileReader = null;
      }
    }
  }
}
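The last 8 bytes of a block hold the offset at which the footer begins, so the metadata size falls out of simple arithmetic. A standalone sketch of that layout math, using hypothetical numbers chosen only for illustration:

// hypothetical values, for illustration only
long completeBlockLength = 10000L;                   // total block size in bytes
long footerPointer = completeBlockLength - 8;        // the last 8 bytes store the footer offset
long actualFooterOffset = 9000L;                     // value read from those 8 bytes
long metaSize = footerPointer - actualFooterOffset;  // 992 bytes of B-Tree metadata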
use of org.apache.carbondata.core.datastore.FileReader in project carbondata by apache.
the class DataFileFooterConverter method readDataFileFooter.
/**
 * Converts the Thrift file footer to the wrapper DataFileFooter.
 */
@Override
public DataFileFooter readDataFileFooter(TableBlockInfo tableBlockInfo) throws IOException {
  DataFileFooter dataFileFooter = new DataFileFooter();
  FileReader fileReader = null;
  try {
    long completeBlockLength = tableBlockInfo.getBlockLength();
    long footerPointer = completeBlockLength - 8;
    fileReader = FileFactory.getFileHolder(FileFactory.getFileType(tableBlockInfo.getFilePath()));
    long actualFooterOffset = fileReader.readLong(tableBlockInfo.getFilePath(), footerPointer);
    CarbonFooterReader reader = new CarbonFooterReader(tableBlockInfo.getFilePath(), actualFooterOffset);
    FileFooter footer = reader.readFooter();
    dataFileFooter.setVersionId(ColumnarFormatVersion.valueOf((short) footer.getVersion()));
    dataFileFooter.setNumberOfRows(footer.getNum_rows());
    dataFileFooter.setSegmentInfo(getSegmentInfo(footer.getSegment_info()));
    // convert the Thrift column schemas to wrapper column schemas
    List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
    List<org.apache.carbondata.format.ColumnSchema> table_columns = footer.getTable_columns();
    for (int i = 0; i < table_columns.size(); i++) {
      columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
    }
    dataFileFooter.setColumnInTable(columnSchemaList);
    // convert the blocklet indexes
    List<org.apache.carbondata.format.BlockletIndex> leaf_node_indices_Thrift =
        footer.getBlocklet_index_list();
    List<BlockletIndex> blockletIndexList = new ArrayList<BlockletIndex>();
    for (int i = 0; i < leaf_node_indices_Thrift.size(); i++) {
      BlockletIndex blockletIndex = getBlockletIndex(leaf_node_indices_Thrift.get(i));
      blockletIndexList.add(blockletIndex);
    }
    // convert the blocklet infos and pair each one with its index
    List<org.apache.carbondata.format.BlockletInfo> leaf_node_infos_Thrift =
        footer.getBlocklet_info_list();
    List<BlockletInfo> blockletInfoList = new ArrayList<BlockletInfo>();
    for (int i = 0; i < leaf_node_infos_Thrift.size(); i++) {
      BlockletInfo blockletInfo = getBlockletInfo(leaf_node_infos_Thrift.get(i));
      blockletInfo.setBlockletIndex(blockletIndexList.get(i));
      blockletInfoList.add(blockletInfo);
    }
    dataFileFooter.setBlockletList(blockletInfoList);
    dataFileFooter.setBlockletIndex(getBlockletIndexForDataFileFooter(blockletIndexList));
  } finally {
    if (null != fileReader) {
      fileReader.finish();
    }
  }
  return dataFileFooter;
}
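A hedged usage sketch: the TableBlockInfo is assumed to come from CarbonData's driver-side block pruning (its constructor takes several engine-internal arguments, so construction is elided here), the no-argument converter constructor is assumed, and the getters are assumed to mirror the setters used above:

// 'tableBlockInfo' is assumed to be supplied by CarbonData's block pruning
DataFileFooterConverter converter = new DataFileFooterConverter();
DataFileFooter footer = converter.readDataFileFooter(tableBlockInfo);
// read back what the method populated above
long numberOfRows = footer.getNumberOfRows();
List<BlockletInfo> blocklets = footer.getBlockletList();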
use of org.apache.carbondata.core.datastore.FileReader in project carbondata by apache.
the class DataFileFooterConverter method getSchema.
@Override
public List<ColumnSchema> getSchema(TableBlockInfo tableBlockInfo) throws IOException {
  FileReader fileReader = null;
  List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
  try {
    long completeBlockLength = tableBlockInfo.getBlockLength();
    long footerPointer = completeBlockLength - 8;
    fileReader = FileFactory.getFileHolder(FileFactory.getFileType(tableBlockInfo.getFilePath()));
    long actualFooterOffset = fileReader.readLong(tableBlockInfo.getFilePath(), footerPointer);
    CarbonFooterReader reader = new CarbonFooterReader(tableBlockInfo.getFilePath(), actualFooterOffset);
    FileFooter footer = reader.readFooter();
    List<org.apache.carbondata.format.ColumnSchema> table_columns = footer.getTable_columns();
    for (int i = 0; i < table_columns.size(); i++) {
      columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
    }
  } finally {
    if (null != fileReader) {
      fileReader.finish();
    }
  }
  return columnSchemaList;
}
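calculateMetaSize, readDataFileFooter, and getSchema all locate the footer the same way: read the long stored in the last 8 bytes of the block and treat it as the footer's start offset. A minimal sketch of that shared pattern, extracted into a hypothetical helper that is not part of the CarbonData API:

// hypothetical helper, not part of CarbonData: returns the offset where the footer begins
static long readFooterOffset(FileReader fileReader, String filePath, long blockLength)
    throws IOException {
  long footerPointer = blockLength - 8;  // the footer offset lives in the last 8 bytes
  return fileReader.readLong(filePath, footerPointer);
}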
use of org.apache.carbondata.core.datastore.FileReader in project carbondata by apache.
the class BlockletFilterScanner method executeFilter.
/**
 * This method processes the data in the following order:
 * 1. First apply the min/max values to the filter tree and check whether any filter
 *    falls within the min/max range; if not, return an empty result.
 * 2. If a filter falls within the min/max range, apply the filter to the actual
 *    data and get the filtered row indexes.
 * 3. If the row indexes are empty, return an empty result.
 * 4. If the row indexes are not empty, read only those blocks (measure or dimension)
 *    that are present in the query but not in the filter; while applying the filter
 *    some blocks were already read into the chunk holder, so there is no need to
 *    read them again. This avoids re-reading blocks that were already read.
 * 5. Set the blocks and the filtered row indexes on the result.
 *
 * @param rawBlockletColumnChunks
 * @throws FilterUnsupportedException
 */
private BlockletScannedResult executeFilter(RawBlockletColumnChunks rawBlockletColumnChunks)
    throws FilterUnsupportedException, IOException {
  long startTime = System.currentTimeMillis();
  QueryStatistic totalBlockletStatistic = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM);
  totalBlockletStatistic.addCountStatistic(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM,
      totalBlockletStatistic.getCount() + 1);
  // set the indexed data, if any, produced during FG datamap pruning
  rawBlockletColumnChunks.setBitSetGroup(rawBlockletColumnChunks.getDataBlock().getIndexedData());
  // apply the filter on the actual data, for each page
  BitSetGroup bitSetGroup = this.filterExecuter.applyFilter(rawBlockletColumnChunks, useBitSetPipeLine);
  // if the filter result is empty then return an empty result
  if (bitSetGroup.isEmpty()) {
    CarbonUtil.freeMemory(rawBlockletColumnChunks.getDimensionRawColumnChunks(),
        rawBlockletColumnChunks.getMeasureRawColumnChunks());
    QueryStatistic scanTime = queryStatisticsModel.getStatisticsTypeAndObjMap()
        .get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME);
    scanTime.addCountStatistic(QueryStatisticsConstants.SCAN_BLOCKlET_TIME,
        scanTime.getCount() + (System.currentTimeMillis() - startTime));
    QueryStatistic scannedPages = queryStatisticsModel.getStatisticsTypeAndObjMap()
        .get(QueryStatisticsConstants.PAGE_SCANNED);
    scannedPages.addCountStatistic(QueryStatisticsConstants.PAGE_SCANNED,
        scannedPages.getCount() + bitSetGroup.getScannedPages());
    return createEmptyResult();
  }
  BlockletScannedResult scannedResult = new FilterQueryScannedResult(blockExecutionInfo);
  scannedResult.setBlockletId(blockExecutionInfo.getBlockIdString()
      + CarbonCommonConstants.FILE_SEPARATOR
      + rawBlockletColumnChunks.getDataBlock().blockletIndex());
  // add statistics for the valid scanned blocklet
  QueryStatistic validScannedBlockletStatistic = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM);
  validScannedBlockletStatistic.addCountStatistic(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM,
      validScannedBlockletStatistic.getCount() + 1);
  // add statistics for the valid number of pages
  QueryStatistic validPages = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.VALID_PAGE_SCANNED);
  validPages.addCountStatistic(QueryStatisticsConstants.VALID_PAGE_SCANNED,
      validPages.getCount() + bitSetGroup.getValidPages());
  QueryStatistic scannedPages = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.PAGE_SCANNED);
  scannedPages.addCountStatistic(QueryStatisticsConstants.PAGE_SCANNED,
      scannedPages.getCount() + bitSetGroup.getScannedPages());
  int[] pageFilteredRowCount = new int[bitSetGroup.getNumberOfPages()];
  // get the row indexes from the bit set for each page
  int[][] pageFilteredRowId = new int[bitSetGroup.getNumberOfPages()][];
  int numPages = pageFilteredRowId.length;
  for (int pageId = 0; pageId < numPages; pageId++) {
    BitSet bitSet = bitSetGroup.getBitSet(pageId);
    if (bitSet != null && !bitSet.isEmpty()) {
      int[] matchedRowId = new int[bitSet.cardinality()];
      int index = 0;
      for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
        matchedRowId[index++] = i;
      }
      pageFilteredRowCount[pageId] = matchedRowId.length;
      pageFilteredRowId[pageId] = matchedRowId;
    }
  }
  // accumulator for dimension chunk read time (starts at ~0 here; it is subtracted
  // from the scan time and added to the read time at the end of the method)
  long dimensionReadTime = System.currentTimeMillis();
  dimensionReadTime = System.currentTimeMillis() - dimensionReadTime;
  FileReader fileReader = rawBlockletColumnChunks.getFileReader();
  DimensionRawColumnChunk[] dimensionRawColumnChunks =
      new DimensionRawColumnChunk[blockExecutionInfo.getTotalNumberDimensionToRead()];
  int numDimensionChunks = dimensionRawColumnChunks.length;
  // copy the dimension chunks already read during filtering, then read from file
  // the selected ranges that are not yet present
  for (int chunkIndex = 0; chunkIndex < numDimensionChunks; chunkIndex++) {
    dimensionRawColumnChunks[chunkIndex] =
        rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex];
  }
  int[][] allSelectedDimensionColumnIndexRange =
      blockExecutionInfo.getAllSelectedDimensionColumnIndexRange();
  DimensionRawColumnChunk[] projectionListDimensionChunk = rawBlockletColumnChunks.getDataBlock()
      .readDimensionChunks(fileReader, allSelectedDimensionColumnIndexRange);
  for (int[] columnIndexRange : allSelectedDimensionColumnIndexRange) {
    System.arraycopy(projectionListDimensionChunk, columnIndexRange[0], dimensionRawColumnChunks,
        columnIndexRange[0], columnIndexRange[1] + 1 - columnIndexRange[0]);
  }
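  // note: each columnIndexRange holds inclusive [start, end] chunk positions, hence the
  // 'end + 1 - start' copy length; the measure ranges further down follow the same convention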
  /*
   * in case of projection, if the projected dimensions are not yet loaded in the
   * dimensionRawColumnChunks array, load them now
   */
  int[] projectionListDimensionIndexes = blockExecutionInfo.getProjectionListDimensionIndexes();
  for (int projectionListDimensionIndex : projectionListDimensionIndexes) {
    if (null == dimensionRawColumnChunks[projectionListDimensionIndex]) {
      dimensionRawColumnChunks[projectionListDimensionIndex] = rawBlockletColumnChunks
          .getDataBlock().readDimensionChunk(fileReader, projectionListDimensionIndex);
    }
  }
  // decode every page of every dimension chunk that was read
  DimensionColumnPage[][] dimensionColumnPages =
      new DimensionColumnPage[numDimensionChunks][numPages];
  for (int chunkIndex = 0; chunkIndex < numDimensionChunks; chunkIndex++) {
    if (dimensionRawColumnChunks[chunkIndex] != null) {
      for (int pageId = 0; pageId < numPages; pageId++) {
        dimensionColumnPages[chunkIndex][pageId] =
            dimensionRawColumnChunks[chunkIndex].decodeColumnPage(pageId);
      }
    }
  }
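  // at this point every projected dimension chunk has been read and decoded into pages;
  // the measure chunks below go through the same read-then-decode sequence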
  MeasureRawColumnChunk[] measureRawColumnChunks =
      new MeasureRawColumnChunk[blockExecutionInfo.getTotalNumberOfMeasureToRead()];
  int numMeasureChunks = measureRawColumnChunks.length;
  // copy the measure chunks already read during filtering, then read from file
  // the selected ranges that are not yet present
  for (int chunkIndex = 0; chunkIndex < numMeasureChunks; chunkIndex++) {
    if (null != rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]) {
      measureRawColumnChunks[chunkIndex] =
          rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex];
    }
  }
  int[][] allSelectedMeasureColumnIndexRange =
      blockExecutionInfo.getAllSelectedMeasureIndexRange();
  MeasureRawColumnChunk[] projectionListMeasureChunk = rawBlockletColumnChunks.getDataBlock()
      .readMeasureChunks(fileReader, allSelectedMeasureColumnIndexRange);
  for (int[] columnIndexRange : allSelectedMeasureColumnIndexRange) {
    System.arraycopy(projectionListMeasureChunk, columnIndexRange[0], measureRawColumnChunks,
        columnIndexRange[0], columnIndexRange[1] + 1 - columnIndexRange[0]);
  }
  /*
   * in case of projection, if the projected measures are not yet loaded as
   * ColumnPages, load them now
   */
  int[] projectionListMeasureIndexes = blockExecutionInfo.getProjectionListMeasureIndexes();
  for (int projectionListMeasureIndex : projectionListMeasureIndexes) {
    if (null == measureRawColumnChunks[projectionListMeasureIndex]) {
      measureRawColumnChunks[projectionListMeasureIndex] = rawBlockletColumnChunks
          .getDataBlock().readMeasureChunk(fileReader, projectionListMeasureIndex);
    }
  }
  // decode every page of every measure chunk that was read
  ColumnPage[][] measureColumnPages = new ColumnPage[numMeasureChunks][numPages];
  for (int chunkIndex = 0; chunkIndex < numMeasureChunks; chunkIndex++) {
    if (measureRawColumnChunks[chunkIndex] != null) {
      for (int pageId = 0; pageId < numPages; pageId++) {
        measureColumnPages[chunkIndex][pageId] =
            measureRawColumnChunks[chunkIndex].decodeColumnPage(pageId);
      }
    }
  }
  scannedResult.setDimensionColumnPages(dimensionColumnPages);
  scannedResult.setPageFilteredRowId(pageFilteredRowId);
  scannedResult.setMeasureColumnPages(measureColumnPages);
  scannedResult.setDimRawColumnChunks(dimensionRawColumnChunks);
  scannedResult.setMsrRawColumnChunks(measureRawColumnChunks);
  scannedResult.setPageFilteredRowCount(pageFilteredRowCount);
  // add statistics for the carbon scan time
  QueryStatistic scanTime = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME);
  scanTime.addCountStatistic(QueryStatisticsConstants.SCAN_BLOCKlET_TIME,
      scanTime.getCount() + (System.currentTimeMillis() - startTime - dimensionReadTime));
  QueryStatistic readTime = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.READ_BLOCKlET_TIME);
  readTime.addCountStatistic(QueryStatisticsConstants.READ_BLOCKlET_TIME,
      readTime.getCount() + dimensionReadTime);
  return scannedResult;
}
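The row-id extraction in the middle of executeFilter is the heart of the page-level filtering. Below is a self-contained sketch of the same BitSet-to-array pattern, runnable on its own with only java.util classes:

import java.util.Arrays;
import java.util.BitSet;

public class BitSetRowIds {
  public static void main(String[] args) {
    // simulate a page filter result where rows 1, 3 and 7 matched
    BitSet bitSet = new BitSet();
    bitSet.set(1);
    bitSet.set(3);
    bitSet.set(7);
    // walk the set bits exactly as executeFilter does
    int[] matchedRowId = new int[bitSet.cardinality()];
    int index = 0;
    for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
      matchedRowId[index++] = i;
    }
    System.out.println(Arrays.toString(matchedRowId));  // prints [1, 3, 7]
  }
}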