Example 1 with FileReader

Use of org.apache.carbondata.core.datastore.FileReader in project carbondata by apache.

From the class CompressedDimensionChunkFileBasedReaderV1, method decodeColumnPage:

@Override
public DimensionColumnPage decodeColumnPage(DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
    int blockIndex = dimensionRawColumnChunk.getColumnIndex();
    byte[] dataPage = null;
    int[] invertedIndexes = null;
    int[] invertedIndexesReverse = null;
    int[] rlePage = null;
    FileReader fileReader = dimensionRawColumnChunk.getFileReader();
    ByteBuffer rawData = dimensionRawColumnChunk.getRawData();
    dataPage = COMPRESSOR.unCompressByte(rawData.array(), (int) dimensionRawColumnChunk.getOffSet(), dimensionRawColumnChunk.getLength());
    DataChunk dataChunk = dimensionColumnChunk.get(blockIndex);
    // if a row id block is present then read the row id chunk and uncompress it
    if (CarbonUtil.hasEncoding(dataChunk.getEncodingList(), Encoding.INVERTED_INDEX)) {
        byte[] columnIndexData;
        synchronized (fileReader) {
            columnIndexData = fileReader.readByteArray(filePath, dataChunk.getRowIdPageOffset(), dataChunk.getRowIdPageLength());
        }
        invertedIndexes = CarbonUtil.getUnCompressColumnIndex(dataChunk.getRowIdPageLength(), columnIndexData, numberComressor, 0);
        // get the reverse index
        invertedIndexesReverse = getInvertedReverseIndex(invertedIndexes);
    }
    // if RLE is applied then read the RLE block chunk, uncompress it,
    // and then decode the actual data based on the RLE block
    if (CarbonUtil.hasEncoding(dataChunk.getEncodingList(), Encoding.RLE)) {
        // read and uncompress the rle block
        byte[] key;
        synchronized (fileReader) {
            key = fileReader.readByteArray(filePath, dataChunk.getRlePageOffset(), dataChunk.getRlePageLength());
        }
        rlePage = numberComressor.unCompress(key, 0, dataChunk.getRlePageLength());
        // uncompress the data with rle indexes
        dataPage = UnBlockIndexer.uncompressData(dataPage, rlePage, eachColumnValueSize[blockIndex]);
        rlePage = null;
    }
    // fill chunk attributes
    DimensionColumnPage columnDataChunk = null;
    if (dataChunk.isRowMajor()) {
        // to store fixed length column chunk values
        columnDataChunk = new ColumnGroupDimensionColumnPage(dataPage, eachColumnValueSize[blockIndex], numberOfRows);
    } else if (!CarbonUtil.hasEncoding(dataChunk.getEncodingList(), Encoding.DICTIONARY)) {
        // no-dictionary column: create a variable length column page
        // and set it to the data chunk instance
        columnDataChunk = new VariableLengthDimensionColumnPage(dataPage, invertedIndexes, invertedIndexesReverse, numberOfRows);
    } else {
        // to store fixed length column chunk values
        columnDataChunk = new FixedLengthDimensionColumnPage(dataPage, invertedIndexes, invertedIndexesReverse, numberOfRows, eachColumnValueSize[blockIndex]);
    }
    return columnDataChunk;
}
Also used: DimensionColumnPage (org.apache.carbondata.core.datastore.chunk.DimensionColumnPage), ColumnGroupDimensionColumnPage (org.apache.carbondata.core.datastore.chunk.impl.ColumnGroupDimensionColumnPage), FixedLengthDimensionColumnPage (org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage), VariableLengthDimensionColumnPage (org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionColumnPage), FileReader (org.apache.carbondata.core.datastore.FileReader), DataChunk (org.apache.carbondata.core.metadata.blocklet.datachunk.DataChunk), ByteBuffer (java.nio.ByteBuffer)
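
The RLE expansion step above delegates to UnBlockIndexer.uncompressData. A minimal standalone sketch of fixed-width run-length expansion is shown below; the (valueIndex, repeatCount) pair layout of the runs array is an assumption for illustration, not the exact CarbonData format:

// Illustrative fixed-width RLE expansion (NOT the CarbonData implementation).
// Assumption: runs holds (valueIndex, repeatCount) pairs indexing into the packed page.
public class RleExpandSketch {
    static byte[] expandRle(byte[] packed, int[] runs, int valueSize) {
        int totalValues = 0;
        for (int i = 1; i < runs.length; i += 2) {
            totalValues += runs[i];
        }
        byte[] out = new byte[totalValues * valueSize];
        int dest = 0;
        for (int i = 0; i < runs.length; i += 2) {
            int src = runs[i] * valueSize; // byte offset of the distinct value
            for (int r = 0; r < runs[i + 1]; r++) {
                System.arraycopy(packed, src, out, dest, valueSize);
                dest += valueSize;
            }
        }
        return out;
    }
}

For example, with packed = {1, 2}, runs = {0, 3, 1, 2} and valueSize = 1, expandRle returns {1, 1, 1, 2, 2}.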

Example 2 with FileReader

Use of org.apache.carbondata.core.datastore.FileReader in project carbondata by apache.

From the class CarbonUtil, method calculateMetaSize:

/**
 * Calculates the size of the B-Tree metadata stored in the block footer.
 *
 * @param tableBlockInfo block info carrying the file path and total block length
 * @return the footer metadata size in bytes
 */
public static long calculateMetaSize(TableBlockInfo tableBlockInfo) throws IOException {
    FileReader fileReader = null;
    try {
        long completeBlockLength = tableBlockInfo.getBlockLength();
        // the last 8 bytes of the block store the footer start offset as a long
        long footerPointer = completeBlockLength - 8;
        String filePath = tableBlockInfo.getFilePath();
        fileReader = FileFactory.getFileHolder(FileFactory.getFileType(filePath));
        long actualFooterOffset = fileReader.readLong(filePath, footerPointer);
        return footerPointer - actualFooterOffset;
    } finally {
        if (null != fileReader) {
            try {
                fileReader.finish();
            } catch (IOException e) {
                // ignore the exception; nothing more can be done at this point
            }
        }
    }
}
Also used: FileReader (org.apache.carbondata.core.datastore.FileReader)
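
For reference, the same footer-pointer arithmetic can be demonstrated standalone with plain java.io; the class and method names below are illustrative, not part of CarbonData:

import java.io.IOException;
import java.io.RandomAccessFile;

// Standalone sketch of the footer layout used above: the last 8 bytes of a
// data file hold the footer start offset, so the footer metadata occupies
// the bytes between that offset and the trailing pointer.
public class FooterOffsetSketch {
    static long footerMetaSize(String filePath) throws IOException {
        try (RandomAccessFile file = new RandomAccessFile(filePath, "r")) {
            long footerPointer = file.length() - 8; // position of the stored offset
            file.seek(footerPointer);
            long actualFooterOffset = file.readLong(); // where the footer begins
            return footerPointer - actualFooterOffset;
        }
    }
}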

Example 3 with FileReader

Use of org.apache.carbondata.core.datastore.FileReader in project carbondata by apache.

From the class DataFileFooterConverter, method readDataFileFooter:

/**
 * Converts the Thrift file footer into the wrapper DataFileFooter.
 */
@Override
public DataFileFooter readDataFileFooter(TableBlockInfo tableBlockInfo) throws IOException {
    DataFileFooter dataFileFooter = new DataFileFooter();
    FileReader fileReader = null;
    try {
        long completeBlockLength = tableBlockInfo.getBlockLength();
        long footerPointer = completeBlockLength - 8;
        fileReader = FileFactory.getFileHolder(FileFactory.getFileType(tableBlockInfo.getFilePath()));
        long actualFooterOffset = fileReader.readLong(tableBlockInfo.getFilePath(), footerPointer);
        CarbonFooterReader reader = new CarbonFooterReader(tableBlockInfo.getFilePath(), actualFooterOffset);
        FileFooter footer = reader.readFooter();
        dataFileFooter.setVersionId(ColumnarFormatVersion.valueOf((short) footer.getVersion()));
        dataFileFooter.setNumberOfRows(footer.getNum_rows());
        dataFileFooter.setSegmentInfo(getSegmentInfo(footer.getSegment_info()));
        List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
        List<org.apache.carbondata.format.ColumnSchema> table_columns = footer.getTable_columns();
        for (int i = 0; i < table_columns.size(); i++) {
            columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
        }
        dataFileFooter.setColumnInTable(columnSchemaList);
        List<org.apache.carbondata.format.BlockletIndex> leaf_node_indices_Thrift = footer.getBlocklet_index_list();
        List<BlockletIndex> blockletIndexList = new ArrayList<BlockletIndex>();
        for (int i = 0; i < leaf_node_indices_Thrift.size(); i++) {
            BlockletIndex blockletIndex = getBlockletIndex(leaf_node_indices_Thrift.get(i));
            blockletIndexList.add(blockletIndex);
        }
        List<org.apache.carbondata.format.BlockletInfo> leaf_node_infos_Thrift = footer.getBlocklet_info_list();
        List<BlockletInfo> blockletInfoList = new ArrayList<BlockletInfo>();
        for (int i = 0; i < leaf_node_infos_Thrift.size(); i++) {
            BlockletInfo blockletInfo = getBlockletInfo(leaf_node_infos_Thrift.get(i));
            blockletInfo.setBlockletIndex(blockletIndexList.get(i));
            blockletInfoList.add(blockletInfo);
        }
        dataFileFooter.setBlockletList(blockletInfoList);
        dataFileFooter.setBlockletIndex(getBlockletIndexForDataFileFooter(blockletIndexList));
    } finally {
        if (null != fileReader) {
            fileReader.finish();
        }
    }
    return dataFileFooter;
}
Also used: BlockletIndex (org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex), ArrayList (java.util.ArrayList), BlockletInfo (org.apache.carbondata.core.metadata.blocklet.BlockletInfo), DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter), FileFooter (org.apache.carbondata.format.FileFooter), ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema), FileReader (org.apache.carbondata.core.datastore.FileReader), CarbonFooterReader (org.apache.carbondata.core.reader.CarbonFooterReader)
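
The three indexed for-loops above all follow the same convert-each-Thrift-element pattern. A generic helper of the following shape (illustrative, not part of CarbonData) would express each of them as a single call:

import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;

// Generic Thrift-list-to-wrapper-list conversion (illustrative helper).
final class ThriftListConverter {
    static <T, W> List<W> convertAll(List<T> thriftList, Function<T, W> converter) {
        List<W> wrappers = new ArrayList<>(thriftList.size());
        for (T element : thriftList) {
            wrappers.add(converter.apply(element));
        }
        return wrappers;
    }
}

With such a helper, the column-schema loop becomes convertAll(footer.getTable_columns(), this::thriftColumnSchmeaToWrapperColumnSchema); the blocklet-info loop, however, still needs its index to pair each BlockletInfo with the BlockletIndex at the same position.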

Example 4 with FileReader

Use of org.apache.carbondata.core.datastore.FileReader in project carbondata by apache.

From the class DataFileFooterConverter, method getSchema:

@Override
public List<ColumnSchema> getSchema(TableBlockInfo tableBlockInfo) throws IOException {
    FileReader fileReader = null;
    List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
    try {
        long completeBlockLength = tableBlockInfo.getBlockLength();
        long footerPointer = completeBlockLength - 8;
        fileReader = FileFactory.getFileHolder(FileFactory.getFileType(tableBlockInfo.getFilePath()));
        long actualFooterOffset = fileReader.readLong(tableBlockInfo.getFilePath(), footerPointer);
        CarbonFooterReader reader = new CarbonFooterReader(tableBlockInfo.getFilePath(), actualFooterOffset);
        FileFooter footer = reader.readFooter();
        List<org.apache.carbondata.format.ColumnSchema> table_columns = footer.getTable_columns();
        for (int i = 0; i < table_columns.size(); i++) {
            columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
        }
    } finally {
        if (null != fileReader) {
            fileReader.finish();
        }
    }
    return columnSchemaList;
}
Also used: ArrayList (java.util.ArrayList), DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter), FileFooter (org.apache.carbondata.format.FileFooter), FileReader (org.apache.carbondata.core.datastore.FileReader), ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema), CarbonFooterReader (org.apache.carbondata.core.reader.CarbonFooterReader)
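
Examples 2 through 4 share the same open/read/finish-in-finally lifecycle. A minimal sketch of that lifecycle rewritten with try-with-resources is below; ReaderHandle is an illustrative stand-in, since in these examples FileReader is closed via finish() rather than implementing AutoCloseable (an assumption based on the finally blocks above):

import java.io.Closeable;
import java.io.IOException;

// Sketch of the open/read/close lifecycle shared by examples 2-4.
public class FooterReadPattern {
    interface ReaderHandle extends Closeable {
        long readLong(String filePath, long offset) throws IOException;
    }

    static long readActualFooterOffset(ReaderHandle reader, String filePath, long blockLength)
            throws IOException {
        try (ReaderHandle r = reader) {
            // the footer offset is stored in the last 8 bytes of the block
            return r.readLong(filePath, blockLength - 8);
        }
    }
}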

Example 5 with FileReader

Use of org.apache.carbondata.core.datastore.FileReader in project carbondata by apache.

From the class BlockletFilterScanner, method executeFilter:

/**
 * This method processes the data in the following order:
 * 1. Apply the min/max values to the filter tree and check whether any filter
 *    falls within the min/max range; if not, return an empty result.
 * 2. If a filter falls within the min/max range, apply the filter to the actual
 *    data and collect the filtered row indexes.
 * 3. If the row indexes are empty, return an empty result.
 * 4. Otherwise, read only those blocks (measure or dimension) that are present
 *    in the query but not in the filter; while applying the filter some blocks
 *    were already read into the chunk holder, so they need not be read again.
 * 5. Set the blocks and filtered row indexes on the result.
 *
 * @param rawBlockletColumnChunks raw column chunks of the blocklet being scanned
 * @throws FilterUnsupportedException
 */
private BlockletScannedResult executeFilter(RawBlockletColumnChunks rawBlockletColumnChunks) throws FilterUnsupportedException, IOException {
    long startTime = System.currentTimeMillis();
    QueryStatistic totalBlockletStatistic = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM);
    totalBlockletStatistic.addCountStatistic(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM, totalBlockletStatistic.getCount() + 1);
    // set the indexed data, if any was produced during FG datamap pruning
    rawBlockletColumnChunks.setBitSetGroup(rawBlockletColumnChunks.getDataBlock().getIndexedData());
    // apply filter on actual data, for each page
    BitSetGroup bitSetGroup = this.filterExecuter.applyFilter(rawBlockletColumnChunks, useBitSetPipeLine);
    // if filter result is empty then return with empty result
    if (bitSetGroup.isEmpty()) {
        CarbonUtil.freeMemory(rawBlockletColumnChunks.getDimensionRawColumnChunks(), rawBlockletColumnChunks.getMeasureRawColumnChunks());
        QueryStatistic scanTime = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME);
        scanTime.addCountStatistic(QueryStatisticsConstants.SCAN_BLOCKlET_TIME, scanTime.getCount() + (System.currentTimeMillis() - startTime));
        QueryStatistic scannedPages = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.PAGE_SCANNED);
        scannedPages.addCountStatistic(QueryStatisticsConstants.PAGE_SCANNED, scannedPages.getCount() + bitSetGroup.getScannedPages());
        return createEmptyResult();
    }
    BlockletScannedResult scannedResult = new FilterQueryScannedResult(blockExecutionInfo);
    scannedResult.setBlockletId(blockExecutionInfo.getBlockIdString() + CarbonCommonConstants.FILE_SEPARATOR + rawBlockletColumnChunks.getDataBlock().blockletIndex());
    // valid scanned blocklet
    QueryStatistic validScannedBlockletStatistic = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM);
    validScannedBlockletStatistic.addCountStatistic(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM, validScannedBlockletStatistic.getCount() + 1);
    // adding statistics for valid number of pages
    QueryStatistic validPages = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.VALID_PAGE_SCANNED);
    validPages.addCountStatistic(QueryStatisticsConstants.VALID_PAGE_SCANNED, validPages.getCount() + bitSetGroup.getValidPages());
    QueryStatistic scannedPages = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.PAGE_SCANNED);
    scannedPages.addCountStatistic(QueryStatisticsConstants.PAGE_SCANNED, scannedPages.getCount() + bitSetGroup.getScannedPages());
    int[] pageFilteredRowCount = new int[bitSetGroup.getNumberOfPages()];
    // get the row indexes from bit set for each page
    int[][] pageFilteredRowId = new int[bitSetGroup.getNumberOfPages()][];
    int numPages = pageFilteredRowId.length;
    for (int pageId = 0; pageId < numPages; pageId++) {
        BitSet bitSet = bitSetGroup.getBitSet(pageId);
        if (bitSet != null && !bitSet.isEmpty()) {
            int[] matchedRowId = new int[bitSet.cardinality()];
            int index = 0;
            for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
                matchedRowId[index++] = i;
            }
            pageFilteredRowCount[pageId] = matchedRowId.length;
            pageFilteredRowId[pageId] = matchedRowId;
        }
    }
    // NOTE: as written, dimensionReadTime is initialised to (roughly) zero and is
    // not accumulated around the chunk reads below
    long dimensionReadTime = System.currentTimeMillis();
    dimensionReadTime = System.currentTimeMillis() - dimensionReadTime;
    FileReader fileReader = rawBlockletColumnChunks.getFileReader();
    DimensionRawColumnChunk[] dimensionRawColumnChunks = new DimensionRawColumnChunk[blockExecutionInfo.getTotalNumberDimensionToRead()];
    int numDimensionChunks = dimensionRawColumnChunks.length;
    // start from the dimension chunks already read while applying the filter;
    // missing ones are read below
    for (int chunkIndex = 0; chunkIndex < numDimensionChunks; chunkIndex++) {
        dimensionRawColumnChunks[chunkIndex] = rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex];
    }
    int[][] allSelectedDimensionColumnIndexRange = blockExecutionInfo.getAllSelectedDimensionColumnIndexRange();
    DimensionRawColumnChunk[] projectionListDimensionChunk = rawBlockletColumnChunks.getDataBlock().readDimensionChunks(fileReader, allSelectedDimensionColumnIndexRange);
    for (int[] columnIndexRange : allSelectedDimensionColumnIndexRange) {
        System.arraycopy(projectionListDimensionChunk, columnIndexRange[0], dimensionRawColumnChunks, columnIndexRange[0], columnIndexRange[1] + 1 - columnIndexRange[0]);
    }
    /*
     * if any projected dimension was not already loaded into dimensionRawColumnChunks,
     * read it now
     */
    int[] projectionListDimensionIndexes = blockExecutionInfo.getProjectionListDimensionIndexes();
    for (int projectionListDimensionIndex : projectionListDimensionIndexes) {
        if (null == dimensionRawColumnChunks[projectionListDimensionIndex]) {
            dimensionRawColumnChunks[projectionListDimensionIndex] = rawBlockletColumnChunks.getDataBlock().readDimensionChunk(fileReader, projectionListDimensionIndex);
        }
    }
    DimensionColumnPage[][] dimensionColumnPages = new DimensionColumnPage[numDimensionChunks][numPages];
    for (int chunkIndex = 0; chunkIndex < numDimensionChunks; chunkIndex++) {
        if (dimensionRawColumnChunks[chunkIndex] != null) {
            for (int pageId = 0; pageId < numPages; pageId++) {
                dimensionColumnPages[chunkIndex][pageId] = dimensionRawColumnChunks[chunkIndex].decodeColumnPage(pageId);
            }
        }
    }
    MeasureRawColumnChunk[] measureRawColumnChunks = new MeasureRawColumnChunk[blockExecutionInfo.getTotalNumberOfMeasureToRead()];
    int numMeasureChunks = measureRawColumnChunks.length;
    // start from the measure chunks already read while applying the filter;
    // missing ones are read below
    for (int chunkIndex = 0; chunkIndex < numMeasureChunks; chunkIndex++) {
        if (null != rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]) {
            measureRawColumnChunks[chunkIndex] = rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex];
        }
    }
    int[][] allSelectedMeasureColumnIndexRange = blockExecutionInfo.getAllSelectedMeasureIndexRange();
    MeasureRawColumnChunk[] projectionListMeasureChunk = rawBlockletColumnChunks.getDataBlock().readMeasureChunks(fileReader, allSelectedMeasureColumnIndexRange);
    for (int[] columnIndexRange : allSelectedMeasureColumnIndexRange) {
        System.arraycopy(projectionListMeasureChunk, columnIndexRange[0], measureRawColumnChunks, columnIndexRange[0], columnIndexRange[1] + 1 - columnIndexRange[0]);
    }
    /*
     * if any projected measure was not already loaded into measureRawColumnChunks,
     * read it now
     */
    int[] projectionListMeasureIndexes = blockExecutionInfo.getProjectionListMeasureIndexes();
    for (int projectionListMeasureIndex : projectionListMeasureIndexes) {
        if (null == measureRawColumnChunks[projectionListMeasureIndex]) {
            measureRawColumnChunks[projectionListMeasureIndex] = rawBlockletColumnChunks.getDataBlock().readMeasureChunk(fileReader, projectionListMeasureIndex);
        }
    }
    ColumnPage[][] measureColumnPages = new ColumnPage[numMeasureChunks][numPages];
    for (int chunkIndex = 0; chunkIndex < numMeasureChunks; chunkIndex++) {
        if (measureRawColumnChunks[chunkIndex] != null) {
            for (int pageId = 0; pageId < numPages; pageId++) {
                measureColumnPages[chunkIndex][pageId] = measureRawColumnChunks[chunkIndex].decodeColumnPage(pageId);
            }
        }
    }
    scannedResult.setDimensionColumnPages(dimensionColumnPages);
    scannedResult.setPageFilteredRowId(pageFilteredRowId);
    scannedResult.setMeasureColumnPages(measureColumnPages);
    scannedResult.setDimRawColumnChunks(dimensionRawColumnChunks);
    scannedResult.setMsrRawColumnChunks(measureRawColumnChunks);
    scannedResult.setPageFilteredRowCount(pageFilteredRowCount);
    // adding statistics for carbon scan time
    QueryStatistic scanTime = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME);
    scanTime.addCountStatistic(QueryStatisticsConstants.SCAN_BLOCKlET_TIME, scanTime.getCount() + (System.currentTimeMillis() - startTime - dimensionReadTime));
    QueryStatistic readTime = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.READ_BLOCKlET_TIME);
    readTime.addCountStatistic(QueryStatisticsConstants.READ_BLOCKlET_TIME, readTime.getCount() + dimensionReadTime);
    return scannedResult;
}
Also used: FilterQueryScannedResult (org.apache.carbondata.core.scan.result.impl.FilterQueryScannedResult), BlockletScannedResult (org.apache.carbondata.core.scan.result.BlockletScannedResult), BitSet (java.util.BitSet), DimensionColumnPage (org.apache.carbondata.core.datastore.chunk.DimensionColumnPage), BitSetGroup (org.apache.carbondata.core.util.BitSetGroup), ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage), FileReader (org.apache.carbondata.core.datastore.FileReader), MeasureRawColumnChunk (org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk), DimensionRawColumnChunk (org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk), QueryStatistic (org.apache.carbondata.core.stats.QueryStatistic)
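
The per-page row-id extraction above (the bitSet.nextSetBit loop) is a standard java.util.BitSet idiom. A self-contained sketch, with an illustrative class name:

import java.util.Arrays;
import java.util.BitSet;

// Converts the set bits of a page's BitSet into an array of matched row ids,
// mirroring the loop in executeFilter.
public class BitSetRows {
    static int[] setBitsToRowIds(BitSet bitSet) {
        int[] rowIds = new int[bitSet.cardinality()];
        int index = 0;
        for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
            rowIds[index++] = i;
        }
        return rowIds;
    }

    public static void main(String[] args) {
        BitSet bits = new BitSet();
        bits.set(2); bits.set(5); bits.set(7);
        System.out.println(Arrays.toString(setBitsToRowIds(bits))); // [2, 5, 7]
    }
}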

Aggregations

FileReader (org.apache.carbondata.core.datastore.FileReader): 5
ArrayList (java.util.ArrayList): 2
DimensionColumnPage (org.apache.carbondata.core.datastore.chunk.DimensionColumnPage): 2
DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter): 2
ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema): 2
CarbonFooterReader (org.apache.carbondata.core.reader.CarbonFooterReader): 2
FileFooter (org.apache.carbondata.format.FileFooter): 2
ByteBuffer (java.nio.ByteBuffer): 1
BitSet (java.util.BitSet): 1
ColumnGroupDimensionColumnPage (org.apache.carbondata.core.datastore.chunk.impl.ColumnGroupDimensionColumnPage): 1
DimensionRawColumnChunk (org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk): 1
FixedLengthDimensionColumnPage (org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage): 1
MeasureRawColumnChunk (org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk): 1
VariableLengthDimensionColumnPage (org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionColumnPage): 1
ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage): 1
BlockletInfo (org.apache.carbondata.core.metadata.blocklet.BlockletInfo): 1
DataChunk (org.apache.carbondata.core.metadata.blocklet.datachunk.DataChunk): 1
BlockletIndex (org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex): 1
BlockletScannedResult (org.apache.carbondata.core.scan.result.BlockletScannedResult): 1
FilterQueryScannedResult (org.apache.carbondata.core.scan.result.impl.FilterQueryScannedResult): 1