Search in sources :

Example 1 with BlockletRows

Use of org.apache.carbondata.sdk.file.cache.BlockletRows in the Apache CarbonData project.

From the class PaginationCarbonReader, method getRows.

/**
 * Collects the rows whose row numbers fall in {@code [fromRowNumber, toRowNumber]}, both ends
 * inclusive. Row numbers are 1-based: the first row of the first blocklet has row id 1.
 *
 * <p>The blocklets overlapping the requested range are visited in order. Each blocklet is taken
 * from the cache when present; otherwise it is read completely through a reader built for its
 * split and then stored in the cache. The part of each blocklet that overlaps the requested
 * range is appended to the result.
 *
 * @param fromRowNumber row number of the first row to return (inclusive)
 * @param toRowNumber   row number of the last row to return (inclusive)
 * @return an array of size {@code toRowNumber - fromRowNumber + 1} holding the requested rows
 * @throws IOException          if reading a blocklet or closing its reader fails
 * @throws InterruptedException if reading a blocklet is interrupted
 */
private Object[] getRows(long fromRowNumber, long toRowNumber) throws IOException, InterruptedException {
    int rowCount = 0;
    Object[] rows = new Object[(int) (toRowNumber - fromRowNumber + 1)];
    // get the matching split index (blocklets) range for the input range.
    Range blockletIndexRange = getBlockletIndexRange(fromRowNumber, toRowNumber);
    for (int i = blockletIndexRange.getFrom(); i <= blockletIndexRange.getTo(); i++) {
        String blockletUniqueId = String.valueOf(i);
        // Look the entry up once: a second lookup could return null if the entry
        // disappears from the cache between the null check and the read.
        BlockletRows blockletRows = (BlockletRows) cache.get(blockletUniqueId);
        if (blockletRows == null) {
            BlockletDetailInfo detailInfo = ((CarbonInputSplit) allBlockletSplits.get(i)).getDetailInfo();
            List<Object> readRows = new ArrayList<>();
            // read the rows from the blocklet
            // TODO: read blocklets in multi-thread if there is a performance requirement.
            readerBuilder.setInputSplit(allBlockletSplits.get(i));
            CarbonReader<Object> carbonReader = readerBuilder.build();
            try {
                while (carbonReader.hasNext()) {
                    readRows.add(carbonReader.readNextRow());
                }
            } finally {
                // release the reader even when reading a row fails
                carbonReader.close();
            }
            long fromRowId;
            if (i == 0) {
                fromRowId = 1;
            } else {
                // previous index will contain the sum of rows till previous blocklet.
                fromRowId = rowCountInSplits.get(i - 1) + 1;
            }
            blockletRows = new BlockletRows(fromRowId, detailInfo.getBlockSize(), readRows.toArray());
            // add entry to cache with no expiry time
            // key: unique blocklet id
            // value: BlockletRows
            cache.put(blockletUniqueId, blockletRows, blockletRows.getMemorySize(), Integer.MAX_VALUE);
        }
        // row id of the first row in this blocklet
        long fromBlockletRow = blockletRows.getRowIdStartIndex();
        // row id one past the last row in this blocklet (exclusive bound)
        long toBlockletRow = fromBlockletRow + blockletRows.getRowsCount();
        Object[] rowsInBlocklet = blockletRows.getRows();
        // [start, end) is the slice of rowsInBlocklet (0-based indexes) that overlaps the request
        int start;
        int end;
        if (toRowNumber >= toBlockletRow) {
            if (fromRowNumber >= fromBlockletRow) {
                // only fromRowNumber lies in this blocklet,
                // read from fromRowNumber to end of the blocklet.
                start = (int) (fromRowNumber - fromBlockletRow);
                end = blockletRows.getRowsCount();
            } else {
                // neither fromRowNumber nor toRowNumber lies in this blocklet.
                // Read the whole blocklet.
                start = 0;
                end = rowsInBlocklet.length;
            }
        } else {
            if (fromRowNumber >= fromBlockletRow) {
                // both fromRowNumber and toRowNumber lie in this blocklet itself.
                start = (int) (fromRowNumber - fromBlockletRow);
            } else {
                // only toRowNumber lies in this blocklet. Read from the start of the blocklet.
                start = 0;
            }
            // +1 because toRowNumber is inclusive
            end = (int) (toRowNumber + 1 - fromBlockletRow);
        }
        int length = end - start;
        // an empty or inverted slice contributes nothing
        if (length > 0) {
            System.arraycopy(rowsInBlocklet, start, rows, rowCount, length);
            rowCount += length;
        }
    }
    return rows;
}
Also used : BlockletDetailInfo(org.apache.carbondata.core.indexstore.BlockletDetailInfo) ArrayList(java.util.ArrayList) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) BlockletRows(org.apache.carbondata.sdk.file.cache.BlockletRows)

Aggregations

ArrayList (java.util.ArrayList)1 BlockletDetailInfo (org.apache.carbondata.core.indexstore.BlockletDetailInfo)1 CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit)1 BlockletRows (org.apache.carbondata.sdk.file.cache.BlockletRows)1