Search in sources :

Example 16 with TableBlockInfo

use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

the class CarbonInputFormat method getSplits.

/**
   * {@inheritDoc}
   * Configurations FileInputFormat.INPUT_DIR, CarbonInputFormat.INPUT_SEGMENT_NUMBERS
   * are used to get table path to read.
   *
   * @return
   * @throws IOException
   */
private List<InputSplit> getSplits(JobContext job, FilterResolverIntf filterResolver, BitSet matchedPartitions, CacheClient cacheClient) throws IOException {
    List<InputSplit> result = new LinkedList<InputSplit>();
    FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor();
    AbsoluteTableIdentifier absoluteTableIdentifier = getCarbonTable(job.getConfiguration()).getAbsoluteTableIdentifier();
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(absoluteTableIdentifier);
    //for each segment fetch blocks matching filter in Driver BTree
    for (String segmentNo : getSegmentsToAccess(job)) {
        List<DataRefNode> dataRefNodes = getDataBlocksOfSegment(job, filterExpressionProcessor, absoluteTableIdentifier, filterResolver, matchedPartitions, segmentNo, cacheClient, updateStatusManager);
        for (DataRefNode dataRefNode : dataRefNodes) {
            BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode;
            TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
            if (CarbonUtil.isInvalidTableBlock(tableBlockInfo, updateStatusManager.getInvalidTimestampRange(tableBlockInfo.getSegmentId()), updateStatusManager)) {
                continue;
            }
            result.add(new CarbonInputSplit(segmentNo, new Path(tableBlockInfo.getFilePath()), tableBlockInfo.getBlockOffset(), tableBlockInfo.getBlockLength(), tableBlockInfo.getLocations(), tableBlockInfo.getBlockletInfos().getNoOfBlockLets(), tableBlockInfo.getVersion()));
        }
    }
    return result;
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) CarbonStorePath(org.apache.carbondata.core.util.path.CarbonStorePath) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) InputSplit(org.apache.hadoop.mapreduce.InputSplit) BlockBTreeLeafNode(org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode)

Example 17 with TableBlockInfo

use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

the class CarbonInputFormat method getSegmentAbstractIndexs.

/**
   * It returns index for each task file.
   * @param job
   * @param absoluteTableIdentifier
   * @param segmentId
   * @return
   * @throws IOException
   */
private Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> getSegmentAbstractIndexs(JobContext job, AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId, CacheClient cacheClient, SegmentUpdateStatusManager updateStatusManager) throws IOException {
    Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> segmentIndexMap = null;
    SegmentTaskIndexWrapper segmentTaskIndexWrapper = null;
    boolean isSegmentUpdated = false;
    Set<SegmentTaskIndexStore.TaskBucketHolder> taskKeys = null;
    TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier = new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segmentId);
    segmentTaskIndexWrapper = cacheClient.getSegmentAccessClient().getIfPresent(tableSegmentUniqueIdentifier);
    UpdateVO updateDetails = updateStatusManager.getInvalidTimestampRange(segmentId);
    if (null != segmentTaskIndexWrapper) {
        segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
        if (isSegmentUpdate(segmentTaskIndexWrapper, updateDetails)) {
            taskKeys = segmentIndexMap.keySet();
            isSegmentUpdated = true;
        }
    }
    // if segment tree is not loaded, load the segment tree
    if (segmentIndexMap == null || isSegmentUpdated) {
        // if the segment is updated only the updated blocks TableInfo instance has to be
        // retrieved. the same will be filtered based on taskKeys , if the task is same
        // for the block then dont add it since already its btree is loaded.
        Set<SegmentTaskIndexStore.TaskBucketHolder> validTaskKeys = new HashSet<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        List<TableBlockInfo> tableBlockInfoList = getTableBlockInfo(job, tableSegmentUniqueIdentifier, taskKeys, updateStatusManager.getInvalidTimestampRange(segmentId), updateStatusManager, segmentId, validTaskKeys);
        if (!tableBlockInfoList.isEmpty()) {
            Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
            segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);
            // get Btree blocks for given segment
            tableSegmentUniqueIdentifier.setSegmentToTableBlocksInfos(segmentToTableBlocksInfos);
            tableSegmentUniqueIdentifier.setIsSegmentUpdated(isSegmentUpdated);
            segmentTaskIndexWrapper = cacheClient.getSegmentAccessClient().get(tableSegmentUniqueIdentifier);
            segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
        }
        if (null != taskKeys) {
            Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> finalMap = new HashMap<>(validTaskKeys.size());
            for (SegmentTaskIndexStore.TaskBucketHolder key : validTaskKeys) {
                finalMap.put(key, segmentIndexMap.get(key));
            }
            segmentIndexMap = finalMap;
        }
    }
    return segmentIndexMap;
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) TableSegmentUniqueIdentifier(org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier) SegmentTaskIndexWrapper(org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) SegmentTaskIndexStore(org.apache.carbondata.core.datastore.SegmentTaskIndexStore)

Example 18 with TableBlockInfo

use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

the class CarbonRecordReader method initialize.

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    // The input split can contain single HDFS block or multiple blocks, so firstly get all the
    // blocks and then set them in the query model.
    List<CarbonInputSplit> splitList;
    if (inputSplit instanceof CarbonInputSplit) {
        splitList = new ArrayList<>(1);
        splitList.add((CarbonInputSplit) inputSplit);
    } else if (inputSplit instanceof CarbonMultiBlockSplit) {
        // contains multiple blocks, this is an optimization for concurrent query.
        CarbonMultiBlockSplit multiBlockSplit = (CarbonMultiBlockSplit) inputSplit;
        splitList = multiBlockSplit.getAllSplits();
    } else {
        throw new RuntimeException("unsupported input split type: " + inputSplit);
    }
    List<TableBlockInfo> tableBlockInfoList = CarbonInputSplit.createBlocks(splitList);
    queryModel.setTableBlockInfos(tableBlockInfoList);
    readSupport.initialize(queryModel.getProjectionColumns(), queryModel.getAbsoluteTableIdentifier());
    try {
        carbonIterator = new ChunkRowIterator(queryExecutor.execute(queryModel));
    } catch (QueryExecutionException e) {
        throw new InterruptedException(e.getMessage());
    }
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) ChunkRowIterator(org.apache.carbondata.core.scan.result.iterator.ChunkRowIterator) QueryExecutionException(org.apache.carbondata.core.scan.executor.exception.QueryExecutionException)

Example 19 with TableBlockInfo

use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

the class InMemoryBTreeIndex method getTableBlockInfo.

/**
   * Below method will be used to get the table block info
   *
   * @param job                     job context
   * @return list of table block
   * @throws IOException
   */
private List<TableBlockInfo> getTableBlockInfo(JobContext job) throws IOException {
    List<TableBlockInfo> tableBlockInfoList = new ArrayList<>();
    // identify table blocks from all file locations of given segment
    for (InputSplit inputSplit : segment.getAllSplits(job)) {
        CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
        BlockletInfos blockletInfos = new BlockletInfos(carbonInputSplit.getNumberOfBlocklets(), 0, carbonInputSplit.getNumberOfBlocklets());
        tableBlockInfoList.add(new TableBlockInfo(carbonInputSplit.getPath().toString(), carbonInputSplit.getStart(), segment.getId(), carbonInputSplit.getLocations(), carbonInputSplit.getLength(), blockletInfos, carbonInputSplit.getVersion()));
    }
    return tableBlockInfoList;
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) BlockletInfos(org.apache.carbondata.core.datastore.block.BlockletInfos) ArrayList(java.util.ArrayList) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) InputSplit(org.apache.hadoop.mapreduce.InputSplit) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit)

Example 20 with TableBlockInfo

use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

the class AbstractDataFileFooterConverter method getIndexInfo.

/**
   * Below method will be used to get the index info from index file
   *
   * @param filePath           file path of the index file
   * @param tableBlockInfoList table block index
   * @return list of index info
   * @throws IOException problem while reading the index file
   */
public List<DataFileFooter> getIndexInfo(String filePath, List<TableBlockInfo> tableBlockInfoList) throws IOException {
    CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
    List<DataFileFooter> dataFileFooters = new ArrayList<DataFileFooter>();
    try {
        // open the reader
        indexReader.openThriftReader(filePath);
        // get the index header
        org.apache.carbondata.format.IndexHeader readIndexHeader = indexReader.readIndexHeader();
        List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
        List<org.apache.carbondata.format.ColumnSchema> table_columns = readIndexHeader.getTable_columns();
        for (int i = 0; i < table_columns.size(); i++) {
            columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
        }
        // get the segment info
        SegmentInfo segmentInfo = getSegmentInfo(readIndexHeader.getSegment_info());
        BlockletIndex blockletIndex = null;
        int counter = 0;
        int index = 0;
        DataFileFooter dataFileFooter = null;
        // read the block info from file
        while (indexReader.hasNext()) {
            BlockIndex readBlockIndexInfo = indexReader.readBlockIndexInfo();
            blockletIndex = getBlockletIndex(readBlockIndexInfo.getBlock_index());
            dataFileFooter = new DataFileFooter();
            TableBlockInfo tableBlockInfo = tableBlockInfoList.get(index);
            if (Integer.parseInt(CarbonTablePath.DataFileUtil.getPartNo(tableBlockInfo.getFilePath())) == counter++) {
                tableBlockInfo.setBlockOffset(readBlockIndexInfo.getOffset());
                tableBlockInfo.setVersion(ColumnarFormatVersion.valueOf((short) readIndexHeader.getVersion()));
                int blockletSize = getBlockletSize(readBlockIndexInfo);
                tableBlockInfo.getBlockletInfos().setNoOfBlockLets(blockletSize);
                dataFileFooter.setBlockletIndex(blockletIndex);
                dataFileFooter.setColumnInTable(columnSchemaList);
                dataFileFooter.setNumberOfRows(readBlockIndexInfo.getNum_rows());
                dataFileFooter.setBlockInfo(new BlockInfo(tableBlockInfo));
                dataFileFooter.setSegmentInfo(segmentInfo);
                dataFileFooters.add(dataFileFooter);
                if (++index == tableBlockInfoList.size()) {
                    break;
                }
            }
        }
    } finally {
        indexReader.closeThriftReader();
    }
    return dataFileFooters;
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) BlockletIndex(org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) BlockIndex(org.apache.carbondata.format.BlockIndex) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) BlockInfo(org.apache.carbondata.core.datastore.block.BlockInfo) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) SegmentInfo(org.apache.carbondata.core.metadata.blocklet.SegmentInfo)

Aggregations

TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)33 ArrayList (java.util.ArrayList)19 Test (org.junit.Test)11 HashMap (java.util.HashMap)10 List (java.util.List)9 LinkedList (java.util.LinkedList)7 AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex)7 DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter)7 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)6 LinkedHashMap (java.util.LinkedHashMap)5 IOException (java.io.IOException)4 SegmentTaskIndexWrapper (org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper)4 CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier)4 File (java.io.File)3 Map (java.util.Map)3 MockUp (mockit.MockUp)3 BlockInfo (org.apache.carbondata.core.datastore.block.BlockInfo)3 TableBlockUniqueIdentifier (org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier)3 SegmentInfo (org.apache.carbondata.core.metadata.blocklet.SegmentInfo)3 QueryExecutionException (org.apache.carbondata.core.scan.executor.exception.QueryExecutionException)3