Search in sources :

Example 1 with TaskBlockInfo

use of org.apache.carbondata.core.datastore.block.TaskBlockInfo in project carbondata by apache.

the class CarbonCompactionUtil method createMappingForSegments.

/**
 * Groups the given blocks by segment id, producing one {@link TaskBlockInfo} per segment.
 *
 * <p>Every block is handed to {@code groupCorrespodingInfoBasedOnTask} together with the
 * {@code TaskBlockInfo} of its segment and the task number that
 * {@code CarbonTablePath.DataFileUtil.getTaskNo} extracts from the block's file path.
 *
 * @param tableBlockInfoList blocks to group; iterated once, in list order
 * @return map from segment id to the {@code TaskBlockInfo} holding that segment's blocks
 */
public static Map<String, TaskBlockInfo> createMappingForSegments(List<TableBlockInfo> tableBlockInfoList) {
    Map<String, TaskBlockInfo> bySegment = new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    for (TableBlockInfo block : tableBlockInfoList) {
        String segmentId = block.getSegmentId();
        TaskBlockInfo segmentTasks = bySegment.get(segmentId);
        // the task number is taken from the data file path
        String taskNo = CarbonTablePath.DataFileUtil.getTaskNo(block.getFilePath());
        if (segmentTasks == null) {
            // first block seen for this segment: start a fresh grouping and register it
            segmentTasks = new TaskBlockInfo();
            bySegment.put(segmentId, segmentTasks);
        }
        // one call covers both the new-segment and the existing-segment case
        groupCorrespodingInfoBasedOnTask(block, segmentTasks, taskNo);
    }
    return bySegment;
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) HashMap(java.util.HashMap) TaskBlockInfo(org.apache.carbondata.core.datastore.block.TaskBlockInfo)

Example 2 with TaskBlockInfo

use of org.apache.carbondata.core.datastore.block.TaskBlockInfo in project carbondata by apache.

the class CarbonCompactionUtil method checkIfAnyRestructuredBlockExists.

/**
 * Checks whether any block selected for compaction is a restructured block.
 *
 * <p>A block is treated as restructured when {@code tableLastUpdatedTime} is greater than
 * the schema-updated timestamp recorded in its data file footer.
 *
 * @param segmentMapping segments selected for compaction; only its keys (segment ids) are read
 * @param dataFileMetadataSegMapping data file footers per segment id; it is looked up for
 *        every segment id in {@code segmentMapping}, and a missing entry results in a
 *        {@code NullPointerException}
 * @param tableLastUpdatedTime timestamp compared against each footer's schema-updated timestamp
 * @return {@code true} as soon as one restructured block is found, {@code false} if none is
 */
public static boolean checkIfAnyRestructuredBlockExists(Map<String, TaskBlockInfo> segmentMapping, Map<String, List<DataFileFooter>> dataFileMetadataSegMapping, long tableLastUpdatedTime) {
    for (String segmentId : segmentMapping.keySet()) {
        List<DataFileFooter> footers = dataFileMetadataSegMapping.get(segmentId);
        for (DataFileFooter footer : footers) {
            // a footer older than the table's last update indicates a restructured block
            if (footer.getSchemaUpdatedTimeStamp() < tableLastUpdatedTime) {
                return true;
            }
        }
    }
    return false;
}
Also used : DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) TaskBlockInfo(org.apache.carbondata.core.datastore.block.TaskBlockInfo) HashMap(java.util.HashMap) Map(java.util.Map)

Example 3 with TaskBlockInfo

use of org.apache.carbondata.core.datastore.block.TaskBlockInfo in project carbondata by apache.

the class CarbonCompactionExecutor method processTableBlocks.

/**
 * Processes the table blocks of every segment in {@code segmentMapping}.
 *
 * <p>The query model is prepared once up front. Then, for each task of each segment, the
 * task's block list is sorted in place, set on the query model, executed through
 * {@code executeBlockList}, and the outcome is wrapped in a {@link RawResultIterator}
 * built with the segment's source properties and the destination segment properties.
 *
 * @return one {@code RawResultIterator} per (segment, task) pair, in iteration order
 * @throws QueryExecutionException declared by this method; see the helpers it calls
 * @throws IOException declared by this method; see the helpers it calls
 */
public List<RawResultIterator> processTableBlocks() throws QueryExecutionException, IOException {
    List<RawResultIterator> iterators = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    // the query model is prepared without any block list; blocks are set per task below
    List<TableBlockInfo> noBlocks = null;
    queryModel = prepareQueryModel(noBlocks);
    for (Map.Entry<String, TaskBlockInfo> segmentEntry : segmentMapping.entrySet()) {
        // footers of the segment are used to derive its source segment properties
        List<DataFileFooter> segmentFooters = dataFileMetadataSegMapping.get(segmentEntry.getKey());
        SegmentProperties sourceSegProperties = getSourceSegmentProperties(segmentFooters);
        TaskBlockInfo segmentTasks = segmentEntry.getValue();
        for (String task : segmentTasks.getTaskSet()) {
            List<TableBlockInfo> taskBlocks = segmentTasks.getTableBlockInfoList(task);
            // sorts the list held by the TaskBlockInfo in place
            Collections.sort(taskBlocks);
            LOGGER.info("for task -" + task + "-block size is -" + taskBlocks.size());
            // the query model must carry the current task's blocks before execution
            queryModel.setTableBlockInfos(taskBlocks);
            RawResultIterator taskIterator = new RawResultIterator(executeBlockList(taskBlocks), sourceSegProperties, destinationSegProperties);
            iterators.add(taskIterator);
        }
    }
    return iterators;
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) RawResultIterator(org.apache.carbondata.core.scan.result.iterator.RawResultIterator) ArrayList(java.util.ArrayList) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) TaskBlockInfo(org.apache.carbondata.core.datastore.block.TaskBlockInfo) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

HashMap (java.util.HashMap)3 TaskBlockInfo (org.apache.carbondata.core.datastore.block.TaskBlockInfo)3 Map (java.util.Map)2 TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)2 DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter)2 ArrayList (java.util.ArrayList)1 SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties)1 RawResultIterator (org.apache.carbondata.core.scan.result.iterator.RawResultIterator)1