Example 6 with SegmentUpdateStatusManager

Use of org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager in project carbondata by apache.

From class CarbonInputFormat, method getBlockRowCount:

/**
   * Gets the row count of each block and the mapping of segment id to block count.
   * @param job job context carrying the Carbon input configuration
   * @param absoluteTableIdentifier identifier of the table whose blocks are counted
   * @return BlockMappingVO holding the block-to-row-count and segment-to-block-count maps
   * @throws IOException
   * @throws KeyGenException
   */
public BlockMappingVO getBlockRowCount(JobContext job, AbsoluteTableIdentifier absoluteTableIdentifier) throws IOException, KeyGenException {
    CacheClient cacheClient = new CacheClient(absoluteTableIdentifier.getStorePath());
    try {
        SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(absoluteTableIdentifier);
        SegmentStatusManager.ValidAndInvalidSegmentsInfo validAndInvalidSegments = new SegmentStatusManager(absoluteTableIdentifier).getValidAndInvalidSegments();
        Map<String, Long> blockRowCountMapping = new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        Map<String, Long> segmentAndBlockCountMapping = new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        for (String eachValidSeg : validAndInvalidSegments.getValidSegments()) {
            long countOfBlocksInSeg = 0;
            Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> taskAbstractIndexMap = getSegmentAbstractIndexs(job, absoluteTableIdentifier, eachValidSeg, cacheClient, updateStatusManager);
            for (Map.Entry<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> taskMap : taskAbstractIndexMap.entrySet()) {
                AbstractIndex taskAbstractIndex = taskMap.getValue();
                countOfBlocksInSeg += new BlockLevelTraverser().getBlockRowMapping(taskAbstractIndex, blockRowCountMapping, eachValidSeg, updateStatusManager);
            }
            segmentAndBlockCountMapping.put(eachValidSeg, countOfBlocksInSeg);
        }
        return new BlockMappingVO(blockRowCountMapping, segmentAndBlockCountMapping);
    } finally {
        cacheClient.close();
    }
}
Also used : BlockMappingVO(org.apache.carbondata.core.mutate.data.BlockMappingVO) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) BlockLevelTraverser(org.apache.carbondata.hadoop.util.BlockLevelTraverser)
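For orientation, here is a minimal driver-side sketch of how this method could be called. It is not taken from the carbondata sources: the CarbonTable instance and the Hadoop job configuration are assumed to be prepared elsewhere, and the method name countRowsPerBlock is a placeholder.

// Sketch only: invoke getBlockRowCount for a table. The caller is assumed to
// hold a loaded CarbonTable and a configuration that already points at the
// Carbon store and input path.
private static BlockMappingVO countRowsPerBlock(CarbonTable carbonTable) throws IOException, KeyGenException {
    Job job = Job.getInstance(new Configuration(), "carbon-block-row-count");
    AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
    CarbonInputFormat<Object[]> inputFormat = new CarbonInputFormat<>();
    // Returns the two maps built in the example above: rows per block and block count per segment.
    return inputFormat.getBlockRowCount(job, identifier);
}

The returned BlockMappingVO simply wraps the two maps assembled in the method body.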

Example 7 with SegmentUpdateStatusManager

Use of org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager in project carbondata by apache.

From class CarbonInputFormat, method getSplits:

/**
   * {@inheritDoc}
   * The configurations FileInputFormat.INPUT_DIR and CarbonInputFormat.INPUT_SEGMENT_NUMBERS
   * are used to determine the table path and the segments to read.
   *
   * @return splits for all valid, non-invalidated data blocks of the requested segments
   * @throws IOException
   */
private List<InputSplit> getSplits(JobContext job, FilterResolverIntf filterResolver, BitSet matchedPartitions, CacheClient cacheClient) throws IOException {
    List<InputSplit> result = new LinkedList<InputSplit>();
    FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor();
    AbsoluteTableIdentifier absoluteTableIdentifier = getCarbonTable(job.getConfiguration()).getAbsoluteTableIdentifier();
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(absoluteTableIdentifier);
    // for each segment fetch blocks matching filter in Driver BTree
    for (String segmentNo : getSegmentsToAccess(job)) {
        List<DataRefNode> dataRefNodes = getDataBlocksOfSegment(job, filterExpressionProcessor, absoluteTableIdentifier, filterResolver, matchedPartitions, segmentNo, cacheClient, updateStatusManager);
        for (DataRefNode dataRefNode : dataRefNodes) {
            BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode;
            TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
            if (CarbonUtil.isInvalidTableBlock(tableBlockInfo, updateStatusManager.getInvalidTimestampRange(tableBlockInfo.getSegmentId()), updateStatusManager)) {
                continue;
            }
            result.add(new CarbonInputSplit(segmentNo, new Path(tableBlockInfo.getFilePath()), tableBlockInfo.getBlockOffset(), tableBlockInfo.getBlockLength(), tableBlockInfo.getLocations(), tableBlockInfo.getBlockletInfos().getNoOfBlockLets(), tableBlockInfo.getVersion()));
        }
    }
    return result;
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) CarbonStorePath(org.apache.carbondata.core.util.path.CarbonStorePath) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) InputSplit(org.apache.hadoop.mapreduce.InputSplit) BlockBTreeLeafNode(org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode)
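As a hedged usage note (not part of the carbondata sources), the private overload above is reached through the getSplits(JobContext) entry point required by the Hadoop InputFormat contract. A caller might look roughly like the sketch below, assuming the job configuration has already been populated with the table path and segment list.

// Sketch only: obtain the splits through the public API. The job is assumed to
// carry FileInputFormat.INPUT_DIR and the segment numbers to access.
private static List<InputSplit> planSplits(Job job) throws IOException, InterruptedException {
    CarbonInputFormat<Object[]> inputFormat = new CarbonInputFormat<>();
    // Each returned element is a CarbonInputSplit with segment id, file path,
    // offset, length, locations and blocklet count, as assembled above.
    return inputFormat.getSplits(job);
}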

Example 8 with SegmentUpdateStatusManager

Use of org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager in project carbondata by apache.

From class CarbonTableReader, method getInputSplits2:

public List<CarbonLocalInputSplit> getInputSplits2(CarbonTableCacheModel tableCacheModel, Expression filters) throws Exception {
    // need to apply filters to the segments
    FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor();
    AbsoluteTableIdentifier absoluteTableIdentifier = tableCacheModel.carbonTable.getAbsoluteTableIdentifier();
    CacheClient cacheClient = new CacheClient(absoluteTableIdentifier.getStorePath());
    List<String> invalidSegments = new ArrayList<>();
    List<UpdateVO> invalidTimestampsList = new ArrayList<>();
    // get all valid segments and set them into the configuration
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(absoluteTableIdentifier);
    SegmentStatusManager segmentStatusManager = new SegmentStatusManager(absoluteTableIdentifier);
    SegmentStatusManager.ValidAndInvalidSegmentsInfo segments = segmentStatusManager.getValidAndInvalidSegments();
    tableCacheModel.segments = segments.getValidSegments().toArray(new String[0]);
    if (segments.getValidSegments().size() == 0) {
        return new ArrayList<>(0);
    }
    // remove entry in the segment index if there are invalid segments
    invalidSegments.addAll(segments.getInvalidSegments());
    for (String invalidSegmentId : invalidSegments) {
        invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
    }
    if (invalidSegments.size() > 0) {
        List<TableSegmentUniqueIdentifier> invalidSegmentsIds = new ArrayList<>(invalidSegments.size());
        for (String segId : invalidSegments) {
            invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segId));
        }
        cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds);
    }
    // get filter for segment
    CarbonInputFormatUtil.processFilterExpression(filters, tableCacheModel.carbonTable);
    FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filters, tableCacheModel.carbonTable.getAbsoluteTableIdentifier());
    List<CarbonLocalInputSplit> result = new ArrayList<>();
    // for each segment fetch blocks matching filter in Driver BTree
    for (String segmentNo : tableCacheModel.segments) {
        try {
            List<DataRefNode> dataRefNodes = getDataBlocksOfSegment(filterExpressionProcessor, absoluteTableIdentifier, tableCacheModel.carbonTablePath, filterInterface, segmentNo, cacheClient, updateStatusManager);
            for (DataRefNode dataRefNode : dataRefNodes) {
                BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode;
                TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
                if (CarbonUtil.isInvalidTableBlock(tableBlockInfo, updateStatusManager.getInvalidTimestampRange(tableBlockInfo.getSegmentId()), updateStatusManager)) {
                    continue;
                }
                result.add(new CarbonLocalInputSplit(segmentNo, tableBlockInfo.getFilePath(), tableBlockInfo.getBlockOffset(), tableBlockInfo.getBlockLength(), Arrays.asList(tableBlockInfo.getLocations()), tableBlockInfo.getBlockletInfos().getNoOfBlockLets(), tableBlockInfo.getVersion().number()));
            }
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }
    cacheClient.close();
    return result;
}
Also used : CacheClient(org.apache.carbondata.hadoop.CacheClient) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) KeyGenException(org.apache.carbondata.core.keygenerator.KeyGenException) IndexBuilderException(org.apache.carbondata.core.datastore.exception.IndexBuilderException) IOException(java.io.IOException) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf) BlockBTreeLeafNode(org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode)
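Below is a hedged sketch of the Presto-side call, not taken from the carbondata sources; the CarbonTableReader, the cached table model and the pushed-down filter expression are assumed to be supplied by the connector's split manager, and planLocalSplits is a placeholder name.

// Sketch only: ask the reader for locally usable splits of one cached table.
private static List<CarbonLocalInputSplit> planLocalSplits(CarbonTableReader reader,
        CarbonTableCacheModel cachedTable, Expression pushedFilter) throws Exception {
    // Blocks belonging to invalid segments or invalid timestamp ranges have
    // already been skipped inside getInputSplits2.
    return reader.getInputSplits2(cachedTable, pushedFilter);
}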

Example 9 with SegmentUpdateStatusManager

Use of org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager in project carbondata by apache.

From class CarbonDataMergerUtil, method compactBlockDeleteDeltaFiles:

/**
   * Compacts the delete delta files of a block in case of IUD compaction.
   *
   * @param seg segment id containing the block
   * @param blockName name of the block whose delete delta files are merged
   * @param absoluteTableIdentifier identifier of the table being compacted
   * @param segmentUpdateDetails current update status details to apply before reading the delta files
   * @param timestamp timestamp used to name the merged delete delta file
   * @return a single-element list describing the block, its timestamp range and the compaction status
   * @throws IOException
   */
public static List<CarbonDataMergerUtilResult> compactBlockDeleteDeltaFiles(String seg, String blockName, AbsoluteTableIdentifier absoluteTableIdentifier, SegmentUpdateDetails[] segmentUpdateDetails, Long timestamp) throws IOException {
    SegmentUpdateStatusManager segmentUpdateStatusManager = new SegmentUpdateStatusManager(absoluteTableIdentifier);
    List<CarbonDataMergerUtilResult> resultList = new ArrayList<CarbonDataMergerUtilResult>(1);
    // set the update status.
    segmentUpdateStatusManager.setUpdateStatusDetails(segmentUpdateDetails);
    CarbonFile[] deleteDeltaFiles = segmentUpdateStatusManager.getDeleteDeltaFilesList(seg, blockName);
    String destFileName = blockName + "-" + timestamp.toString() + CarbonCommonConstants.DELETE_DELTA_FILE_EXT;
    String fullBlockFilePath = deleteDeltaFiles[0].getParentFile().getCanonicalPath() + CarbonCommonConstants.FILE_SEPARATOR + destFileName;
    List<String> deleteFilePathList = new ArrayList<String>();
    for (CarbonFile cFile : deleteDeltaFiles) {
        deleteFilePathList.add(cFile.getCanonicalPath());
    }
    CarbonDataMergerUtilResult blockDetails = new CarbonDataMergerUtilResult();
    blockDetails.setBlockName(blockName);
    blockDetails.setSegmentName(seg);
    blockDetails.setDeleteDeltaStartTimestamp(timestamp.toString());
    blockDetails.setDeleteDeltaEndTimestamp(timestamp.toString());
    try {
        if (startCompactionDeleteDeltaFiles(deleteFilePathList, blockName, fullBlockFilePath)) {
            blockDetails.setCompactionStatus(true);
        } else {
            blockDetails.setCompactionStatus(false);
        }
        resultList.add(blockDetails);
    } catch (IOException e) {
        LOGGER.error("Compaction of Delete Delta Files failed. The complete file path is " + fullBlockFilePath);
        throw new IOException(e);
    }
    return resultList;
}
Also used : SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) ArrayList(java.util.ArrayList) IOException(java.io.IOException)
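As a sketch of a possible caller (assumed, not taken from the project), the IUD compaction flow would pass in the segment id, the block name, the current update details and a compaction timestamp; the literal segment and block names below are placeholders.

// Sketch only: merge the delete delta files of one block into a single file
// named after the supplied timestamp.
private static List<CarbonDataMergerUtilResult> mergeDeleteDeltas(AbsoluteTableIdentifier identifier,
        SegmentUpdateDetails[] updateDetails) throws IOException {
    long compactionTimestamp = System.currentTimeMillis();
    // "0" and "part-0-0" are placeholder segment/block names for illustration.
    return CarbonDataMergerUtil.compactBlockDeleteDeltaFiles(
            "0", "part-0-0", identifier, updateDetails, compactionTimestamp);
}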

Example 10 with SegmentUpdateStatusManager

Use of org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager in project carbondata by apache.

From class CarbonUpdateUtil, method updateSegmentStatus:

/**
   * Merges the given block-level update details into the table update status file.
   *
   * @param updateDetailsList new or changed block entries to persist
   * @param table table whose update status file is being written
   * @param updateStatusFileIdentifier identifier (timestamp) of the status file to write
   * @param isCompaction true when called from compaction, which forces the delete delta start timestamp to be overwritten
   * @return true if the status file was written successfully, false otherwise
   */
public static boolean updateSegmentStatus(List<SegmentUpdateDetails> updateDetailsList, CarbonTable table, String updateStatusFileIdentifier, boolean isCompaction) {
    boolean status = false;
    SegmentUpdateStatusManager segmentUpdateStatusManager = new SegmentUpdateStatusManager(table.getAbsoluteTableIdentifier());
    ICarbonLock updateLock = segmentUpdateStatusManager.getTableUpdateStatusLock();
    boolean lockStatus = false;
    try {
        lockStatus = updateLock.lockWithRetries();
        if (lockStatus) {
            AbsoluteTableIdentifier absoluteTableIdentifier = table.getAbsoluteTableIdentifier();
            CarbonTablePath carbonTablePath = CarbonStorePath.getCarbonTablePath(absoluteTableIdentifier.getStorePath(), absoluteTableIdentifier.getCarbonTableIdentifier());
            // read the existing file if present and update the same.
            SegmentUpdateDetails[] oldDetails = segmentUpdateStatusManager.getUpdateStatusDetails();
            List<SegmentUpdateDetails> oldList = new ArrayList<>(Arrays.asList(oldDetails));
            for (SegmentUpdateDetails newBlockEntry : updateDetailsList) {
                int index = oldList.indexOf(newBlockEntry);
                if (index != -1) {
                    // update the element in existing list.
                    SegmentUpdateDetails blockDetail = oldList.get(index);
                    if (blockDetail.getDeleteDeltaStartTimestamp().isEmpty() || isCompaction) {
                        blockDetail.setDeleteDeltaStartTimestamp(newBlockEntry.getDeleteDeltaStartTimestamp());
                    }
                    blockDetail.setDeleteDeltaEndTimestamp(newBlockEntry.getDeleteDeltaEndTimestamp());
                    blockDetail.setStatus(newBlockEntry.getStatus());
                    blockDetail.setDeletedRowsInBlock(newBlockEntry.getDeletedRowsInBlock());
                } else {
                    // add the new details to the list.
                    oldList.add(newBlockEntry);
                }
            }
            segmentUpdateStatusManager.writeLoadDetailsIntoFile(oldList, updateStatusFileIdentifier);
            status = true;
        } else {
            LOGGER.error("Not able to acquire the segment update lock.");
            status = false;
        }
    } catch (IOException e) {
        status = false;
    } finally {
        if (lockStatus) {
            if (updateLock.unlock()) {
                LOGGER.info("Unlocked the segment update lock successfully.");
            } else {
                LOGGER.error("Not able to unlock the segment update lock.");
            }
        }
    }
    return status;
}
Also used : SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) ICarbonLock(org.apache.carbondata.core.locks.ICarbonLock) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) ArrayList(java.util.ArrayList) IOException(java.io.IOException)
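For completeness, a hedged sketch of how an update flow might commit its block entries through this method; the update details list and the status file identifier (typically the transaction timestamp) are assumed to come from the surrounding delete/update operation, and commitUpdateStatus is a placeholder name.

// Sketch only: persist the new block entries; "false" indicates this call is
// not made from compaction, so existing delete delta start timestamps are kept.
private static boolean commitUpdateStatus(CarbonTable table, List<SegmentUpdateDetails> updateDetails,
        String updateStatusFileIdentifier) {
    return CarbonUpdateUtil.updateSegmentStatus(updateDetails, table, updateStatusFileIdentifier, false);
}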

Aggregations

SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager): 10
IOException (java.io.IOException): 5
AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 5
SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager): 5
ArrayList (java.util.ArrayList): 4
CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath): 4
UpdateVO (org.apache.carbondata.core.mutate.UpdateVO): 3
FilterExpressionProcessor (org.apache.carbondata.core.scan.filter.FilterExpressionProcessor): 3
AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex): 2
TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo): 2
IndexBuilderException (org.apache.carbondata.core.datastore.exception.IndexBuilderException): 2
CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 2
BlockBTreeLeafNode (org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode): 2
ICarbonLock (org.apache.carbondata.core.locks.ICarbonLock): 2
FilterResolverIntf (org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf): 2
LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails): 2
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 2
ParseException (java.text.ParseException): 1
HashMap (java.util.HashMap): 1
List (java.util.List): 1