Search in sources:

Example 16 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

the class CarbonDataMergerUtil method identifySegmentsToBeMergedBasedOnSize.

/**
 * Identify the segments to be merged based on the Size in case of Major compaction.
 *
 * @param compactionSize compaction size in MB format
 * @param listOfSegmentsAfterPreserve  the segments list after
 *        preserving the configured number of latest loads
 * @param carbonLoadModel carbon load model
 * @param tablePath the store location of the segment
 * @return the list of segments that need to be merged
 *         based on the Size in case of Major compaction
 * @throws IOException if a segment size cannot be read from the file system
 */
private static List<LoadMetadataDetails> identifySegmentsToBeMergedBasedOnSize(long compactionSize, List<LoadMetadataDetails> listOfSegmentsAfterPreserve, CarbonLoadModel carbonLoadModel, String tablePath) throws IOException {
    List<LoadMetadataDetails> segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    CarbonTable carbonTable = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable();
    // compaction threshold converted from MB to bytes, hoisted out of the loop
    long compactionSizeInBytes = compactionSize * 1024 * 1024;
    // running total of the sizes of the segments selected so far
    long totalLength = 0;
    // check size of each segment, sum it up across partitions
    for (LoadMetadataDetails segment : listOfSegmentsAfterPreserve) {
        // compaction should skip streaming segments
        if (segment.getSegmentStatus() == SegmentStatus.STREAMING || segment.getSegmentStatus() == SegmentStatus.STREAMING_FINISH) {
            continue;
        }
        String segId = segment.getLoadName();
        // size of one segment summed across all its partitions
        long sizeOfOneSegmentAcrossPartition;
        if (segment.getSegmentFile() != null) {
            // segment-file based layout: size is derived from the files listed in the segment file
            sizeOfOneSegmentAcrossPartition = CarbonUtil.getSizeOfSegment(carbonTable.getTablePath(), new Segment(segId, segment.getSegmentFile()));
        } else {
            sizeOfOneSegmentAcrossPartition = getSizeOfSegment(carbonTable.getTablePath(), segId);
        }
        // if size of a segment is greater than the Major compaction size, then ignore it
        if (sizeOfOneSegmentAcrossPartition > compactionSizeInBytes) {
            // if already 2 segments have been found for merging then stop scan here and merge.
            if (segmentsToBeMerged.size() > 1) {
                break;
            } else {
                // if at most one segment is found so far then drop it from the list and
                // reset the total length to 0, since an oversized segment breaks the run.
                segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
                totalLength = 0;
                continue;
            }
        }
        totalLength += sizeOfOneSegmentAcrossPartition;
        // keep accumulating segments while the running total stays below the threshold
        if (totalLength < compactionSizeInBytes) {
            segmentsToBeMerged.add(segment);
        } else {
            // if already 2 segments have been found for merging then stop scan here and merge.
            if (segmentsToBeMerged.size() > 1) {
                break;
            } else {
                // if only one segment is found then remove the earlier one in list and put this.
                // reset the total length to the current identified segment.
                segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
                segmentsToBeMerged.add(segment);
                totalLength = sizeOfOneSegmentAcrossPartition;
            }
        }
    }
    return segmentsToBeMerged;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) Segment(org.apache.carbondata.core.datamap.Segment)

Example 17 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

the class CarbonDataMergerUtil method getSegListIUDCompactionQualified.

/**
 * Gets the list of segments which qualify for IUD compaction.
 *
 * @param segments the candidate segments to examine
 * @param absoluteTableIdentifier identifier of the table under compaction
 * @param segmentUpdateStatusManager manager holding the update/delete delta status of the table
 * @param compactionTypeIUD the IUD compaction flavor (delete delta or update/delete delta)
 * @return for IUD_DELETE_DELTA: the qualified delete delta file names;
 *         for IUD_UPDDEL_DELTA: the qualified segment numbers; empty list otherwise
 */
public static List<String> getSegListIUDCompactionQualified(List<Segment> segments, AbsoluteTableIdentifier absoluteTableIdentifier, SegmentUpdateStatusManager segmentUpdateStatusManager, CompactionType compactionTypeIUD) {
    List<String> validSegments = new ArrayList<>();
    if (CompactionType.IUD_DELETE_DELTA == compactionTypeIUD) {
        int numberDeleteDeltaFilesThreshold = CarbonProperties.getInstance().getNoDeleteDeltaFilesThresholdForIUDCompaction();
        // Delete Horizontal Compaction: collect the delete delta files of every
        // segment whose delta file count crosses the configured threshold.
        // (Single pass; the original intermediate deleteSegments list and its
        // redundant size() > 0 guard were unnecessary.)
        for (Segment seg : segments) {
            if (checkDeleteDeltaFilesInSeg(seg, segmentUpdateStatusManager, numberDeleteDeltaFilesThreshold)) {
                validSegments.addAll(getDeleteDeltaFilesInSeg(seg, segmentUpdateStatusManager, numberDeleteDeltaFilesThreshold));
            }
        }
    } else if (CompactionType.IUD_UPDDEL_DELTA == compactionTypeIUD) {
        int numberUpdateDeltaFilesThreshold = CarbonProperties.getInstance().getNoUpdateDeltaFilesThresholdForIUDCompaction();
        // collect the segment numbers whose update delta file count crosses the threshold
        for (Segment seg : segments) {
            if (checkUpdateDeltaFilesInSeg(seg, absoluteTableIdentifier, segmentUpdateStatusManager, numberUpdateDeltaFilesThreshold)) {
                validSegments.add(seg.getSegmentNo());
            }
        }
    }
    return validSegments;
}
Also used : Segment(org.apache.carbondata.core.datamap.Segment)

Example 18 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

the class SegmentFileStore method commitDropPartitions.

/**
 * Update the table status file with the dropped partitions information.
 *
 * @param carbonTable the table whose status metadata is updated
 * @param uniqueId id stamped on the metadata update operation
 * @param toBeUpdatedSegments names of segments whose metadata must be updated
 * @param toBeDeleteSegments names of segments to be marked for delete
 * @throws IOException if the table status file cannot be updated
 */
public static void commitDropPartitions(CarbonTable carbonTable, String uniqueId, List<String> toBeUpdatedSegments, List<String> toBeDeleteSegments) throws IOException {
    // nothing to commit when neither list carries a segment
    if (toBeDeleteSegments.isEmpty() && toBeUpdatedSegments.isEmpty()) {
        return;
    }
    // snapshot of all currently valid segments of the table
    SegmentStatusManager statusManager = new SegmentStatusManager(carbonTable.getAbsoluteTableIdentifier());
    Set<Segment> validSegments = new HashSet<>(statusManager.getValidAndInvalidSegments().getValidSegments());
    CarbonUpdateUtil.updateTableMetadataStatus(validSegments, carbonTable, uniqueId, true, Segment.toSegmentList(toBeDeleteSegments), Segment.toSegmentList(toBeUpdatedSegments));
}
Also used : SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) Segment(org.apache.carbondata.core.datamap.Segment) HashSet(java.util.HashSet)

Example 19 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

the class CarbonOutputCommitter method commitJob.

/**
 * Update the tablestatus as success after job is success
 *
 * @param context job context carrying the carbon load model and output configuration
 * @throws IOException if recording the load status fails or a status-update listener throws
 */
@Override
public void commitJob(JobContext context) throws IOException {
    try {
        super.commitJob(context);
    } catch (IOException e) {
        // ignore, in case of concurrent load it try to remove temporary folders by other load may
        // cause file not found exception. This will not impact carbon load,
        LOGGER.warn(e.getMessage());
    }
    boolean overwriteSet = CarbonTableOutputFormat.isOverwriteSet(context.getConfiguration());
    CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(context.getConfiguration());
    LoadMetadataDetails newMetaEntry = loadModel.getCurrentLoadMetadataDetail();
    // temp location ("<segmentId>_<factTimeStamp>.tmp") holding the per-task segment files
    String readPath = CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath()) + CarbonCommonConstants.FILE_SEPARATOR + loadModel.getSegmentId() + "_" + loadModel.getFactTimeStamp() + ".tmp";
    // Merge all partition files into a single file.
    String segmentFileName = SegmentFileStore.genSegmentFileName(loadModel.getSegmentId(), String.valueOf(loadModel.getFactTimeStamp()));
    SegmentFileStore.SegmentFile segmentFile = SegmentFileStore.mergeSegmentFiles(readPath, segmentFileName, CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath()));
    if (segmentFile != null) {
        // Move all files from temp directory of each segment to partition directory
        SegmentFileStore.moveFromTempFolder(segmentFile, loadModel.getSegmentId() + "_" + loadModel.getFactTimeStamp() + ".tmp", loadModel.getTablePath());
        newMetaEntry.setSegmentFile(segmentFileName + CarbonTablePath.SEGMENT_EXT);
    }
    // mark the new load entry SUCCESS before it is written to the table status file
    CarbonLoaderUtil.populateNewLoadMetaEntry(newMetaEntry, SegmentStatus.SUCCESS, loadModel.getFactTimeStamp(), true);
    CarbonTable carbonTable = loadModel.getCarbonDataLoadSchema().getCarbonTable();
    long segmentSize = CarbonLoaderUtil.addDataIndexSizeIntoMetaEntry(newMetaEntry, loadModel.getSegmentId(), carbonTable);
    // a zero-size segment is committed only in overwrite mode (and then marked for delete below)
    if (segmentSize > 0 || overwriteSet) {
        Object operationContext = getOperationContext();
        if (operationContext != null) {
            // fire the pre-status-update event so listeners can act before the commit;
            // a listener failure aborts the commit as an IOException
            ((OperationContext) operationContext).setProperty("current.segmentfile", newMetaEntry.getSegmentFile());
            LoadEvents.LoadTablePreStatusUpdateEvent event = new LoadEvents.LoadTablePreStatusUpdateEvent(carbonTable.getCarbonTableIdentifier(), loadModel);
            try {
                OperationListenerBus.getInstance().fireEvent(event, (OperationContext) operationContext);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
        String uniqueId = null;
        if (overwriteSet) {
            if (segmentSize == 0) {
                // empty overwrite load: the new segment carries no data, mark it for delete
                newMetaEntry.setSegmentStatus(SegmentStatus.MARKED_FOR_DELETE);
            }
            uniqueId = overwritePartitions(loadModel, newMetaEntry);
        } else {
            CarbonLoaderUtil.recordNewLoadMetadata(newMetaEntry, loadModel, false, false);
        }
        // new data invalidates existing datamaps; disable them until rebuilt
        DataMapStatusManager.disableDataMapsOfTable(carbonTable);
        if (operationContext != null) {
            LoadEvents.LoadTablePostStatusUpdateEvent postStatusUpdateEvent = new LoadEvents.LoadTablePostStatusUpdateEvent(loadModel);
            try {
                OperationListenerBus.getInstance().fireEvent(postStatusUpdateEvent, (OperationContext) operationContext);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
        String updateTime = context.getConfiguration().get(CarbonTableOutputFormat.UPADTE_TIMESTAMP, null);
        String segmentsToBeDeleted = context.getConfiguration().get(CarbonTableOutputFormat.SEGMENTS_TO_BE_DELETED, "");
        // NOTE(review): "".split(",") yields [""] when the config is absent — presumably
        // Segment.toSegmentList tolerates a blank name; confirm, else guard the empty case
        List<Segment> segmentDeleteList = Segment.toSegmentList(segmentsToBeDeleted.split(","));
        Set<Segment> segmentSet = new HashSet<>(new SegmentStatusManager(carbonTable.getAbsoluteTableIdentifier()).getValidAndInvalidSegments().getValidSegments());
        if (updateTime != null) {
            // update flow: stamp the valid segments with the update timestamp
            CarbonUpdateUtil.updateTableMetadataStatus(segmentSet, carbonTable, updateTime, true, segmentDeleteList);
        } else if (uniqueId != null) {
            // overwrite flow: stamp the valid segments with the overwrite operation id
            CarbonUpdateUtil.updateTableMetadataStatus(segmentSet, carbonTable, uniqueId, true, segmentDeleteList);
        }
    } else {
        // nothing was written and this is not an overwrite: record the load as failed
        CarbonLoaderUtil.updateTableStatusForFailure(loadModel);
    }
    if (segmentLock != null) {
        // release the per-segment lock — presumably acquired earlier in the commit flow; confirm
        segmentLock.unlock();
    }
}
Also used : OperationContext(org.apache.carbondata.events.OperationContext) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) SegmentFileStore(org.apache.carbondata.core.metadata.SegmentFileStore) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) IOException(java.io.IOException) IOException(java.io.IOException) Segment(org.apache.carbondata.core.datamap.Segment) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) LoadEvents(org.apache.carbondata.processing.loading.events.LoadEvents) HashSet(java.util.HashSet)

Example 20 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

the class CarbonTableInputFormat method getBlockRowCount.

/**
 * Get the row count of the Block and mapping of segment and Block count.
 *
 * @param job the hadoop job used while pruning segments
 * @param table the table whose blocks are counted
 * @param partitions partitions to restrict the pruning to
 * @return mapping of block key to row count plus mapping of segment id to block count
 * @throws IOException if the table status file or the datamap cannot be read
 */
public BlockMappingVO getBlockRowCount(Job job, CarbonTable table, List<PartitionSpec> partitions) throws IOException {
    AbsoluteTableIdentifier tableIdentifier = table.getAbsoluteTableIdentifier();
    TableDataMap defaultDataMap = DataMapStoreManager.getInstance().getDefaultDataMap(table);
    LoadMetadataDetails[] loadDetails = SegmentStatusManager.readTableStatusFile(CarbonTablePath.getTableStatusFilePath(tableIdentifier.getTablePath()));
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(table, loadDetails);
    SegmentStatusManager.ValidAndInvalidSegmentsInfo segmentsInfo = new SegmentStatusManager(tableIdentifier).getValidAndInvalidSegments(loadDetails);
    Map<String, Long> blockRowCounts = new HashMap<>();
    Map<String, Long> segmentBlockCounts = new HashMap<>();
    // TODO: currently only batch segment is supported, add support for streaming table
    List<Segment> prunableSegments = getFilteredSegment(job, segmentsInfo.getValidSegments(), false);
    for (ExtendedBlocklet blocklet : defaultDataMap.prune(prunableSegments, null, partitions)) {
        // rebuild the carbondata file name for this blocklet's block
        String dataFileName = CarbonTablePath.getCarbonDataFileName(blocklet.getPath()) + CarbonTablePath.getCarbonDataExtension();
        String blockKey = CarbonUpdateUtil.getSegmentBlockNameKey(blocklet.getSegmentId(), dataFileName);
        // skip blocks invalidated by an update/delete operation
        SegmentUpdateDetails updateDetails = updateStatusManager.getDetailsForABlock(blockKey);
        if (null != updateDetails && CarbonUpdateUtil.isBlockInvalid(updateDetails.getSegmentStatus())) {
            continue;
        }
        Long rowsInBlock = blockRowCounts.get(blockKey);
        if (rowsInBlock == null) {
            rowsInBlock = 0L;
            // first blocklet seen for this block: bump the block count of its segment
            Long blocksInSegment = segmentBlockCounts.get(blocklet.getSegmentId());
            segmentBlockCounts.put(blocklet.getSegmentId(), (blocksInSegment == null ? 0L : blocksInSegment) + 1);
        }
        blockRowCounts.put(blockKey, rowsInBlock + blocklet.getDetailInfo().getRowCount());
    }
    return new BlockMappingVO(blockRowCounts, segmentBlockCounts);
}
Also used : BlockMappingVO(org.apache.carbondata.core.mutate.data.BlockMappingVO) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) HashMap(java.util.HashMap) TableDataMap(org.apache.carbondata.core.datamap.TableDataMap) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) Segment(org.apache.carbondata.core.datamap.Segment) SegmentUpdateDetails(org.apache.carbondata.core.mutate.SegmentUpdateDetails) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet)

Aggregations

Segment (org.apache.carbondata.core.datamap.Segment)23 ArrayList (java.util.ArrayList)10 IOException (java.io.IOException)8 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)8 SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager)8 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)6 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)6 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)5 SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager)5 CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit)4 InputSplit (org.apache.hadoop.mapreduce.InputSplit)4 SegmentFileStore (org.apache.carbondata.core.metadata.SegmentFileStore)3 Expression (org.apache.carbondata.core.scan.expression.Expression)3 SingleTableProvider (org.apache.carbondata.core.scan.filter.SingleTableProvider)3 TableProvider (org.apache.carbondata.core.scan.filter.TableProvider)3 FilterResolverIntf (org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)3 BitSet (java.util.BitSet)2 HashSet (java.util.HashSet)2 FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory)2 ICarbonLock (org.apache.carbondata.core.locks.ICarbonLock)2