use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.
the class CarbonDataMergerUtil method identifySegmentsToBeMergedBasedOnSize.
/**
 * Identify the segments to be merged based on the size in case of major compaction.
 *
 * @param compactionSize compaction size in MB
 * @param listOfSegmentsAfterPreserve the segments list after
 *        preserving the configured number of latest loads
 * @param carbonLoadModel carbon load model
 * @param tablePath the store location of the segment
 * @return the list of segments that need to be merged
 *         based on the size in case of major compaction
 */
private static List<LoadMetadataDetails> identifySegmentsToBeMergedBasedOnSize(
    long compactionSize, List<LoadMetadataDetails> listOfSegmentsAfterPreserve,
    CarbonLoadModel carbonLoadModel, String tablePath) throws IOException {
  List<LoadMetadataDetails> segmentsToBeMerged =
      new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
  CarbonTable carbonTable = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable();
  // total length
  long totalLength = 0;
  // check the size of each segment and sum it up across partitions
  for (LoadMetadataDetails segment : listOfSegmentsAfterPreserve) {
    // compaction should skip streaming segments
    if (segment.getSegmentStatus() == SegmentStatus.STREAMING
        || segment.getSegmentStatus() == SegmentStatus.STREAMING_FINISH) {
      continue;
    }
    String segId = segment.getLoadName();
    // variable to store one segment's size across partitions
    long sizeOfOneSegmentAcrossPartition;
    if (segment.getSegmentFile() != null) {
      sizeOfOneSegmentAcrossPartition = CarbonUtil.getSizeOfSegment(
          carbonTable.getTablePath(), new Segment(segId, segment.getSegmentFile()));
    } else {
      sizeOfOneSegmentAcrossPartition = getSizeOfSegment(carbonTable.getTablePath(), segId);
    }
    // if the size of a segment is greater than the major compaction size, then ignore it
    if (sizeOfOneSegmentAcrossPartition > (compactionSize * 1024 * 1024)) {
      // if 2 segments have already been found for merging then stop the scan here and merge
      if (segmentsToBeMerged.size() > 1) {
        break;
      } else {
        // if only one segment is found then remove the earlier one from the list
        // and reset the total length to 0
        segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        totalLength = 0;
        continue;
      }
    }
    totalLength += sizeOfOneSegmentAcrossPartition;
    // add the segment as long as the accumulated size stays below the major compaction size
    if (totalLength < (compactionSize * 1024 * 1024)) {
      segmentsToBeMerged.add(segment);
    } else {
      // if 2 segments have already been found for merging then stop the scan here and merge
      if (segmentsToBeMerged.size() > 1) {
        break;
      } else {
        // if only one segment is found then remove the earlier one from the list, put this one,
        // and reset the total length to the size of the current segment
        segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        segmentsToBeMerged.add(segment);
        totalLength = sizeOfOneSegmentAcrossPartition;
      }
    }
  }
  return segmentsToBeMerged;
}
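To make the threshold arithmetic concrete, here is a minimal standalone sketch (plain Java, not CarbonData API) with made-up segment sizes, showing how the MB-valued compactionSize is converted to bytes and how undersized segments accumulate toward it; the reset-and-retry handling of the real method is intentionally omitted.
long compactionSize = 4;                              // threshold in MB, as in the method above
long[] segmentSizes = {1_000_000L, 2_500_000L, 6_000_000L, 1_200_000L};  // hypothetical sizes in bytes
long threshold = compactionSize * 1024 * 1024;        // 4 MB expressed in bytes
long total = 0;
java.util.List<Integer> picked = new java.util.ArrayList<>();
for (int i = 0; i < segmentSizes.length; i++) {
  if (segmentSizes[i] > threshold) {
    continue;                                         // oversized segment: skipped, as above
  }
  total += segmentSizes[i];
  if (total < threshold) {
    picked.add(i);                                    // running total still under the threshold
  }
}
// picked ends up as [0, 1]: the 6 MB segment is skipped outright, and the last
// segment would push the running total past the 4 MB threshold.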
use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.
the class CarbonDataMergerUtil method getSegListIUDCompactionQualified.
/**
 * Gets the list of segments that qualify for IUD compaction.
 *
 * @param segments the segments to evaluate
 * @param absoluteTableIdentifier identifier of the table being compacted
 * @param segmentUpdateStatusManager update status manager used to read delta file details
 * @param compactionTypeIUD type of IUD compaction (IUD_DELETE_DELTA or IUD_UPDDEL_DELTA)
 * @return the segments that qualify for IUD compaction
 */
public static List<String> getSegListIUDCompactionQualified(List<Segment> segments,
    AbsoluteTableIdentifier absoluteTableIdentifier,
    SegmentUpdateStatusManager segmentUpdateStatusManager, CompactionType compactionTypeIUD) {
  List<String> validSegments = new ArrayList<>();
  if (CompactionType.IUD_DELETE_DELTA == compactionTypeIUD) {
    int numberDeleteDeltaFilesThreshold =
        CarbonProperties.getInstance().getNoDeleteDeltaFilesThresholdForIUDCompaction();
    List<Segment> deleteSegments = new ArrayList<>();
    for (Segment seg : segments) {
      if (checkDeleteDeltaFilesInSeg(seg, segmentUpdateStatusManager,
          numberDeleteDeltaFilesThreshold)) {
        deleteSegments.add(seg);
      }
    }
    if (deleteSegments.size() > 0) {
      // in case of Delete Horizontal Compaction.
      for (Segment segName : deleteSegments) {
        List<String> tempSegments = getDeleteDeltaFilesInSeg(segName,
            segmentUpdateStatusManager, numberDeleteDeltaFilesThreshold);
        validSegments.addAll(tempSegments);
      }
    }
  } else if (CompactionType.IUD_UPDDEL_DELTA == compactionTypeIUD) {
    int numberUpdateDeltaFilesThreshold =
        CarbonProperties.getInstance().getNoUpdateDeltaFilesThresholdForIUDCompaction();
    for (Segment seg : segments) {
      if (checkUpdateDeltaFilesInSeg(seg, absoluteTableIdentifier, segmentUpdateStatusManager,
          numberUpdateDeltaFilesThreshold)) {
        validSegments.add(seg.getSegmentNo());
      }
    }
  }
  return validSegments;
}
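A hypothetical call site for this method; the segments, table identifier, and update status manager are assumed to come from the surrounding IUD compaction flow and are not constructed here.
List<String> qualified = CarbonDataMergerUtil.getSegListIUDCompactionQualified(
    segments,                         // List<Segment> to evaluate
    identifier,                       // AbsoluteTableIdentifier of the table
    updateStatusManager,              // SegmentUpdateStatusManager for the table
    CompactionType.IUD_UPDDEL_DELTA); // or CompactionType.IUD_DELETE_DELTA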
use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.
the class SegmentFileStore method commitDropPartitions.
/**
 * Update the table status file with the dropped partitions information.
 *
 * @param carbonTable the table whose partitions were dropped
 * @param uniqueId unique id of the drop-partition operation
 * @param toBeUpdatedSegments segments whose metadata needs to be updated
 * @param toBeDeleteSegments segments that need to be marked for delete
 * @throws IOException if the table status file cannot be updated
 */
public static void commitDropPartitions(CarbonTable carbonTable, String uniqueId,
    List<String> toBeUpdatedSegments, List<String> toBeDeleteSegments) throws IOException {
  if (toBeDeleteSegments.size() > 0 || toBeUpdatedSegments.size() > 0) {
    Set<Segment> segmentSet = new HashSet<>(
        new SegmentStatusManager(carbonTable.getAbsoluteTableIdentifier())
            .getValidAndInvalidSegments().getValidSegments());
    CarbonUpdateUtil.updateTableMetadataStatus(segmentSet, carbonTable, uniqueId, true,
        Segment.toSegmentList(toBeDeleteSegments), Segment.toSegmentList(toBeUpdatedSegments));
  }
}
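A hypothetical call site in a drop-partition flow; carbonTable and uniqueId are assumed to be supplied by the drop-partition operation, the segment ids below are purely illustrative, and java.util.Arrays and java.util.Collections are assumed to be imported.
List<String> toBeUpdated = Arrays.asList("1", "2");         // segments whose segment files changed
List<String> toBeDeleted = Collections.singletonList("3");  // segments to mark for delete
SegmentFileStore.commitDropPartitions(carbonTable, uniqueId, toBeUpdated, toBeDeleted);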
use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.
the class CarbonOutputCommitter method commitJob.
/**
 * Update the table status as success after the job has succeeded.
 *
 * @param context the job context of the completed job
 * @throws IOException if updating the table status fails
 */
@Override
public void commitJob(JobContext context) throws IOException {
  try {
    super.commitJob(context);
  } catch (IOException e) {
    // Ignore: during a concurrent load, another load's attempt to remove temporary folders
    // may cause a file-not-found exception. This does not impact the carbon load.
    LOGGER.warn(e.getMessage());
  }
  boolean overwriteSet = CarbonTableOutputFormat.isOverwriteSet(context.getConfiguration());
  CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(context.getConfiguration());
  LoadMetadataDetails newMetaEntry = loadModel.getCurrentLoadMetadataDetail();
  String readPath = CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath())
      + CarbonCommonConstants.FILE_SEPARATOR + loadModel.getSegmentId() + "_"
      + loadModel.getFactTimeStamp() + ".tmp";
  // Merge all partition files into a single file.
  String segmentFileName = SegmentFileStore.genSegmentFileName(loadModel.getSegmentId(),
      String.valueOf(loadModel.getFactTimeStamp()));
  SegmentFileStore.SegmentFile segmentFile = SegmentFileStore.mergeSegmentFiles(readPath,
      segmentFileName, CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath()));
  if (segmentFile != null) {
    // Move all files from the temp directory of each segment to the partition directory.
    SegmentFileStore.moveFromTempFolder(segmentFile,
        loadModel.getSegmentId() + "_" + loadModel.getFactTimeStamp() + ".tmp",
        loadModel.getTablePath());
    newMetaEntry.setSegmentFile(segmentFileName + CarbonTablePath.SEGMENT_EXT);
  }
  CarbonLoaderUtil.populateNewLoadMetaEntry(newMetaEntry, SegmentStatus.SUCCESS,
      loadModel.getFactTimeStamp(), true);
  CarbonTable carbonTable = loadModel.getCarbonDataLoadSchema().getCarbonTable();
  long segmentSize = CarbonLoaderUtil.addDataIndexSizeIntoMetaEntry(newMetaEntry,
      loadModel.getSegmentId(), carbonTable);
  if (segmentSize > 0 || overwriteSet) {
    Object operationContext = getOperationContext();
    if (operationContext != null) {
      ((OperationContext) operationContext)
          .setProperty("current.segmentfile", newMetaEntry.getSegmentFile());
      LoadEvents.LoadTablePreStatusUpdateEvent event =
          new LoadEvents.LoadTablePreStatusUpdateEvent(
              carbonTable.getCarbonTableIdentifier(), loadModel);
      try {
        OperationListenerBus.getInstance().fireEvent(event, (OperationContext) operationContext);
      } catch (Exception e) {
        throw new IOException(e);
      }
    }
    String uniqueId = null;
    if (overwriteSet) {
      if (segmentSize == 0) {
        newMetaEntry.setSegmentStatus(SegmentStatus.MARKED_FOR_DELETE);
      }
      uniqueId = overwritePartitions(loadModel, newMetaEntry);
    } else {
      CarbonLoaderUtil.recordNewLoadMetadata(newMetaEntry, loadModel, false, false);
    }
    DataMapStatusManager.disableDataMapsOfTable(carbonTable);
    if (operationContext != null) {
      LoadEvents.LoadTablePostStatusUpdateEvent postStatusUpdateEvent =
          new LoadEvents.LoadTablePostStatusUpdateEvent(loadModel);
      try {
        OperationListenerBus.getInstance().fireEvent(postStatusUpdateEvent,
            (OperationContext) operationContext);
      } catch (Exception e) {
        throw new IOException(e);
      }
    }
    String updateTime =
        context.getConfiguration().get(CarbonTableOutputFormat.UPADTE_TIMESTAMP, null);
    String segmentsToBeDeleted =
        context.getConfiguration().get(CarbonTableOutputFormat.SEGMENTS_TO_BE_DELETED, "");
    List<Segment> segmentDeleteList = Segment.toSegmentList(segmentsToBeDeleted.split(","));
    Set<Segment> segmentSet = new HashSet<>(
        new SegmentStatusManager(carbonTable.getAbsoluteTableIdentifier())
            .getValidAndInvalidSegments().getValidSegments());
    if (updateTime != null) {
      CarbonUpdateUtil.updateTableMetadataStatus(segmentSet, carbonTable, updateTime, true,
          segmentDeleteList);
    } else if (uniqueId != null) {
      CarbonUpdateUtil.updateTableMetadataStatus(segmentSet, carbonTable, uniqueId, true,
          segmentDeleteList);
    }
  } else {
    CarbonLoaderUtil.updateTableStatusForFailure(loadModel);
  }
  if (segmentLock != null) {
    segmentLock.unlock();
  }
}
use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.
the class CarbonTableInputFormat method getBlockRowCount.
/**
 * Get the row count of each block, and the mapping of segment id to block count.
 */
public BlockMappingVO getBlockRowCount(Job job, CarbonTable table,
    List<PartitionSpec> partitions) throws IOException {
  AbsoluteTableIdentifier identifier = table.getAbsoluteTableIdentifier();
  TableDataMap blockletMap = DataMapStoreManager.getInstance().getDefaultDataMap(table);
  LoadMetadataDetails[] loadMetadataDetails = SegmentStatusManager
      .readTableStatusFile(CarbonTablePath.getTableStatusFilePath(identifier.getTablePath()));
  SegmentUpdateStatusManager updateStatusManager =
      new SegmentUpdateStatusManager(table, loadMetadataDetails);
  SegmentStatusManager.ValidAndInvalidSegmentsInfo allSegments =
      new SegmentStatusManager(identifier).getValidAndInvalidSegments(loadMetadataDetails);
  Map<String, Long> blockRowCountMapping = new HashMap<>();
  Map<String, Long> segmentAndBlockCountMapping = new HashMap<>();
  // TODO: currently only batch segments are supported, add support for streaming tables
  List<Segment> filteredSegment = getFilteredSegment(job, allSegments.getValidSegments(), false);
  List<ExtendedBlocklet> blocklets = blockletMap.prune(filteredSegment, null, partitions);
  for (ExtendedBlocklet blocklet : blocklets) {
    String blockName = blocklet.getPath();
    blockName = CarbonTablePath.getCarbonDataFileName(blockName);
    blockName = blockName + CarbonTablePath.getCarbonDataExtension();
    long rowCount = blocklet.getDetailInfo().getRowCount();
    String key = CarbonUpdateUtil.getSegmentBlockNameKey(blocklet.getSegmentId(), blockName);
    // if the block is invalid then don't add the count
    SegmentUpdateDetails details = updateStatusManager.getDetailsForABlock(key);
    if (null == details || !CarbonUpdateUtil.isBlockInvalid(details.getSegmentStatus())) {
      Long blockCount = blockRowCountMapping.get(key);
      if (blockCount == null) {
        blockCount = 0L;
        Long count = segmentAndBlockCountMapping.get(blocklet.getSegmentId());
        if (count == null) {
          count = 0L;
        }
        segmentAndBlockCountMapping.put(blocklet.getSegmentId(), count + 1);
      }
      blockCount += rowCount;
      blockRowCountMapping.put(key, blockCount);
    }
  }
  return new BlockMappingVO(blockRowCountMapping, segmentAndBlockCountMapping);
}
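A hypothetical call site; format is a CarbonTableInputFormat instance, and job, carbonTable, and partitions are assumed to be available from the surrounding query planning code.
BlockMappingVO blockMapping = format.getBlockRowCount(job, carbonTable, partitions);
// blockMapping wraps the two maps built above: block key -> row count and
// segment id -> number of valid blocks.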