use of org.apache.carbondata.core.index.Segment in project carbondata by apache.
the class CarbonDataMergerUtil method getSegListIUDCompactionQualified.
/**
 * Gets the list of segments that qualify for IUD (insert/update/delete) compaction.
 *
 * @param segments candidate segments to evaluate
 * @param segmentUpdateStatusManager status manager used to inspect delete-delta files
 * @return segment numbers and block names that qualify for IUD compaction
 */
public static List<String> getSegListIUDCompactionQualified(List<Segment> segments,
    SegmentUpdateStatusManager segmentUpdateStatusManager) throws IOException {
  List<String> validSegments = new ArrayList<>();
  int numberDeleteDeltaFilesThreshold =
      CarbonProperties.getInstance().getNoDeleteDeltaFilesThresholdForIUDCompaction();
  for (Segment seg : segments) {
    List<String> segmentNoAndBlocks =
        checkDeleteDeltaFilesInSeg(seg, segmentUpdateStatusManager, numberDeleteDeltaFilesThreshold);
    validSegments.addAll(segmentNoAndBlocks);
  }
  return validSegments;
}
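A minimal caller sketch for the method above; carbonTable and segments are assumed to be available, and the single-argument SegmentUpdateStatusManager constructor is an assumption of this sketch, not taken from the snippet:

// illustrative caller, not part of the CarbonData sources
SegmentUpdateStatusManager updateStatusManager =
    new SegmentUpdateStatusManager(carbonTable); // constructor assumed
List<String> iudCandidates =
    CarbonDataMergerUtil.getSegListIUDCompactionQualified(segments, updateStatusManager);
// each entry identifies a segment (and its blocks) whose delete-delta file count
// crossed the configured threshold
for (String candidate : iudCandidates) {
  System.out.println("IUD compaction candidate: " + candidate);
}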
use of org.apache.carbondata.core.index.Segment in project carbondata by apache.
the class TestBlockletIndexFactory method getValidDistributables.
@Test
public void getValidDistributables() throws IOException {
  // carbonTable, blockletIndexFactory and tableBlockIndexUniqueIdentifier are
  // fields of the test class, initialized outside this snippet
  BlockletIndexInputSplit blockletIndexInputSplit = new BlockletIndexInputSplit(
      "/opt/store/default/carbon_table/Fact/Part0/Segment_0/0_batchno0-0-1521012756709.carbonindex");
  Segment segment = new Segment("0", null,
      new TableStatusReadCommittedScope(carbonTable.getAbsoluteTableIdentifier(),
          new Configuration(false)));
  blockletIndexInputSplit.setSegment(segment);
  BlockletIndexInputSplit indexInputSplit = new BlockletIndexInputSplit(
      "/opt/store/default/carbon_table/Fact/Part0/Segment_0/0_batchno0-0-1521012756701.carbonindex");
  indexInputSplit.setSegment(segment);
  List<IndexInputSplit> indexInputSplits = new ArrayList<>(2);
  indexInputSplits.add(blockletIndexInputSplit);
  indexInputSplits.add(indexInputSplit);
  new MockUp<BlockletIndexFactory>() {

    @Mock
    Set<TableBlockIndexUniqueIdentifier> getTableBlockIndexUniqueIdentifiers(Segment segment) {
      TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier1 =
          new TableBlockIndexUniqueIdentifier(
              "/opt/store/default/carbon_table/Fact/Part0/Segment_0",
              "0_batchno0-0-1521012756701.carbonindex", null, "0");
      Set<TableBlockIndexUniqueIdentifier> tableBlockIndexUniqueIdentifiers = new HashSet<>(3);
      tableBlockIndexUniqueIdentifiers.add(tableBlockIndexUniqueIdentifier);
      tableBlockIndexUniqueIdentifiers.add(tableBlockIndexUniqueIdentifier1);
      return tableBlockIndexUniqueIdentifiers;
    }
  };
  List<IndexInputSplit> validDistributables = blockletIndexFactory.getAllUncached(indexInputSplits);
  assert 1 == validDistributables.size();
}
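The MockUp block above uses JMockit to swap in a canned implementation of getTableBlockIndexUniqueIdentifiers for the duration of the test, steering getAllUncached to report a single uncached split. A minimal sketch of the JMockit pattern, with SomeService and lookup as placeholder names:

import mockit.Mock;
import mockit.MockUp;

// while this MockUp is active, calls to SomeService.lookup(...) hit the fake
new MockUp<SomeService>() {
  @Mock
  String lookup(String key) {
    return "stubbed-" + key; // canned result; the real implementation is bypassed
  }
};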
use of org.apache.carbondata.core.index.Segment in project carbondata by apache.
the class BlockletIndexInputFormat method createRecordReader.
@Override
public RecordReader<TableBlockIndexUniqueIdentifier, BlockletIndexDetailsWithSchema> createRecordReader(
    InputSplit inputSplit, TaskAttemptContext taskAttemptContext) {
  // readCommittedScope and table are fields of the enclosing input format
  return new RecordReader<TableBlockIndexUniqueIdentifier, BlockletIndexDetailsWithSchema>() {

    private BlockletIndexWrapper wrapper = null;

    private TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier = null;

    private TableBlockIndexUniqueIdentifierWrapper tableBlockIndexUniqueIdentifierWrapper;

    Cache<TableBlockIndexUniqueIdentifierWrapper, BlockletIndexWrapper> cache =
        CacheProvider.getInstance().createCache(CacheType.DRIVER_BLOCKLET_INDEX);

    private Iterator<TableBlockIndexUniqueIdentifier> iterator;

    // cache to avoid listing the same files multiple times
    private Map<String, Map<String, BlockMetaInfo>> segInfoCache = new HashMap<>();

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
      BlockletIndexInputSplit segmentDistributable = (BlockletIndexInputSplit) inputSplit;
      TableBlockIndexUniqueIdentifier tableSegmentUniqueIdentifier =
          segmentDistributable.getTableBlockIndexUniqueIdentifier();
      Segment segment =
          Segment.toSegment(tableSegmentUniqueIdentifier.getSegmentId(), readCommittedScope);
      iterator = BlockletIndexUtil.getTableBlockUniqueIdentifiers(segment).iterator();
    }

    @Override
    public boolean nextKeyValue() {
      if (iterator.hasNext()) {
        TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier = iterator.next();
        this.tableBlockIndexUniqueIdentifier = tableBlockIndexUniqueIdentifier;
        TableBlockIndexUniqueIdentifierWrapper tableBlockIndexUniqueIdentifierWrapper =
            new TableBlockIndexUniqueIdentifierWrapper(tableBlockIndexUniqueIdentifier, table,
                false, true, true);
        this.tableBlockIndexUniqueIdentifierWrapper = tableBlockIndexUniqueIdentifierWrapper;
        wrapper = ((BlockletIndexStore) cache).get(tableBlockIndexUniqueIdentifierWrapper,
            segInfoCache);
        return true;
      }
      return false;
    }

    @Override
    public TableBlockIndexUniqueIdentifier getCurrentKey() {
      return tableBlockIndexUniqueIdentifier;
    }

    @Override
    public BlockletIndexDetailsWithSchema getCurrentValue() {
      return new BlockletIndexDetailsWithSchema(wrapper, table.getTableInfo().isSchemaModified());
    }

    @Override
    public float getProgress() {
      return 0;
    }

    @Override
    public void close() {
      if (null != tableBlockIndexUniqueIdentifierWrapper) {
        if (null != wrapper && null != wrapper.getIndexes() && !wrapper.getIndexes().isEmpty()) {
          String segmentId =
              tableBlockIndexUniqueIdentifierWrapper.getTableBlockIndexUniqueIdentifier()
                  .getSegmentId();
          // the segmentId is the same for all indexes and the segmentProperties cache is
          // maintained at segment level, so this needs to be called only once for clearing
          SegmentPropertiesAndSchemaHolder.getInstance().invalidate(segmentId,
              wrapper.getIndexes().get(0).getSegmentPropertiesWrapper(),
              tableBlockIndexUniqueIdentifierWrapper.isAddTableBlockToUnsafeAndLRUCache());
        }
      }
    }
  };
}
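For context, a sketch of how the Hadoop MapReduce framework (or any direct caller) would drive a reader like this; format, split, and context are assumed to be set up elsewhere, and the enclosing method is assumed to declare throws IOException, InterruptedException:

RecordReader<TableBlockIndexUniqueIdentifier, BlockletIndexDetailsWithSchema> reader =
    format.createRecordReader(split, context);
try {
  reader.initialize(split, context);
  while (reader.nextKeyValue()) {
    TableBlockIndexUniqueIdentifier key = reader.getCurrentKey();
    BlockletIndexDetailsWithSchema value = reader.getCurrentValue();
    // consume the cached blocklet index for this index file ...
  }
} finally {
  reader.close(); // triggers the segment-level cache invalidation shown above
}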
use of org.apache.carbondata.core.index.Segment in project carbondata by apache.
the class SegmentFileStore method writeSegmentFileForOthers.
public static boolean writeSegmentFileForOthers(CarbonTable carbonTable, Segment segment,
    PartitionSpec partitionSpec, List<FileStatus> partitionDataFiles) throws IOException {
  String tablePath = carbonTable.getTablePath();
  CarbonFile[] dataFiles = null;
  if (partitionDataFiles.isEmpty()) {
    CarbonFile segmentFolder = FileFactory.getCarbonFile(segment.getSegmentPath());
    dataFiles = segmentFolder.listFiles(
        file -> (!file.getName().equals("_SUCCESS") && !file.getName().endsWith(".crc")));
  } else {
    dataFiles = partitionDataFiles.stream()
        .map(fileStatus -> FileFactory.getCarbonFile(fileStatus.getPath().toString()))
        .toArray(CarbonFile[]::new);
  }
  if (dataFiles != null && dataFiles.length > 0) {
    SegmentFile segmentFile = new SegmentFile();
    segmentFile.setOptions(segment.getOptions());
    FolderDetails folderDetails = new FolderDetails();
    folderDetails.setStatus(SegmentStatus.SUCCESS.getMessage());
    folderDetails.setRelative(false);
    if (!partitionDataFiles.isEmpty()) {
      folderDetails.setPartitions(partitionSpec.getPartitions());
      segmentFile.addPath(partitionSpec.getLocation().toString(), folderDetails);
    } else {
      segmentFile.addPath(segment.getSegmentPath(), folderDetails);
    }
    for (CarbonFile file : dataFiles) {
      folderDetails.getFiles().add(file.getName());
    }
    String segmentFileFolder = CarbonTablePath.getSegmentFilesLocation(tablePath);
    CarbonFile carbonFile = FileFactory.getCarbonFile(segmentFileFolder);
    if (!carbonFile.exists()) {
      carbonFile.mkdirs();
    }
    // write segment info to a new file.
    writeSegmentFile(segmentFile, segmentFileFolder + File.separator + segment.getSegmentFileName());
    return true;
  }
  return false;
}
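A hedged usage sketch for the non-partition branch above; carbonTable and segment are assumed to exist. Passing an empty file list makes the method list the segment folder itself, and partitionSpec may then be null because the code above only reads it when partition data files are supplied:

boolean written = SegmentFileStore.writeSegmentFileForOthers(
    carbonTable, segment, null, new ArrayList<FileStatus>());
if (!written) {
  // the segment folder contained no data files, so no segment file was written
}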
use of org.apache.carbondata.core.index.Segment in project carbondata by apache.
the class SegmentStatusManager method getValidAndInvalidSegments.
/**
 * Gets the valid and invalid segments for the given load status details.
 */
public ValidAndInvalidSegmentsInfo getValidAndInvalidSegments(Boolean isChildTable,
    LoadMetadataDetails[] loadMetadataDetails, ReadCommittedScope readCommittedScope)
    throws IOException {
  // @TODO: move reading LoadStatus file to separate class
  List<Segment> listOfValidSegments = new ArrayList<>(10);
  List<Segment> listOfValidUpdatedSegments = new ArrayList<>(10);
  List<Segment> listOfInvalidSegments = new ArrayList<>(10);
  List<Segment> listOfStreamSegments = new ArrayList<>(10);
  List<Segment> listOfInProgressSegments = new ArrayList<>(10);
  Map<String, List<String>> mergedLoadMapping = new HashMap<>();
  try {
    if (loadMetadataDetails == null) {
      loadMetadataDetails =
          readTableStatusFile(CarbonTablePath.getTableStatusFilePath(identifier.getTablePath()));
    }
    if (readCommittedScope == null) {
      readCommittedScope =
          new TableStatusReadCommittedScope(identifier, loadMetadataDetails, configuration);
    }
    // iterate the array directly
    for (LoadMetadataDetails segment : loadMetadataDetails) {
      if (SegmentStatus.SUCCESS == segment.getSegmentStatus()
          || SegmentStatus.MARKED_FOR_UPDATE == segment.getSegmentStatus()
          || SegmentStatus.LOAD_PARTIAL_SUCCESS == segment.getSegmentStatus()
          || SegmentStatus.STREAMING == segment.getSegmentStatus()
          || SegmentStatus.STREAMING_FINISH == segment.getSegmentStatus()) {
        // check for merged loads.
        if (null != segment.getMergedLoadName()) {
          Segment seg = new Segment(segment.getMergedLoadName(), segment.getSegmentFile(),
              readCommittedScope, segment);
          if (!listOfValidSegments.contains(seg)) {
            listOfValidSegments.add(seg);
          }
          // if the merged load is updated, put it in the updated list as well
          if (SegmentStatus.MARKED_FOR_UPDATE == segment.getSegmentStatus()) {
            listOfValidUpdatedSegments.add(seg);
          }
          continue;
        }
        if (SegmentStatus.MARKED_FOR_UPDATE == segment.getSegmentStatus()) {
          listOfValidUpdatedSegments.add(
              new Segment(segment.getLoadName(), segment.getSegmentFile(), readCommittedScope));
        }
        if (SegmentStatus.STREAMING == segment.getSegmentStatus()
            || SegmentStatus.STREAMING_FINISH == segment.getSegmentStatus()) {
          listOfStreamSegments.add(
              new Segment(segment.getLoadName(), segment.getSegmentFile(), readCommittedScope));
          continue;
        }
        // for child tables, add the segment to the valid list only if it holds data,
        // i.e. its data size and index size are both non-zero
        if (isChildTable) {
          if (!segment.getDataSize().equalsIgnoreCase("0")
              && !segment.getIndexSize().equalsIgnoreCase("0")) {
            listOfValidSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile(),
                readCommittedScope, segment));
          }
        } else {
          listOfValidSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile(),
              readCommittedScope, segment));
        }
      } else if ((SegmentStatus.LOAD_FAILURE == segment.getSegmentStatus()
          || SegmentStatus.COMPACTED == segment.getSegmentStatus()
          || SegmentStatus.MARKED_FOR_DELETE == segment.getSegmentStatus())) {
        listOfInvalidSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile()));
        if (SegmentStatus.COMPACTED == segment.getSegmentStatus()) {
          // check the main table's merged segment map, e.g. {0.1 -> 0,1,2,3}
          if (null != segment.getMergedLoadName()) {
            if (mergedLoadMapping.containsKey(segment.getMergedLoadName())) {
              mergedLoadMapping.get(segment.getMergedLoadName()).add(segment.getLoadName());
            } else {
              List<String> mergedLoads = new ArrayList<>();
              mergedLoads.add(segment.getLoadName());
              mergedLoadMapping.put(segment.getMergedLoadName(), mergedLoads);
            }
          }
        }
      } else if (SegmentStatus.INSERT_IN_PROGRESS == segment.getSegmentStatus()
          || SegmentStatus.INSERT_OVERWRITE_IN_PROGRESS == segment.getSegmentStatus()) {
        listOfInProgressSegments.add(
            new Segment(segment.getLoadName(), segment.getSegmentFile(), readCommittedScope));
      }
    }
  } catch (IOException e) {
    LOG.error(e.getMessage(), e);
    throw e;
  }
  return new ValidAndInvalidSegmentsInfo(listOfValidSegments, listOfValidUpdatedSegments,
      listOfInvalidSegments, listOfStreamSegments, listOfInProgressSegments, mergedLoadMapping);
}
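A sketch of how a caller might consume the result; the SegmentStatusManager constructor taking an AbsoluteTableIdentifier and the getValidSegments accessor are assumptions of this sketch. Passing null for both optional arguments is safe, since the method reads the table status file and builds the read-committed scope itself, as shown above:

SegmentStatusManager statusManager = new SegmentStatusManager(identifier); // constructor assumed
ValidAndInvalidSegmentsInfo segmentsInfo =
    statusManager.getValidAndInvalidSegments(false, null, null);
for (Segment validSegment : segmentsInfo.getValidSegments()) { // accessor assumed
  // schedule scans, compaction checks, etc. on valid segments only
}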