use of org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope in project carbondata by apache.
the class CarbonFileInputFormat method getSplits.
/**
 * Get the list of blocks/blocklets and wrap them as CarbonInputSplit.
 * @param job JobContext with Configuration
 * @return list of CarbonInputSplit
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
  if (null == carbonTable) {
    throw new IOException("Missing/Corrupt schema file for table.");
  }
  AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
  // get all valid segments and set them into the configuration
  // check for externalTable segment (Segment_null)
  // process and resolve the expression
  ReadCommittedScope readCommittedScope;
  if (carbonTable.isTransactionalTable()) {
    readCommittedScope = new LatestFilesReadCommittedScope(
        identifier.getTablePath() + "/Fact/Part0/Segment_null/", job.getConfiguration());
  } else {
    readCommittedScope = getReadCommittedScope(job.getConfiguration());
    if (readCommittedScope == null) {
      readCommittedScope =
          new LatestFilesReadCommittedScope(identifier.getTablePath(), job.getConfiguration());
    } else {
      readCommittedScope.setConfiguration(job.getConfiguration());
    }
  }
  // this will be null in case of corrupt schema file.
  IndexFilter filter = getFilterPredicates(job.getConfiguration());
  // if external table segments are found, add them to the list
  List<Segment> externalTableSegments = new ArrayList<>();
  Segment seg;
  if (carbonTable.isTransactionalTable()) {
    // In some cases the SDK writes into the segment path instead of the table path, i.e. inside
    // "Fact/Part0/Segment_null". The segment in this case is named "null". Such a table is
    // treated as transactional by default and goes through CarbonFileInputFormat.
    // That scenario is handled by the code below.
    seg = new Segment("null", null, readCommittedScope);
    externalTableSegments.add(seg);
  } else {
    LoadMetadataDetails[] loadMetadataDetails = readCommittedScope.getSegmentList();
    for (LoadMetadataDetails load : loadMetadataDetails) {
      seg = new Segment(load.getLoadName(), null, readCommittedScope);
      if (fileLists != null) {
        for (Object fileList : fileLists) {
          String timestamp =
              CarbonTablePath.DataFileUtil.getTimeStampFromFileName(fileList.toString());
          if (timestamp.equals(seg.getSegmentNo())) {
            externalTableSegments.add(seg);
            break;
          }
        }
      } else {
        externalTableSegments.add(seg);
      }
    }
  }
  List<InputSplit> splits = new ArrayList<>();
  boolean useBlockIndex = job.getConfiguration().getBoolean("filter_blocks", true);
  // useBlockIndex is false when an SDK reader has not provided any filter; in that case the
  // block/blocklet index is not loaded. It is true in all other scenarios.
  if (filter != null) {
    filter.resolve(false);
  }
  if (useBlockIndex) {
    // do block filtering and get splits
    splits = getSplits(job, filter, externalTableSegments);
  } else {
    List<CarbonFile> carbonFiles;
    if (null != this.fileLists) {
      carbonFiles = getAllCarbonDataFiles(this.fileLists);
    } else {
      carbonFiles = getAllCarbonDataFiles(carbonTable.getTablePath());
    }
    List<String> allDeleteDeltaFiles = getAllDeleteDeltaFiles(carbonTable.getTablePath());
    for (CarbonFile carbonFile : carbonFiles) {
      // Segment id is set to "null" because the SDK does not write carbondata files with
      // respect to segments, so no specific segment name exists for this load.
      CarbonInputSplit split =
          new CarbonInputSplit("null", carbonFile.getAbsolutePath(), 0, carbonFile.getLength(),
              carbonFile.getLocations(), FileFormat.COLUMNAR_V3);
      split.setVersion(ColumnarFormatVersion.V3);
      BlockletDetailInfo info = new BlockletDetailInfo();
      split.setDetailInfo(info);
      info.setBlockSize(carbonFile.getLength());
      info.setVersionNumber(split.getVersion().number());
      info.setUseMinMaxForPruning(false);
      if (CollectionUtils.isNotEmpty(allDeleteDeltaFiles)) {
        split.setDeleteDeltaFiles(
            getDeleteDeltaFiles(carbonFile.getAbsolutePath(), allDeleteDeltaFiles));
      }
      splits.add(split);
    }
    splits.sort(Comparator.comparing(o -> ((CarbonInputSplit) o).getFilePath()));
  }
  setAllColumnProjectionIfNotConfigured(job, carbonTable);
  return splits;
}
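For reference, here is a minimal standalone sketch (not part of the project code above) of pointing LatestFilesReadCommittedScope directly at an SDK-written directory so that it synthesizes a segment list from the files on disk. The class name ListSdkSegments and the path "/tmp/sdk_output" are placeholders, and the import locations are assumptions; only the constructor, getSegmentList() and getLoadName() calls that appear in the snippet above are used.

// Hypothetical usage sketch; assumes carbondata-core and Hadoop on the classpath.
import org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope;
import org.apache.carbondata.core.statusmanager.LoadMetadataDetails;
import org.apache.hadoop.conf.Configuration;

public class ListSdkSegments {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // The scope scans the given path and builds a segment list from the latest files it finds.
    LatestFilesReadCommittedScope scope =
        new LatestFilesReadCommittedScope("/tmp/sdk_output", conf);
    for (LoadMetadataDetails load : scope.getSegmentList()) {
      System.out.println("segment: " + load.getLoadName());
    }
  }
}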
use of org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope in project carbondata by apache.
the class IndexUtil method executeClearIndexJob.
/**
 * Gets the indexJob and calls execute. This job is launched from the driver side before
 * clearing indexes during drop table and drop index, and clears the index on the executor side.
 * @param carbonTable
 * @throws IOException
 */
private static void executeClearIndexJob(IndexJob indexJob, CarbonTable carbonTable,
    String indexToClear) throws IOException {
  IndexInputFormat indexInputFormat;
  if (!carbonTable.isTransactionalTable()) {
    ReadCommittedScope readCommittedScope = new LatestFilesReadCommittedScope(
        carbonTable.getTablePath(), FileFactory.getConfiguration());
    LoadMetadataDetails[] loadMetadataDetails = readCommittedScope.getSegmentList();
    List<Segment> listOfValidSegments = new ArrayList<>(loadMetadataDetails.length);
    Arrays.stream(loadMetadataDetails).forEach(segment -> {
      Segment seg = new Segment(segment.getLoadName(), segment.getSegmentFile());
      seg.setLoadMetadataDetails(segment);
      listOfValidSegments.add(seg);
    });
    indexInputFormat = new IndexInputFormat(carbonTable, listOfValidSegments,
        new ArrayList<>(0), true, indexToClear);
  } else {
    SegmentStatusManager.ValidAndInvalidSegmentsInfo validAndInvalidSegmentsInfo =
        getValidAndInvalidSegments(carbonTable, FileFactory.getConfiguration());
    List<String> invalidSegment = new ArrayList<>();
    validAndInvalidSegmentsInfo.getInvalidSegments()
        .forEach(segment -> invalidSegment.add(segment.getSegmentNo()));
    indexInputFormat = new IndexInputFormat(carbonTable,
        validAndInvalidSegmentsInfo.getValidSegments(), invalidSegment, true, indexToClear);
  }
  try {
    indexJob.execute(indexInputFormat, null);
  } catch (Exception e) {
    // Consider a scenario where the clear index job is called from drop table and the index
    // server crashes. In this case no exception should be thrown and drop table should complete.
    LOGGER.error("Failed to execute Index clear Job", e);
  }
}
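The non-transactional branch above can also be read as the following self-contained helper, a hedged sketch rather than project code: the method name buildNonTransactionalSegments is illustrative, and it uses only the Segment, LoadMetadataDetails, LatestFilesReadCommittedScope and FileFactory calls already shown in executeClearIndexJob.

// Hypothetical helper: turn the file-derived segment list of LatestFilesReadCommittedScope
// into Segment objects, the same way the non-transactional branch above does.
private static List<Segment> buildNonTransactionalSegments(CarbonTable carbonTable)
    throws IOException {
  ReadCommittedScope readCommittedScope = new LatestFilesReadCommittedScope(
      carbonTable.getTablePath(), FileFactory.getConfiguration());
  List<Segment> validSegments = new ArrayList<>();
  for (LoadMetadataDetails load : readCommittedScope.getSegmentList()) {
    // Same two-argument Segment constructor as used in executeClearIndexJob above.
    Segment seg = new Segment(load.getLoadName(), load.getSegmentFile());
    seg.setLoadMetadataDetails(load);
    validSegments.add(seg);
  }
  return validSegments;
}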
use of org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope in project carbondata by apache.
the class CarbonTableInputFormat method getReadCommitted.
public ReadCommittedScope getReadCommitted(JobContext job, AbsoluteTableIdentifier identifier)
    throws IOException {
  if (readCommittedScope == null) {
    ReadCommittedScope readCommittedScope;
    if (job.getConfiguration().getBoolean(CARBON_TRANSACTIONAL_TABLE, true)) {
      readCommittedScope = new TableStatusReadCommittedScope(identifier, job.getConfiguration());
    } else {
      readCommittedScope = getReadCommittedScope(job.getConfiguration());
      if (readCommittedScope == null) {
        readCommittedScope =
            new LatestFilesReadCommittedScope(identifier.getTablePath(), job.getConfiguration());
      }
    }
    this.readCommittedScope = readCommittedScope;
  }
  return readCommittedScope;
}
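To summarize the selection rule, here is a hedged standalone sketch (the helper name chooseReadCommittedScope is illustrative and not part of CarbonTableInputFormat): transactional tables take their committed view from the table status file, while non-transactional stores fall back to the latest files on disk.

// Hypothetical helper; only constructors and accessors shown in the snippets above are used.
private static ReadCommittedScope chooseReadCommittedScope(CarbonTable carbonTable,
    Configuration configuration) throws IOException {
  if (carbonTable.isTransactionalTable()) {
    // Transactional tables record committed segments in the tablestatus file.
    return new TableStatusReadCommittedScope(
        carbonTable.getAbsoluteTableIdentifier(), configuration);
  }
  // Non-transactional (e.g. SDK-written) stores have no tablestatus; read the latest files.
  return new LatestFilesReadCommittedScope(carbonTable.getTablePath(), configuration);
}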