Use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.
The class CarbonTableInputFormat, method getSplitsOfStreaming.
/**
 * Use the file list in the .carbonindex file to get the splits of a streaming segment.
 */
public List<InputSplit> getSplitsOfStreaming(JobContext job, AbsoluteTableIdentifier identifier,
    List<Segment> streamSegments) throws IOException {
  List<InputSplit> splits = new ArrayList<InputSplit>();
  if (streamSegments != null && !streamSegments.isEmpty()) {
    numStreamSegments = streamSegments.size();
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    for (Segment segment : streamSegments) {
      String segmentDir =
          CarbonTablePath.getSegmentPath(identifier.getTablePath(), segment.getSegmentNo());
      FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
      if (FileFactory.isFileExist(segmentDir, fileType)) {
        String indexName = CarbonTablePath.getCarbonStreamIndexFileName();
        String indexPath = segmentDir + File.separator + indexName;
        CarbonFile index = FileFactory.getCarbonFile(indexPath, fileType);
        // index file exists
        if (index.exists()) {
          // data file exists
          CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
          try {
            // map block index
            indexReader.openThriftReader(indexPath);
            while (indexReader.hasNext()) {
              BlockIndex blockIndex = indexReader.readBlockIndexInfo();
              String filePath = segmentDir + File.separator + blockIndex.getFile_name();
              Path path = new Path(filePath);
              long length = blockIndex.getFile_size();
              if (length != 0) {
                BlockLocation[] blkLocations;
                FileSystem fs = FileFactory.getFileSystem(path);
                FileStatus file = fs.getFileStatus(path);
                blkLocations = fs.getFileBlockLocations(path, 0, length);
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);
                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > 1.1) {
                  int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                  splits.add(makeSplit(segment.getSegmentNo(), path, length - bytesRemaining,
                      splitSize, blkLocations[blkIndex].getHosts(),
                      blkLocations[blkIndex].getCachedHosts(), FileFormat.ROW_V1));
                  bytesRemaining -= splitSize;
                }
                if (bytesRemaining != 0) {
                  int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                  splits.add(makeSplit(segment.getSegmentNo(), path, length - bytesRemaining,
                      bytesRemaining, blkLocations[blkIndex].getHosts(),
                      blkLocations[blkIndex].getCachedHosts(), FileFormat.ROW_V1));
                }
              } else {
                // Create empty hosts array for zero length files
                splits.add(makeSplit(segment.getSegmentNo(), path, 0, length, new String[0],
                    FileFormat.ROW_V1));
              }
            }
          } finally {
            indexReader.closeThriftReader();
          }
        }
      }
    }
  }
  return splits;
}
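The chunking loop above mirrors Hadoop's FileInputFormat: each streaming data file is cut into chunks of computeSplitSize(blockSize, minSize, maxSize), and the 1.1 factor guarantees the trailing split is never smaller than roughly 10% of a full split. A minimal, self-contained sketch of that arithmetic (the SplitSizing class and splitRanges helper are illustrative, not part of CarbonData):

import java.util.ArrayList;
import java.util.List;

public class SplitSizing {
  // Same formula Hadoop's FileInputFormat uses: clamp blockSize into [minSize, maxSize].
  static long computeSplitSize(long blockSize, long minSize, long maxSize) {
    return Math.max(minSize, Math.min(maxSize, blockSize));
  }

  // Illustrative helper: {offset, length} of each split for a file of `length` bytes.
  static List<long[]> splitRanges(long length, long splitSize) {
    List<long[]> ranges = new ArrayList<>();
    long bytesRemaining = length;
    // Cut full splits while the remainder exceeds splitSize by more than 10%,
    // so the tail split is never smaller than ~10% of splitSize.
    while (((double) bytesRemaining) / splitSize > 1.1) {
      ranges.add(new long[] { length - bytesRemaining, splitSize });
      bytesRemaining -= splitSize;
    }
    if (bytesRemaining != 0) {
      ranges.add(new long[] { length - bytesRemaining, bytesRemaining });
    }
    return ranges;
  }

  public static void main(String[] args) {
    long mb = 1024L * 1024L;
    long splitSize = computeSplitSize(128 * mb, 1, Long.MAX_VALUE); // 128 MB
    System.out.println(splitRanges(300 * mb, splitSize).size()); // 3 splits: 128, 128, 44 MB
    System.out.println(splitRanges(140 * mb, splitSize).size()); // 1 split: within the 10% slack
  }
}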
Use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.
The class CarbonInputSplit, method setInvalidSegments.
public void setInvalidSegments(List<Segment> invalidSegments) {
  List<String> invalidSegmentIds = new ArrayList<>();
  for (Segment segment : invalidSegments) {
    invalidSegmentIds.add(segment.getSegmentNo());
  }
  this.invalidSegments = invalidSegmentIds;
}
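The method keeps only the segment numbers, discarding the rest of each Segment. For reference, the copy loop is equivalent to a stream pipeline; a sketch, not the project's code (requires java.util.stream.Collectors):

// Equivalent stream form of the loop above (sketch only).
this.invalidSegments = invalidSegments.stream()
    .map(Segment::getSegmentNo)
    .collect(Collectors.toList());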
Use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.
The class DataMapWriterListener, method register.
/**
* Register a DataMapWriter
*/
private void register(DataMapFactory factory, String segmentId, String dataWritePath) {
  assert (factory != null);
  assert (segmentId != null);
  DataMapMeta meta = factory.getMeta();
  if (meta == null) {
    // if data map does not have meta, no need to register
    return;
  }
  List<String> columns = meta.getIndexedColumns();
  List<DataMapWriter> writers = registry.get(columns);
  DataMapWriter writer = factory.createWriter(new Segment(segmentId, null), dataWritePath);
  if (writers != null) {
    writers.add(writer);
  } else {
    writers = new ArrayList<>();
    writers.add(writer);
    registry.put(columns, writers);
  }
  LOG.info("DataMapWriter " + writer + " added");
}
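The branch on writers (create the list on first registration, append afterwards) collapses to a single call on any java.util.Map; a sketch assuming registry is a Map<List<String>, List<DataMapWriter>>, which is what the get/put calls above imply:

// Sketch: same effect as the if/else above, assuming registry is a
// Map<List<String>, List<DataMapWriter>>.
registry.computeIfAbsent(columns, k -> new ArrayList<>()).add(writer);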
Use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.
The class CarbonUtil, method validateRangeOfSegmentList.
public static boolean validateRangeOfSegmentList(String segmentId)
    throws InvalidConfigurationException {
  String[] values = segmentId.split(",");
  try {
    if (values.length == 0) {
      throw new InvalidConfigurationException(
          "carbon.input.segments.<database_name>.<table_name> value can't be empty.");
    }
    for (String value : values) {
      if (!value.equalsIgnoreCase("*")) {
        Segment segment = Segment.toSegment(value);
        Float aFloatValue = Float.parseFloat(segment.getSegmentNo());
        if (aFloatValue < 0 || aFloatValue > Float.MAX_VALUE) {
          throw new InvalidConfigurationException(
              "carbon.input.segments.<database_name>.<table_name> value range should be greater "
                  + "than 0 and less than " + Float.MAX_VALUE);
        }
      }
    }
  } catch (NumberFormatException nfe) {
    throw new InvalidConfigurationException(
        "carbon.input.segments.<database_name>.<table_name> value range is not valid");
  }
  return true;
}
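The validator accepts either "*" or a comma-separated list of numeric segment IDs (compacted segments carry decimal names such as "0.1", which is why Float.parseFloat is used); non-numeric or negative values are rejected. A hypothetical call site:

// Hypothetical usage; validateRangeOfSegmentList returns true or throws.
try {
  CarbonUtil.validateRangeOfSegmentList("0,1,2.1"); // valid: plain and compacted segment IDs
  CarbonUtil.validateRangeOfSegmentList("*");       // valid: all segments
  CarbonUtil.validateRangeOfSegmentList("0,abc");   // throws InvalidConfigurationException
} catch (InvalidConfigurationException e) {
  // handle the bad carbon.input.segments.<database_name>.<table_name> value
}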
Use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.
The class SegmentStatusManager, method getValidAndInvalidSegments.
/**
 * Get the valid and invalid segments for the given load status details.
 */
public ValidAndInvalidSegmentsInfo getValidAndInvalidSegments(
    LoadMetadataDetails[] loadMetadataDetails) throws IOException {
  // @TODO: move reading LoadStatus file to separate class
  List<Segment> listOfValidSegments = new ArrayList<>(10);
  List<Segment> listOfValidUpdatedSegments = new ArrayList<>(10);
  List<Segment> listOfInvalidSegments = new ArrayList<>(10);
  List<Segment> listOfStreamSegments = new ArrayList<>(10);
  List<Segment> listOfInProgressSegments = new ArrayList<>(10);
  try {
    if (loadMetadataDetails == null) {
      loadMetadataDetails = readTableStatusFile(
          CarbonTablePath.getTableStatusFilePath(identifier.getTablePath()));
    }
    // just directly iterate the array
    for (LoadMetadataDetails segment : loadMetadataDetails) {
      if (SegmentStatus.SUCCESS == segment.getSegmentStatus()
          || SegmentStatus.MARKED_FOR_UPDATE == segment.getSegmentStatus()
          || SegmentStatus.LOAD_PARTIAL_SUCCESS == segment.getSegmentStatus()
          || SegmentStatus.STREAMING == segment.getSegmentStatus()
          || SegmentStatus.STREAMING_FINISH == segment.getSegmentStatus()) {
        // check for merged loads.
        if (null != segment.getMergedLoadName()) {
          Segment seg = new Segment(segment.getMergedLoadName(), segment.getSegmentFile());
          if (!listOfValidSegments.contains(seg)) {
            listOfValidSegments.add(seg);
          }
          // if merged load is updated then put it in updated list
          if (SegmentStatus.MARKED_FOR_UPDATE == segment.getSegmentStatus()) {
            listOfValidUpdatedSegments.add(seg);
          }
          continue;
        }
        if (SegmentStatus.MARKED_FOR_UPDATE == segment.getSegmentStatus()) {
          listOfValidUpdatedSegments.add(
              new Segment(segment.getLoadName(), segment.getSegmentFile()));
        }
        if (SegmentStatus.STREAMING == segment.getSegmentStatus()
            || SegmentStatus.STREAMING_FINISH == segment.getSegmentStatus()) {
          listOfStreamSegments.add(
              new Segment(segment.getLoadName(), segment.getSegmentFile()));
          continue;
        }
        listOfValidSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile()));
      } else if (SegmentStatus.LOAD_FAILURE == segment.getSegmentStatus()
          || SegmentStatus.COMPACTED == segment.getSegmentStatus()
          || SegmentStatus.MARKED_FOR_DELETE == segment.getSegmentStatus()) {
        listOfInvalidSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile()));
      } else if (SegmentStatus.INSERT_IN_PROGRESS == segment.getSegmentStatus()
          || SegmentStatus.INSERT_OVERWRITE_IN_PROGRESS == segment.getSegmentStatus()) {
        listOfInProgressSegments.add(
            new Segment(segment.getLoadName(), segment.getSegmentFile()));
      }
    }
  } catch (IOException e) {
    LOG.error(e);
    throw e;
  }
  return new ValidAndInvalidSegmentsInfo(listOfValidSegments, listOfValidUpdatedSegments,
      listOfInvalidSegments, listOfStreamSegments, listOfInProgressSegments);
}
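The method buckets every SegmentStatus into one of five lists: SUCCESS, LOAD_PARTIAL_SUCCESS and merged loads are valid; MARKED_FOR_UPDATE additionally lands in the updated list; STREAMING and STREAMING_FINISH go to the stream list; LOAD_FAILURE, COMPACTED and MARKED_FOR_DELETE are invalid; the two INSERT_*_IN_PROGRESS states are in progress. A hypothetical caller, assuming the conventional getter names on ValidAndInvalidSegmentsInfo:

// Hypothetical usage: passing null makes the method read the tablestatus file itself.
SegmentStatusManager manager = new SegmentStatusManager(identifier);
SegmentStatusManager.ValidAndInvalidSegmentsInfo info =
    manager.getValidAndInvalidSegments(null);
for (Segment segment : info.getValidSegments()) { // getter name assumed
  LOG.info("valid segment: " + segment.getSegmentNo());
}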