Use of edu.iu.dsc.tws.data.api.splits.BinaryInputSplit in project twister2 by DSC-SPIDAL.
The class BinaryInputPartitioner, method createInputSplits.
/**
 * Computes the input splits for the configured path, cutting on multiples of the record length.
 *
 * <p>The total byte length of all input files is divided by the record length to obtain the
 * record count. Every split is sized to {@code floor(records / minNumSplits)} records, and
 * one extra record is added to a split for as long as remainder records
 * ({@code records % minNumSplits}) are left to hand out. Splits never span two files. Whatever
 * is left at the end of a file and is smaller than the next regular split is emitted as one
 * final, shorter split, so that no bytes of the input are silently skipped.
 *
 * @param minNumSplits The minimum desired number of file splits.
 * @return The computed file splits.
 * @throws IllegalArgumentException if {@code minNumSplits} is smaller than 1
 * @throws IllegalStateException if the total length is not a multiple of the record length,
 *     or if one of the input files is empty
 * @throws IOException if the file system, file status or block locations cannot be obtained
 */
@Override
public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException {
  if (minNumSplits < 1) {
    throw new IllegalArgumentException("Number of input splits has to be at least 1.");
  }

  // NOTE(review): this.numSplits only influences the initial list capacity below, not the
  // split size computation - confirm that this is intended.
  int curminNumSplits = Math.max(minNumSplits, this.numSplits);
  final Path path = this.filePath;
  final List<FileInputSplit> inputSplits = new ArrayList<>(curminNumSplits);
  final List<FileStatus> files = new ArrayList<>();
  long totalLength = 0;

  final FileSystem fs = FileSystemUtils.get(path);
  final FileStatus pathFile = fs.getFileStatus(path);
  if (pathFile.isDir()) {
    totalLength += sumFilesInDir(path, files, true);
  } else {
    files.add(pathFile);
    totalLength += pathFile.getLen();
  }

  if (totalLength % this.recordLength != 0) {
    throw new IllegalStateException("The Binary file has a incomplete record");
  }

  long numberOfRecords = totalLength / this.recordLength;
  long minRecordsForSplit = Math.floorDiv(numberOfRecords, minNumSplits);
  // Remainder records; each one enlarges a single split by one record.
  long oddRecords = numberOfRecords % minNumSplits;

  final long minSplitSize = minRecordsForSplit * this.recordLength;
  final long halfSplit = minSplitSize >>> 1;

  // Generate the splits
  int splitNum = 0;
  for (final FileStatus file : files) {
    final long len = file.getLen();
    if (len <= 0) {
      throw new IllegalStateException("The binary file " + file.getPath() + " is Empty");
    }

    // get the block locations and make sure they are in order with respect to their offset
    final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, len);
    Arrays.sort(blocks);

    long bytesUnassigned = len;
    long position = 0;
    int blockIndex = 0;
    while (bytesUnassigned > 0) {
      final boolean takesOddRecord = oddRecords > 0;
      long currentSplitSize = takesOddRecord ? minSplitSize + this.recordLength : minSplitSize;

      if (currentSplitSize <= 0 || currentSplitSize > bytesUnassigned) {
        // Tail of the file: either less than one regular split is left, or the regular split
        // size is zero (fewer records than requested splits). Assign everything that remains
        // so that no data is dropped and the loop is guaranteed to terminate.
        currentSplitSize = bytesUnassigned;
      } else if (takesOddRecord) {
        oddRecords--;
      }

      // get the block containing the majority of the data
      blockIndex = getBlockIndexForPosition(blocks, position, halfSplit, blockIndex);

      // create a new split
      inputSplits.add(new BinaryInputSplit(splitNum++, file.getPath(), position,
          currentSplitSize, blocks[blockIndex].getHosts()));

      // adjust the positions
      position += currentSplitSize;
      bytesUnassigned -= currentSplitSize;
    }
  }
  return inputSplits.toArray(new FileInputSplit[0]);
}
Aggregations