use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.
the class DataNodeLocatorUtils method findDataNodesLocation.
/**
 * Resolves the data node names for the given input file.
 *
 * <p>The file system type is read from the configuration under
 * {@code DataObjectConstants.FILE_SYSTEM}:
 * <ul>
 *   <li>{@code DataContext.TWISTER2_HDFS_FILESYSTEM}: the result of {@code getDataNodes()} is
 *       returned, provided the path reported by the file status is not null or empty.</li>
 *   <li>{@code DataContext.TWISTER2_LOCAL_FILESYSTEM}: the local host name is returned as the
 *       only entry, under the same condition.</li>
 *   <li>Any other value, or a missing value: an empty list is returned.</li>
 * </ul>
 *
 * @param inputFileName name (path) of the input file
 * @return datanodes list, possibly empty
 * @throws RuntimeException wrapping the underlying {@link IOException} if the file system or the
 *     file status cannot be accessed, or the local host name cannot be resolved
 */
public List<String> findDataNodesLocation(String inputFileName) {
  List<String> dataNodes = new ArrayList<>();
  try {
    Path path = new Path(inputFileName);
    FileSystem fileSystem = FileSystemUtils.get(path.toUri(), config);

    // Read the setting once and guard against it being absent from the configuration.
    Object fsType = config.get(DataObjectConstants.FILE_SYSTEM);
    boolean isHdfs = fsType != null && fsType.equals(DataContext.TWISTER2_HDFS_FILESYSTEM);
    boolean isLocal = !isHdfs && fsType != null
        && fsType.equals(DataContext.TWISTER2_LOCAL_FILESYSTEM);

    if (isHdfs || isLocal) {
      FileStatus fileStatus = fileSystem.getFileStatus(path);
      if (!fileStatus.getPath().isNullOrEmpty()) {
        if (isHdfs) {
          dataNodes = getDataNodes();
        } else {
          dataNodes.add(InetAddress.getLocalHost().getHostName());
        }
      }
    }
  } catch (IOException ioe) {
    // Keep the original exception as the cause so the failure can be diagnosed.
    throw new RuntimeException(
        "IOException occurred while locating data nodes for '" + inputFileName + "'", ioe);
  }
  return dataNodes;
}
use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.
the class FileSystemUtils method getFileSystem.
/**
 * Creates and initializes a {@link FileSystem} for the given URI. When the resolved scheme equals
 * {@code DataContext.TWISTER2_HDFS_FILESYSTEM} the file system is instantiated with the supplied
 * configuration; otherwise it is instantiated without it.
 *
 * <p>If the URI has no scheme, the default scheme
 * ({@code DataConstants.DEFAULT_FILESYSTEM_SCHEME}) is applied. If that fails and the default
 * scheme is {@code file}, the path is converted to an absolute local path instead.
 *
 * @param uri the URI identifying the file system; must not be null
 * @param config configuration handed to the file system for the HDFS scheme
 * @return the initialized file system, or {@code null} if {@code isSupportedScheme} rejects the
 *     resolved scheme
 * @throws IOException if the URI is null, cannot be resolved to a URI with a scheme, or is a
 *     {@code file} URI carrying an authority component
 * @throws RuntimeException if the HDFS file system class cannot be instantiated reflectively
 */
public static FileSystem getFileSystem(URI uri, Config config) throws IOException {
  // Fail with the declared exception type; do not dereference the null URI for the message.
  if (uri == null) {
    throw new IOException("The URI must not be null");
  }
  FileSystem fs = null;
  URI asked = uri;
  URI curUri = uri;
  // TODO: check if the sync is actually needed or can be scoped down
  synchronized (SYNCHRONIZATION_OBJECT) {
    if (curUri.getScheme() == null) {
      try {
        if (defaultScheme == null) {
          defaultScheme = new URI(DataConstants.DEFAULT_FILESYSTEM_SCHEME);
        }
        curUri = new URI(defaultScheme.getScheme(), null, defaultScheme.getHost(),
            defaultScheme.getPort(), curUri.getPath(), null, null);
      } catch (URISyntaxException e) {
        try {
          // defaultScheme is still null when parsing the default scheme itself failed; in that
          // case fall through to the scheme check below, which reports the problem.
          if (defaultScheme != null && "file".equals(defaultScheme.getScheme())) {
            curUri = new URI("file", null,
                new Path(new File(curUri.getPath()).getAbsolutePath()).toUri().getPath(), null);
          }
        } catch (URISyntaxException ex) {
          // we tried to repair it, but could not. report the scheme error
          throw new IOException("The URI '" + curUri.toString() + "' is not valid.", ex);
        }
      }
    }
    if (curUri.getScheme() == null) {
      throw new IOException("The URI '" + curUri + "' is invalid.\n" + "The fs.default-scheme = "
          + defaultScheme + ", the requested URI = " + asked + ", and the final URI = " + curUri
          + ".");
    }
    if (curUri.getScheme().equals("file") && curUri.getAuthority() != null
        && !curUri.getAuthority().isEmpty()) {
      String supposedUri = "file:///" + curUri.getAuthority() + curUri.getPath();
      throw new IOException("Found local file path with authority '" + curUri.getAuthority()
          + "' in path '" + curUri.toString()
          + "'. Hint: Did you forget a slash? (correct path would be '" + supposedUri + "')");
    }
    // TODO : need to add cache that can save FileSystem Objects and return from cache if available
    if (isSupportedScheme(curUri.getScheme())) {
      String fsClass = SUPPORTEDFS.get(curUri.getScheme());
      if (DataContext.TWISTER2_HDFS_FILESYSTEM.equals(curUri.getScheme())) {
        try {
          fs = instantiateFileSystem(fsClass, config);
        } catch (NoSuchMethodException e) {
          throw new RuntimeException("No such method to invoke", e);
        } catch (InvocationTargetException e) {
          throw new RuntimeException("Invocation exception occured", e);
        }
      } else {
        fs = instantiateFileSystem(fsClass);
      }
      fs.initialize(curUri);
    }
    // TODO: handle when the system is not supported (currently null is returned)
  }
  return fs;
}
use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.
the class FileSystemUtils method getFileSystem.
/**
 * Creates and initializes a {@link FileSystem} for the given URI without passing any
 * configuration to it.
 *
 * <p>If the URI has no scheme, the default scheme
 * ({@code DataConstants.DEFAULT_FILESYSTEM_SCHEME}) is applied. If that fails and the default
 * scheme is {@code file}, the path is converted to an absolute local path instead.
 *
 * @param uri the URI identifying the file system; must not be null
 * @return the initialized file system, or {@code null} if {@code isSupportedScheme} rejects the
 *     resolved scheme
 * @throws IOException if the URI is null, cannot be resolved to a URI with a scheme, or is a
 *     {@code file} URI carrying an authority component
 */
public static FileSystem getFileSystem(URI uri) throws IOException {
  // Fail with the declared exception type; do not dereference the null URI for the message.
  if (uri == null) {
    throw new IOException("The URI must not be null");
  }
  FileSystem fs = null;
  URI asked = uri;
  URI curUri = uri;
  // TODO: check if the sync is actually needed or can be scoped down
  synchronized (SYNCHRONIZATION_OBJECT) {
    if (curUri.getScheme() == null) {
      try {
        if (defaultScheme == null) {
          defaultScheme = new URI(DataConstants.DEFAULT_FILESYSTEM_SCHEME);
        }
        curUri = new URI(defaultScheme.getScheme(), null, defaultScheme.getHost(),
            defaultScheme.getPort(), curUri.getPath(), null, null);
      } catch (URISyntaxException e) {
        try {
          // defaultScheme is still null when parsing the default scheme itself failed; in that
          // case fall through to the scheme check below, which reports the problem.
          if (defaultScheme != null && "file".equals(defaultScheme.getScheme())) {
            curUri = new URI("file", null,
                new Path(new File(curUri.getPath()).getAbsolutePath()).toUri().getPath(), null);
          }
        } catch (URISyntaxException ex) {
          // we tried to repair it, but could not. report the scheme error
          throw new IOException("The URI '" + curUri.toString() + "' is not valid.", ex);
        }
      }
    }
    if (curUri.getScheme() == null) {
      throw new IOException("The URI '" + curUri + "' is invalid.\n" + "The fs.default-scheme = "
          + defaultScheme + ", the requested URI = " + asked + ", and the final URI = " + curUri
          + ".");
    }
    if (curUri.getScheme().equals("file") && curUri.getAuthority() != null
        && !curUri.getAuthority().isEmpty()) {
      String supposedUri = "file:///" + curUri.getAuthority() + curUri.getPath();
      throw new IOException("Found local file path with authority '" + curUri.getAuthority()
          + "' in path '" + curUri.toString()
          + "'. Hint: Did you forget a slash? (correct path would be '" + supposedUri + "')");
    }
    // TODO : need to add cache that can save FileSystem Objects and return from cache if available
    if (isSupportedScheme(curUri.getScheme())) {
      String fsClass = SUPPORTEDFS.get(curUri.getScheme());
      fs = instantiateFileSystem(fsClass);
      fs.initialize(curUri);
    }
    // TODO: handle when the system is not supported (currently null is returned)
  }
  return fs;
}
use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.
the class CompleteCSVInputPartitioner method createInputSplits.
/**
 * Builds the input splits for the configured path. For every file found, the same number of
 * splits is emitted (the larger of {@code minNumSplits} and the configured split count), and
 * each of those splits starts at offset 0 of the file.
 *
 * @param minNumSplits Number of minimal input splits, as a hint.
 * @return the generated splits
 * @throws IllegalArgumentException if {@code minNumSplits} is smaller than 1
 * @throws IOException if the file system, file status or block locations cannot be read
 */
@Override
public FileInputSplit<OT>[] createInputSplits(int minNumSplits) throws IOException {
  if (minNumSplits < 1) {
    throw new IllegalArgumentException("Number of input splits has to be at least 1.");
  }

  final int splitsPerFile = Math.max(minNumSplits, this.numSplits);
  final Path inputPath = this.filePath;
  final List<FileInputSplit> result = new ArrayList<>(splitsPerFile);
  final List<FileStatus> inputFiles = new ArrayList<>();

  final FileSystem fs = FileSystemUtils.get(inputPath, config);
  final FileStatus rootStatus = fs.getFileStatus(inputPath);

  // Collect the files to split together with their accumulated length.
  long totalBytes = 0;
  if (!rootStatus.isDir()) {
    inputFiles.add(rootStatus);
    totalBytes += rootStatus.getLen();
  } else {
    totalBytes += sumFilesInDir(inputPath, inputFiles, true);
  }
  final long upperSplitSize = totalBytes;

  int nextSplitId = 0;
  for (final FileStatus current : inputFiles) {
    final long fileLength = current.getLen();
    final long blockSize = current.getBlockSize();

    // The minimal split size never exceeds the block size of the file.
    if (this.minSplitSize > blockSize) {
      LOG.log(Level.WARNING, "Minimal split size of " + this.minSplitSize + " is larger than the block size of " + blockSize + ". Decreasing minimal split size to block size.");
    }
    final long lowerSplitSize = Math.min(this.minSplitSize, blockSize);
    final long splitSize = Math.max(lowerSplitSize, Math.min(upperSplitSize, blockSize));

    if (fileLength <= 0) {
      // special case with a file of zero bytes size
      final BlockLocation[] emptyFileBlocks = fs.getFileBlockLocations(current, 0, 0);
      final String[] hosts =
          emptyFileBlocks.length > 0 ? emptyFileBlocks[0].getHosts() : new String[0];
      for (int n = 0; n < splitsPerFile; n++) {
        result.add(new CSVInputSplit(nextSplitId++, current.getPath(), 0, 0, hosts));
      }
      continue;
    }

    final BlockLocation[] locations = fs.getFileBlockLocations(current, 0, fileLength);
    Arrays.sort(locations);
    final long start = 0;
    int locationIdx = 0;
    for (int n = 0; n < splitsPerFile; n++) {
      locationIdx = getBlockIndexForPosition(locations, start, splitSize, locationIdx);
      final FileInputSplit split = new CSVInputSplit(
          nextSplitId++, current.getPath(), start, splitSize, locations[locationIdx].getHosts());
      result.add(split);
    }
  }

  return result.toArray(new FileInputSplit[result.size()]);
}
use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.
the class FileInputPartitioner method createInputSplits.
/**
 * Computes the input splits for the file. The target split size is derived from the total input
 * length divided by the desired number of splits (rounded up), bounded below by the minimal split
 * size and above by the block size of each file. A trailing remainder of a file that does not
 * exceed {@code splitSize * MAX_SPLIT_SIZE_DISCREPANCY} bytes is emitted as one final split.
 *
 * @param minNumSplits The minimum desired number of file splits.
 * @return The computed file splits.
 * @throws IllegalArgumentException if {@code minNumSplits} is smaller than 1
 * @throws IOException if the file system, file status or block locations cannot be read
 */
@Override
public FileInputSplit<OT>[] createInputSplits(int minNumSplits) throws IOException {
  if (minNumSplits < 1) {
    throw new IllegalArgumentException("Number of input splits has to be at least 1.");
  }

  // take the desired number of splits into account
  final int desiredSplits = Math.max(minNumSplits, this.numSplits);
  final Path inputPath = this.filePath;
  final List<FileInputSplit> result = new ArrayList<FileInputSplit>(desiredSplits);
  // all the files that are involved in the splits
  final List<FileStatus> inputFiles = new ArrayList<FileStatus>();

  final FileSystem fs = FileSystemUtils.get(inputPath, config);
  final FileStatus rootStatus = fs.getFileStatus(inputPath);

  long totalBytes = 0;
  if (!rootStatus.isDir()) {
    // TODO L3: implement test for unsplittable
    inputFiles.add(rootStatus);
    totalBytes += rootStatus.getLen();
  } else {
    totalBytes += sumFilesInDir(inputPath, inputFiles, true);
  }

  // TODO L3: Handle if unsplittable
  // TODO L1: check if we can add the i j method when making splits so that the last split is not
  // larger than the other splits
  long roundedUp = totalBytes / desiredSplits;
  if (totalBytes % desiredSplits != 0) {
    roundedUp += 1;
  }
  final long upperSplitSize = roundedUp;

  int nextSplitId = 0;
  for (final FileStatus current : inputFiles) {
    final long fileLength = current.getLen();
    final long blockSize = current.getBlockSize();

    // The minimal split size never exceeds the block size of the file.
    if (this.minSplitSize > blockSize) {
      LOG.log(Level.WARNING, "Minimal split size of " + this.minSplitSize + " is larger than the block size of " + blockSize + ". Decreasing minimal split size to block size.");
    }
    final long lowerSplitSize = Math.min(this.minSplitSize, blockSize);
    final long splitSize = Math.max(lowerSplitSize, Math.min(upperSplitSize, blockSize));
    final long halfSplit = splitSize >>> 1;
    final long lastSplitLimit = (long) (splitSize * MAX_SPLIT_SIZE_DISCREPANCY);

    if (fileLength <= 0) {
      // special case with a file of zero bytes size
      final BlockLocation[] emptyFileBlocks = fs.getFileBlockLocations(current, 0, 0);
      final String[] hosts =
          emptyFileBlocks.length > 0 ? emptyFileBlocks[0].getHosts() : new String[0];
      result.add(createSplit(nextSplitId++, current.getPath(), 0, 0, hosts));
      continue;
    }

    // get the block locations and make sure they are in order with respect to their offset
    final BlockLocation[] locations = fs.getFileBlockLocations(current, 0, fileLength);
    Arrays.sort(locations);

    long remaining = fileLength;
    long offset = 0;
    int locationIdx = 0;
    // emit full-size splits until the rest is small enough to become the last split
    for (; remaining > lastSplitLimit; offset += splitSize, remaining -= splitSize) {
      // pick the block containing the majority of the data of this split
      locationIdx = getBlockIndexForPosition(locations, offset, halfSplit, locationIdx);
      final FileInputSplit split = createSplit(
          nextSplitId++, current.getPath(), offset, splitSize, locations[locationIdx].getHosts());
      result.add(split);
    }

    if (remaining > 0) {
      locationIdx = getBlockIndexForPosition(locations, offset, halfSplit, locationIdx);
      final FileInputSplit tail = createSplit(
          nextSplitId++, current.getPath(), offset, remaining, locations[locationIdx].getHosts());
      result.add(tail);
    }
  }

  LOG.fine("input splits value:" + result.size() + "\t" + Arrays.toString(result.toArray()));
  return result.toArray(new FileInputSplit[result.size()]);
}
Aggregations