use of edu.iu.dsc.tws.api.data.FileStatus in project twister2 by DSC-SPIDAL.
the class HadoopFileSystem method listFiles.
/**
* List the statuses of the files/directories in the given path if the path is
* a directory.
*
* @param f given path
* @return the statuses of the files/directories in the given path
*/
@Override
public FileStatus[] listFiles(Path f) throws IOException {
  RemoteIterator<LocatedFileStatus> listFiles =
      this.hadoopFileSystem.listFiles(toHadoopPath(f), true);
  List<FileStatus> statusList = new ArrayList<>();
  while (listFiles.hasNext()) {
    LocatedFileStatus next = listFiles.next();
    FileStatus status = new HadoopFileStatus(next);
    statusList.add(status);
  }
  return statusList.toArray(new FileStatus[0]);
}
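A minimal usage sketch (not part of the class above): it relies only on calls that already appear in this listing, namely FileSystemUtils.get, listFiles, getPath and getLen; the directory path is a hypothetical example.

// Usage sketch: list a directory through the generic FileSystem API and print
// each entry. The path below is hypothetical.
public static void printDirectoryListing() throws IOException {
  Path dir = new Path("/tmp/testdinput");
  FileSystem fs = FileSystemUtils.get(dir);
  for (FileStatus status : fs.listFiles(dir)) {
    System.out.println(status.getPath() + " : " + status.getLen() + " bytes");
  }
}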
use of edu.iu.dsc.tws.api.data.FileStatus in project twister2 by DSC-SPIDAL.
the class KMeansDataGeneratorTest method testUniqueSchedules3.
/**
* The HDFS data generation test is commented out for the Travis build.
*/
/* @Test
public void testUniqueSchedules2() throws IOException {
  Config config = getConfig();
  String hostname = String.valueOf(config.get("twister2.data.hdfs.namenode"));
  String dinputDirectory = "hdfs://" + hostname + ":9000/tmp/testdinput";
  int numFiles = 1;
  int dsize = 20;
  int dimension = 2;
  int parallelismValue = 2;
  KMeansDataGenerator.generateData("txt", new Path(dinputDirectory),
      numFiles, dsize, 100, dimension, config);
  ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
  computeGraphBuilder.setTaskGraphName("kmeans");
  DataObjectSource sourceTask = new DataObjectSource("direct", dinputDirectory);
  DataObjectSink sinkTask = new DataObjectSink();
  computeGraphBuilder.addSource("source", sourceTask, parallelismValue);
  ComputeConnection computeConnection1 = computeGraphBuilder.addSink("sink", sinkTask,
      parallelismValue);
  computeConnection1.direct("source").viaEdge("direct").withDataType(MessageTypes.OBJECT);
  computeGraphBuilder.setMode(OperationMode.BATCH);
  LocalCompleteTextInputPartitioner localCompleteTextInputPartitioner
      = new LocalCompleteTextInputPartitioner(
          new Path(dinputDirectory), parallelismValue, config);
  DataSource<String, ?> source
      = new DataSource<>(config, localCompleteTextInputPartitioner, parallelismValue);
  InputSplit<String> inputSplit;
  for (int i = 0; i < parallelismValue; i++) {
    inputSplit = source.getNextSplit(i);
    Assert.assertNotNull(inputSplit);
  }
}*/
@Test
public void testUniqueSchedules3() throws IOException {
  Config config = getConfig();
  String cinputDirectory = "/tmp/testcinput";
  int numFiles = 1;
  int csize = 4;
  int dimension = 2;
  int parallelismValue = 2;
  KMeansDataGenerator.generateData("txt", new Path(cinputDirectory),
      numFiles, csize, 100, dimension, config);
  ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
  computeGraphBuilder.setTaskGraphName("kmeans");
  DataFileReplicatedReadSource task = new DataFileReplicatedReadSource(
      Context.TWISTER2_DIRECT_EDGE, cinputDirectory);
  computeGraphBuilder.addSource("map", task, parallelismValue);
  computeGraphBuilder.setMode(OperationMode.BATCH);
  Path path = new Path(cinputDirectory);
  final FileSystem fs = FileSystemUtils.get(path);
  final FileStatus pathFile = fs.getFileStatus(path);
  Assert.assertNotNull(pathFile);
  DataFileReader fileReader = new DataFileReader(config, "local");
  double[][] centroids = fileReader.readData(path, dimension, csize);
  Assert.assertNotNull(centroids);
}
use of edu.iu.dsc.tws.api.data.FileStatus in project twister2 by DSC-SPIDAL.
the class DataNodeLocatorUtils method findDataNodesLocation.
/**
* This method receives the input file name of a vertex, finds the locations of the
* corresponding datanodes in HDFS, and returns the datanode list.
*
* @return datanodes list
*/
public List<String> findDataNodesLocation(String inputFileName) {
  List<String> dataNodes = new ArrayList<>();
  FileSystem fileSystem;
  try {
    Path path = new Path(inputFileName);
    fileSystem = FileSystemUtils.get(path.toUri(), config);
    if (config.get(DataObjectConstants.FILE_SYSTEM)
        .equals(DataContext.TWISTER2_HDFS_FILESYSTEM)) {
      FileStatus fileStatus = fileSystem.getFileStatus(path);
      if (!fileStatus.getPath().isNullOrEmpty()) {
        dataNodes = getDataNodes();
      }
    } else if (config.get(DataObjectConstants.FILE_SYSTEM)
        .equals(DataContext.TWISTER2_LOCAL_FILESYSTEM)) {
      FileStatus fileStatus = fileSystem.getFileStatus(path);
      if (!fileStatus.getPath().isNullOrEmpty()) {
        String datanodeName = InetAddress.getLocalHost().getHostName();
        dataNodes.add(datanodeName);
      }
    }
  } catch (IOException ioe) {
    throw new RuntimeException("IOException occurred while locating data nodes", ioe);
  }
  return dataNodes;
}
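A hedged usage sketch: the DataNodeLocatorUtils constructor shown here (taking a Config) and the input file path are assumptions for illustration only, not taken from the snippet above.

// Hypothetical usage: resolve candidate datanodes for an input file and print them.
// The constructor signature is an assumption; verify against the actual class.
DataNodeLocatorUtils locatorUtils = new DataNodeLocatorUtils(config);
List<String> dataNodes = locatorUtils.findDataNodesLocation("/tmp/testdinput/datapoints_0.txt");
for (String node : dataNodes) {
  System.out.println("Candidate data-local node: " + node);
}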
use of edu.iu.dsc.tws.api.data.FileStatus in project twister2 by DSC-SPIDAL.
the class CSVInputPartitioner method sumFilesInDir.
long sumFilesInDir(Path path, List<FileStatus> files, boolean logExcludedFiles)
    throws IOException {
  final FileSystem fs = FileSystemUtils.get(path);
  long length = 0;
  for (FileStatus file : fs.listFiles(path)) {
    if (file.isDir()) {
      if (acceptFile(file) && enumerateNestedFiles) {
        length += sumFilesInDir(file.getPath(), files, logExcludedFiles);
      } else {
        if (logExcludedFiles) {
          LOG.log(Level.INFO, "Directory " + file.getPath().toString()
              + " did not pass the file-filter and is excluded.");
        }
      }
    } else {
      if (acceptFile(file)) {
        files.add(file);
        length += file.getLen();
      } else {
        if (logExcludedFiles) {
          LOG.log(Level.INFO, "File " + file.getPath().toString()
              + " did not pass the file-filter and is excluded.");
        }
      }
    }
  }
  return length;
}
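A small sketch, mirroring how the createInputSplits method shown next drives this kind of helper: collect the accepted files under a directory and report their total size. It would run from within the partitioner (sumFilesInDir and LOG are instance members); the directory path is hypothetical.

// Sketch: enumerate accepted files under a directory and sum their lengths,
// as createInputSplits does when the input path is a directory.
List<FileStatus> acceptedFiles = new ArrayList<>();
long totalBytes = sumFilesInDir(new Path("/tmp/testdinput"), acceptedFiles, true);
LOG.log(Level.INFO, acceptedFiles.size() + " file(s), " + totalBytes + " bytes in total");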
use of edu.iu.dsc.tws.api.data.FileStatus in project twister2 by DSC-SPIDAL.
the class CompleteCSVInputPartitioner method createInputSplits.
/**
* Creates the input splits covering the complete file.
*
* @param minNumSplits Number of minimal input splits, as a hint.
*/
@Override
public FileInputSplit<OT>[] createInputSplits(int minNumSplits) throws IOException {
  if (minNumSplits < 1) {
    throw new IllegalArgumentException("Number of input splits has to be at least 1.");
  }
  int curminNumSplits = Math.max(minNumSplits, this.numSplits);
  final Path path = this.filePath;
  final List<FileInputSplit> inputSplits = new ArrayList<>(curminNumSplits);
  List<FileStatus> files = new ArrayList<>();
  long totalLength = 0;
  final FileSystem fs = FileSystemUtils.get(path, config);
  final FileStatus pathFile = fs.getFileStatus(path);
  if (pathFile.isDir()) {
    totalLength += sumFilesInDir(path, files, true);
  } else {
    files.add(pathFile);
    totalLength += pathFile.getLen();
  }
  final long maxSplitSize = totalLength;
  // Generate the splits
  int splitNum = 0;
  for (final FileStatus file : files) {
    final long len = file.getLen();
    final long blockSize = file.getBlockSize();
    final long localminSplitSize;
    if (this.minSplitSize <= blockSize) {
      localminSplitSize = this.minSplitSize;
    } else {
      LOG.log(Level.WARNING, "Minimal split size of " + this.minSplitSize
          + " is larger than the block size of " + blockSize
          + ". Decreasing minimal split size to block size.");
      localminSplitSize = blockSize;
    }
    final long splitSize = Math.max(localminSplitSize, Math.min(maxSplitSize, blockSize));
    if (len > 0) {
      final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, len);
      Arrays.sort(blocks);
      long position = 0;
      int blockIndex = 0;
      for (int i = 0; i < curminNumSplits; i++) {
        blockIndex = getBlockIndexForPosition(blocks, position, splitSize, blockIndex);
        final FileInputSplit fis = new CSVInputSplit(splitNum++, file.getPath(),
            position, splitSize, blocks[blockIndex].getHosts());
        inputSplits.add(fis);
      }
    } else {
      // special case with a file of zero bytes size
      final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, 0);
      String[] hosts;
      if (blocks.length > 0) {
        hosts = blocks[0].getHosts();
      } else {
        hosts = new String[0];
      }
      for (int i = 0; i < curminNumSplits; i++) {
        final FileInputSplit fis = new CSVInputSplit(splitNum++, file.getPath(), 0, 0, hosts);
        inputSplits.add(fis);
      }
    }
  }
  return inputSplits.toArray(new FileInputSplit[inputSplits.size()]);
}
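A hedged, test-style consumption sketch. The CompleteCSVInputPartitioner constructor arguments are assumed to mirror the LocalCompleteTextInputPartitioner call in the commented-out test earlier in this listing and should be verified against the actual class; the DataSource/getNextSplit pattern is taken from that same test.

// Hypothetical construction: (path, parallelism, config) is an assumed argument list.
CompleteCSVInputPartitioner<String> partitioner =
    new CompleteCSVInputPartitioner<>(new Path("/tmp/testcinput"), parallelismValue, config);
// Wrap the partitioner in a DataSource and pull one split per task index,
// as the commented-out test above does for the text partitioner.
DataSource<String, ?> source = new DataSource<>(config, partitioner, parallelismValue);
for (int i = 0; i < parallelismValue; i++) {
  InputSplit<String> inputSplit = source.getNextSplit(i);
  Assert.assertNotNull(inputSplit);
}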