use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.
the class DataGenerator method generateCSV.
/**
 * Generates random CSV files in the given directory; we generate a csv with 10 attributes.
 * Each file gets a random name ending in ".csv" and each row comes from
 * {@code generateCSVLine(10)}.
 *
 * @param directory the path of the directory the files are created in
 * @param numOfFiles the number of files to create
 * @param sizeOfFile the minimum number of rows written to each file
 * @param sizeMargin exclusive upper bound for the random number of extra rows added to
 *                   each file; {@code 0} means every file has exactly {@code sizeOfFile} rows
 * @throws IOException if an underlying file system operation fails
 * @throws IllegalArgumentException if {@code sizeMargin} is negative and at least one
 *                                  file is generated
 */
private static void generateCSV(Path directory, int numOfFiles, int sizeOfFile, int sizeMargin) throws IOException {
  FileSystem fs = FileSystemUtils.get(directory.toUri());
  Random random = new Random(System.currentTimeMillis());
  for (int i = 0; i < numOfFiles; i++) {
    // Draw the row count once per file. Re-rolling the bound in the loop condition
    // (as before) skews the count towards sizeOfFile. Random.nextInt rejects a bound
    // of 0, so a zero margin is handled explicitly; a negative margin still throws.
    int rows = sizeOfFile + (sizeMargin == 0 ? 0 : random.nextInt(sizeMargin));
    FSDataOutputStream outputStream = fs.create(new Path(directory, generateRandom(10) + ".csv"));
    // try-with-resources closes the writer (and the wrapped stream) even when
    // row generation fails part-way through.
    try (PrintWriter pw = new PrintWriter(outputStream)) {
      for (int j = 0; j < rows; j++) {
        pw.println(generateCSVLine(10));
      }
    }
  }
}
use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.
the class DataGenerator method generateText.
/**
 * Generates random text files in the given directory. Each file gets a random name
 * ending in ".txt" and each row is {@code generateRandom(20 + k)} with {@code k}
 * drawn from {@code [0, 10)}.
 *
 * @param directory the path of the directory the files are created in
 * @param numOfFiles the number of files to create
 * @param sizeOfFile the minimum number of rows written to each file
 * @param sizeMargin exclusive upper bound for the random number of extra rows added to
 *                   each file; {@code 0} means every file has exactly {@code sizeOfFile} rows
 * @throws IOException if an underlying file system operation fails
 * @throws IllegalArgumentException if {@code sizeMargin} is negative and at least one
 *                                  file is generated
 */
private static void generateText(Path directory, int numOfFiles, int sizeOfFile, int sizeMargin) throws IOException {
  FileSystem fs = FileSystemUtils.get(directory.toUri());
  Random random = new Random(System.currentTimeMillis());
  for (int i = 0; i < numOfFiles; i++) {
    // Draw the row count once per file. Re-rolling the bound in the loop condition
    // (as before) skews the count towards sizeOfFile. Random.nextInt rejects a bound
    // of 0, so a zero margin is handled explicitly; a negative margin still throws.
    int rows = sizeOfFile + (sizeMargin == 0 ? 0 : random.nextInt(sizeMargin));
    FSDataOutputStream outputStream = fs.create(new Path(directory, generateRandom(10) + ".txt"));
    // try-with-resources closes the writer (and the wrapped stream) even when
    // row generation fails part-way through.
    try (PrintWriter pw = new PrintWriter(outputStream)) {
      for (int j = 0; j < rows; j++) {
        pw.println(generateRandom(20 + random.nextInt(10)));
      }
    }
  }
}
use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.
the class KMeansDataGeneratorTest method testUniqueSchedules3.
/**
* The HDFS data generation test below is commented out for the Travis build.
*/
/* @Test
public void testUniqueSchedules2() throws IOException {
Config config = getConfig();
String hostname = String.valueOf(config.get("twister2.data.hdfs.namenode"));
String dinputDirectory = "hdfs://" + hostname + ":9000/tmp/testdinput";
int numFiles = 1;
int dsize = 20;
int dimension = 2;
int parallelismValue = 2;
KMeansDataGenerator.generateData("txt", new Path(dinputDirectory),
numFiles, dsize, 100, dimension, config);
ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
computeGraphBuilder.setTaskGraphName("kmeans");
DataObjectSource sourceTask = new DataObjectSource("direct", dinputDirectory);
DataObjectSink sinkTask = new DataObjectSink();
computeGraphBuilder.addSource("source", sourceTask, parallelismValue);
ComputeConnection computeConnection1 = computeGraphBuilder.addSink("sink", sinkTask,
parallelismValue);
computeConnection1.direct("source").viaEdge("direct").withDataType(MessageTypes.OBJECT);
computeGraphBuilder.setMode(OperationMode.BATCH);
LocalCompleteTextInputPartitioner localCompleteTextInputPartitioner
= new LocalCompleteTextInputPartitioner(
new Path(dinputDirectory), parallelismValue, config);
DataSource<String, ?> source
= new DataSource<>(config, localCompleteTextInputPartitioner, parallelismValue);
InputSplit<String> inputSplit;
for (int i = 0; i < parallelismValue; i++) {
inputSplit = source.getNextSplit(i);
Assert.assertNotNull(inputSplit);
}
}*/
/**
 * Generates a small "txt" data set under /tmp/testcinput, then verifies that the
 * directory has a file status and that DataFileReader returns a non-null result for it.
 */
@Test
public void testUniqueSchedules3() throws IOException {
  final Config cfg = getConfig();
  final String inputDir = "/tmp/testcinput";
  final int fileCount = 1;
  final int pointCount = 4;
  final int dims = 2;
  final int parallelism = 2;

  // Write the input data first; everything below reads from this directory.
  KMeansDataGenerator.generateData(
      "txt", new Path(inputDir), fileCount, pointCount, 100, dims, cfg);

  // Set up a batch graph with a single replicated-read source over the directory.
  ComputeGraphBuilder graphBuilder = ComputeGraphBuilder.newBuilder(cfg);
  graphBuilder.setTaskGraphName("kmeans");
  DataFileReplicatedReadSource readSource =
      new DataFileReplicatedReadSource(Context.TWISTER2_DIRECT_EDGE, inputDir);
  graphBuilder.addSource("map", readSource, parallelism);
  graphBuilder.setMode(OperationMode.BATCH);

  // The generated directory must be visible through the file system API.
  Path inputPath = new Path(inputDir);
  final FileSystem fileSystem = FileSystemUtils.get(inputPath);
  final FileStatus status = fileSystem.getFileStatus(inputPath);
  Assert.assertNotNull(status);

  // Reading the data back through the local reader must produce a result.
  DataFileReader reader = new DataFileReader(cfg, "local");
  double[][] values = reader.readData(inputPath, dims, pointCount);
  Assert.assertNotNull(values);
}
use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.
the class KMeansDataGeneratorTest method testUniqueSchedules1.
/**
 * Generates a small "txt" data set under /tmp/testdinput, then verifies that a
 * DataSource backed by LocalTextInputPartitioner hands out a non-null split for
 * every index below the parallelism.
 */
@Test
public void testUniqueSchedules1() throws IOException {
  final Config cfg = getConfig();
  final String inputDir = "/tmp/testdinput";
  final int fileCount = 1;
  final int pointCount = 20;
  final int dims = 2;
  final int parallelism = 2;

  // Write the input data first; the partitioner below reads from this directory.
  KMeansDataGenerator.generateData(
      "txt", new Path(inputDir), fileCount, pointCount, 100, dims, cfg);

  // Set up a batch graph: source -> sink connected by a direct edge.
  ComputeGraphBuilder graphBuilder = ComputeGraphBuilder.newBuilder(cfg);
  graphBuilder.setTaskGraphName("kmeans");
  DataObjectSource objectSource = new DataObjectSource("direct", inputDir);
  DataObjectSink objectSink = new DataObjectSink();
  graphBuilder.addSource("source", objectSource, parallelism);
  ComputeConnection sinkConnection =
      graphBuilder.addCompute("sink", objectSink, parallelism);
  sinkConnection.direct("source").viaEdge("direct").withDataType(MessageTypes.OBJECT);
  graphBuilder.setMode(OperationMode.BATCH);

  // Ask the data source for one split per index and check each one exists.
  LocalTextInputPartitioner partitioner =
      new LocalTextInputPartitioner(new Path(inputDir), parallelism, cfg);
  DataSource<String, ?> dataSource = new DataSource<>(cfg, partitioner, parallelism);
  for (int index = 0; index < parallelism; index++) {
    InputSplit<String> split = dataSource.getNextSplit(index);
    Assert.assertNotNull(split);
  }
}
use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.
the class DataGenerator method generate.
/**
 * Writes one randomly named ".txt" file containing generated points into the given
 * directory. If the directory already exists it is deleted first (delete is called
 * with {@code true}, presumably meaning recursive - confirm against FileSystem).
 *
 * @param directory the directory the data file is created in
 * @param size forwarded to generatePoints; presumably the number of points
 * @param dimension forwarded to generatePoints; presumably the dimension of each point
 * @throws RuntimeException wrapping the IOException if a file system operation fails
 */
public void generate(Path directory, int size, int dimension) {
  try {
    FileSystem fs = FileSystemUtils.get(directory.toUri(), config);
    if (fs.exists(directory)) {
      fs.delete(directory, true);
    }
    FSDataOutputStream outputStream = fs.create(new Path(directory, generateRandom(10) + ".txt"));
    // try-with-resources closes the writer (and the wrapped stream) even if
    // generating, flushing or syncing fails.
    try (PrintWriter pw = new PrintWriter(outputStream)) {
      String points = generatePoints(size, dimension, 100);
      pw.print(points);
      // PrintWriter buffers its output: flush it into the stream before sync(),
      // otherwise the sync runs before the points have reached the stream.
      pw.flush();
      outputStream.sync();
    }
  } catch (IOException e) {
    throw new RuntimeException("Data Generation Error Occured", e);
  }
}
Aggregations