Search in sources :

Example 31 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class DataGenerator method generateCSV.

/**
 * Generates random CSV files in the given directory. Each file gets a random
 * name ending in ".csv" and every row is produced by {@code generateCSVLine(10)}
 * (a CSV line with 10 attributes).
 *
 * @param directory the path of the directory the files are created in
 * @param numOfFiles the number of files to create
 * @param sizeOfFile the minimum number of rows written to each file
 * @param sizeMargin exclusive upper bound on the random number of extra rows
 *                   added to each file; zero or negative adds no extra rows
 * @throws IOException declared for the file system lookup and file creation calls
 */
private static void generateCSV(Path directory, int numOfFiles, int sizeOfFile, int sizeMargin) throws IOException {
    FileSystem fs = FileSystemUtils.get(directory.toUri());
    Random random = new Random(System.currentTimeMillis());
    for (int i = 0; i < numOfFiles; i++) {
        // Pick the row count once per file. Re-rolling the margin inside the loop
        // condition skews the file length toward the minimum, and
        // Random.nextInt(bound) rejects a non-positive bound.
        int extraRows = sizeMargin > 0 ? random.nextInt(sizeMargin) : 0;
        int rows = sizeOfFile + extraRows;
        FSDataOutputStream outputStream = fs.create(new Path(directory, generateRandom(10) + ".csv"));
        // try-with-resources closes the writer (and the wrapped stream) even if
        // row generation throws.
        try (PrintWriter pw = new PrintWriter(outputStream)) {
            for (int j = 0; j < rows; j++) {
                pw.println(generateCSVLine(10));
            }
        }
    }
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) Random(java.util.Random) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) FSDataOutputStream(edu.iu.dsc.tws.api.data.FSDataOutputStream) PrintWriter(java.io.PrintWriter)

Example 32 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class DataGenerator method generateText.

/**
 * Generates random text files in the given directory. Each file gets a random
 * name ending in ".txt" and every row is a string returned by
 * {@code generateRandom} for an argument between 20 and 29.
 *
 * @param directory the path of the directory the files are created in
 * @param numOfFiles the number of files to create
 * @param sizeOfFile the minimum number of rows written to each file
 * @param sizeMargin exclusive upper bound on the random number of extra rows
 *                   added to each file; zero or negative adds no extra rows
 * @throws IOException declared for the file system lookup and file creation calls
 */
private static void generateText(Path directory, int numOfFiles, int sizeOfFile, int sizeMargin) throws IOException {
    FileSystem fs = FileSystemUtils.get(directory.toUri());
    Random random = new Random(System.currentTimeMillis());
    for (int i = 0; i < numOfFiles; i++) {
        // Pick the row count once per file. Re-rolling the margin inside the loop
        // condition skews the file length toward the minimum, and
        // Random.nextInt(bound) rejects a non-positive bound.
        int extraRows = sizeMargin > 0 ? random.nextInt(sizeMargin) : 0;
        int rows = sizeOfFile + extraRows;
        FSDataOutputStream outputStream = fs.create(new Path(directory, generateRandom(10) + ".txt"));
        // try-with-resources closes the writer (and the wrapped stream) even if
        // row generation throws.
        try (PrintWriter pw = new PrintWriter(outputStream)) {
            for (int j = 0; j < rows; j++) {
                pw.println(generateRandom(20 + random.nextInt(10)));
            }
        }
    }
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) Random(java.util.Random) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) FSDataOutputStream(edu.iu.dsc.tws.api.data.FSDataOutputStream) PrintWriter(java.io.PrintWriter)

Example 33 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class KMeansDataGeneratorTest method testUniqueSchedules3.

/**
 * The HDFS data generation test is commented out for the Travis build.
 */
/* @Test
  public void testUniqueSchedules2() throws IOException {
    Config config = getConfig();

    String hostname = String.valueOf(config.get("twister2.data.hdfs.namenode"));
    String dinputDirectory = "hdfs://" + hostname + ":9000/tmp/testdinput";

    int numFiles = 1;
    int dsize = 20;
    int dimension = 2;
    int parallelismValue = 2;

    KMeansDataGenerator.generateData("txt", new Path(dinputDirectory),
        numFiles, dsize, 100, dimension, config);
    ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
    computeGraphBuilder.setTaskGraphName("kmeans");
    DataObjectSource sourceTask = new DataObjectSource("direct", dinputDirectory);
    DataObjectSink sinkTask = new DataObjectSink();
    computeGraphBuilder.addSource("source", sourceTask, parallelismValue);
    ComputeConnection computeConnection1 = computeGraphBuilder.addSink("sink", sinkTask,
        parallelismValue);
    computeConnection1.direct("source").viaEdge("direct").withDataType(MessageTypes.OBJECT);
    computeGraphBuilder.setMode(OperationMode.BATCH);

    LocalCompleteTextInputPartitioner localCompleteTextInputPartitioner
        = new LocalCompleteTextInputPartitioner(
        new Path(dinputDirectory), parallelismValue, config);

    DataSource<String, ?> source
        = new DataSource<>(config, localCompleteTextInputPartitioner, parallelismValue);
    InputSplit<String> inputSplit;
    for (int i = 0; i < parallelismValue; i++) {
      inputSplit = source.getNextSplit(i);
      Assert.assertNotNull(inputSplit);
    }
  }*/
/**
 * Generates a small "txt" data set under /tmp/testcinput, registers a replicated
 * read source on a batch compute graph builder, then checks that the directory
 * has a file status and that {@code DataFileReader} returns non-null data for it.
 */
@Test
public void testUniqueSchedules3() throws IOException {
    final String cinputDirectory = "/tmp/testcinput";
    final int numFiles = 1;
    final int csize = 4;
    final int dimension = 2;
    final int parallelismValue = 2;

    Config config = getConfig();
    KMeansDataGenerator.generateData(
        "txt", new Path(cinputDirectory), numFiles, csize, 100, dimension, config);

    // Build the batch graph with a single replicated-read source.
    ComputeGraphBuilder graphBuilder = ComputeGraphBuilder.newBuilder(config);
    graphBuilder.setTaskGraphName("kmeans");
    DataFileReplicatedReadSource readSource =
        new DataFileReplicatedReadSource(Context.TWISTER2_DIRECT_EDGE, cinputDirectory);
    graphBuilder.addSource("map", readSource, parallelismValue);
    graphBuilder.setMode(OperationMode.BATCH);

    // The generated directory must be visible through the file system API.
    Path inputPath = new Path(cinputDirectory);
    final FileSystem fileSystem = FileSystemUtils.get(inputPath);
    final FileStatus inputStatus = fileSystem.getFileStatus(inputPath);
    Assert.assertNotNull(inputStatus);

    // Reading the generated data back must yield a result.
    DataFileReader reader = new DataFileReader(config, "local");
    double[][] readCentroids = reader.readData(inputPath, dimension, csize);
    Assert.assertNotNull(readCentroids);
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileStatus(edu.iu.dsc.tws.api.data.FileStatus) DataFileReader(edu.iu.dsc.tws.data.utils.DataFileReader) DataFileReplicatedReadSource(edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource) Config(edu.iu.dsc.tws.api.config.Config) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) Test(org.junit.Test)

Example 34 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class KMeansDataGeneratorTest method testUniqueSchedules1.

/**
 * Generates a "txt" data set under /tmp/testdinput, wires a source and a sink
 * over a direct edge on a batch compute graph builder, then checks that a
 * {@code DataSource} backed by a {@code LocalTextInputPartitioner} hands out a
 * non-null split for every parallel index.
 */
@Test
public void testUniqueSchedules1() throws IOException {
    final String dinputDirectory = "/tmp/testdinput";
    final int numFiles = 1;
    final int dsize = 20;
    final int dimension = 2;
    final int parallelismValue = 2;

    Config config = getConfig();
    KMeansDataGenerator.generateData(
        "txt", new Path(dinputDirectory), numFiles, dsize, 100, dimension, config);

    // Build the batch graph: source -> (direct edge) -> sink.
    ComputeGraphBuilder graphBuilder = ComputeGraphBuilder.newBuilder(config);
    graphBuilder.setTaskGraphName("kmeans");
    DataObjectSource objectSource = new DataObjectSource("direct", dinputDirectory);
    DataObjectSink objectSink = new DataObjectSink();
    graphBuilder.addSource("source", objectSource, parallelismValue);
    ComputeConnection sinkConnection =
        graphBuilder.addCompute("sink", objectSink, parallelismValue);
    sinkConnection.direct("source").viaEdge("direct").withDataType(MessageTypes.OBJECT);
    graphBuilder.setMode(OperationMode.BATCH);

    // Every parallel index must receive a split from the partitioned data source.
    LocalTextInputPartitioner partitioner =
        new LocalTextInputPartitioner(new Path(dinputDirectory), parallelismValue, config);
    DataSource<String, ?> source = new DataSource<>(config, partitioner, parallelismValue);
    int index = 0;
    while (index < parallelismValue) {
        InputSplit<String> split = source.getNextSplit(index);
        Assert.assertNotNull(split);
        index++;
    }
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) LocalTextInputPartitioner(edu.iu.dsc.tws.data.api.formatters.LocalTextInputPartitioner) DataObjectSink(edu.iu.dsc.tws.task.dataobjects.DataObjectSink) Config(edu.iu.dsc.tws.api.config.Config) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) DataObjectSource(edu.iu.dsc.tws.task.dataobjects.DataObjectSource) ComputeConnection(edu.iu.dsc.tws.task.impl.ComputeConnection) DataSource(edu.iu.dsc.tws.dataset.DataSource) Test(org.junit.Test)

Example 35 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class DataGenerator method generate.

/**
 * Writes one randomly named ".txt" file of generated points into the given
 * directory. If the directory already exists it is deleted recursively first.
 *
 * @param directory the directory to (re)create the data file in
 * @param size passed to {@code generatePoints} as its first argument
 * @param dimension passed to {@code generatePoints} as its second argument
 * @throws RuntimeException wrapping any {@link IOException} raised while writing
 */
public void generate(Path directory, int size, int dimension) {
    try {
        FileSystem fs = FileSystemUtils.get(directory.toUri(), config);
        if (fs.exists(directory)) {
            fs.delete(directory, true);
        }
        FSDataOutputStream outputStream = fs.create(new Path(directory, generateRandom(10) + ".txt"));
        // try-with-resources closes the writer (and the wrapped stream) even if
        // point generation or the sync fails.
        try (PrintWriter pw = new PrintWriter(outputStream)) {
            String points = generatePoints(size, dimension, 100);
            pw.print(points);
            // PrintWriter buffers its output; flush it into the stream first so
            // that sync() acts on the written data instead of an empty stream.
            pw.flush();
            outputStream.sync();
        }
    } catch (IOException e) {
        throw new RuntimeException("Data Generation Error Occured", e);
    }
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) FSDataOutputStream(edu.iu.dsc.tws.api.data.FSDataOutputStream) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter)

Aggregations

Path (edu.iu.dsc.tws.api.data.Path)61 IOException (java.io.IOException)23 FileSystem (edu.iu.dsc.tws.api.data.FileSystem)19 FileStatus (edu.iu.dsc.tws.api.data.FileStatus)14 ArrayList (java.util.ArrayList)12 Config (edu.iu.dsc.tws.api.config.Config)11 Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)8 FileInputSplit (edu.iu.dsc.tws.data.api.splits.FileInputSplit)8 ExecutionRuntime (edu.iu.dsc.tws.executor.core.ExecutionRuntime)8 BlockLocation (edu.iu.dsc.tws.api.data.BlockLocation)7 FSDataOutputStream (edu.iu.dsc.tws.api.data.FSDataOutputStream)7 PrintWriter (java.io.PrintWriter)7 File (java.io.File)6 LocalTextInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalTextInputPartitioner)5 Test (org.junit.Test)5 ComputeGraph (edu.iu.dsc.tws.api.compute.graph.ComputeGraph)4 LocalCSVInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalCSVInputPartitioner)4 LocalFixedInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalFixedInputPartitioner)4 DataGenerator (edu.iu.dsc.tws.tsched.utils.DataGenerator)4 CSVInputSplit (edu.iu.dsc.tws.data.api.splits.CSVInputSplit)3