Search in sources :

Example 11 with FileSystem

use of edu.iu.dsc.tws.api.data.FileSystem in project twister2 by DSC-SPIDAL.

the class DataGenerator method generateCSV.

/**
 * Generates random CSV files in the given directory. Every file is named
 * {@code generateRandom(10) + ".csv"} and every row comes from {@code generateCSVLine(10)}
 * (presumably a line with 10 attributes -- confirm against that helper).
 *
 * <p>The row count of a file is {@code sizeOfFile} plus a random margin in
 * {@code [0, sizeMargin)} that is drawn once per file. A non-positive {@code sizeMargin}
 * means that no margin is added.
 *
 * @param directory the path of the directory the files are created in
 * @param numOfFiles the number of files to create
 * @param sizeOfFile the minimum number of rows per file
 * @param sizeMargin the exclusive upper bound of the random number of extra rows per file
 * @throws IOException if accessing the file system or writing a file fails
 */
private static void generateCSV(Path directory, int numOfFiles, int sizeOfFile, int sizeMargin) throws IOException {
    FileSystem fs = FileSystemUtils.get(directory.toUri());
    Random random = new Random(System.currentTimeMillis());
    for (int i = 0; i < numOfFiles; i++) {
        Path file = new Path(directory, generateRandom(10) + ".csv");
        // Draw the margin once per file. Evaluating nextInt() inside the loop condition would
        // re-roll the bound on every row, and nextInt() rejects a non-positive bound.
        int margin = sizeMargin > 0 ? random.nextInt(sizeMargin) : 0;
        int rows = sizeOfFile + margin;
        // try-with-resources closes the writer (and the wrapped stream) even on failure.
        try (PrintWriter pw = new PrintWriter(fs.create(file))) {
            for (int j = 0; j < rows; j++) {
                pw.println(generateCSVLine(10));
            }
            // PrintWriter never throws on write errors; checkError() flushes and reports them.
            if (pw.checkError()) {
                throw new IOException("Failed to write CSV file " + file);
            }
        }
    }
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) Random(java.util.Random) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) FSDataOutputStream(edu.iu.dsc.tws.api.data.FSDataOutputStream) PrintWriter(java.io.PrintWriter)

Example 12 with FileSystem

use of edu.iu.dsc.tws.api.data.FileSystem in project twister2 by DSC-SPIDAL.

the class DataGenerator method generateText.

/**
 * Generates random text files in the given directory. Every file is named
 * {@code generateRandom(10) + ".txt"} and every row is
 * {@code generateRandom(20 + random.nextInt(10))}.
 *
 * <p>The row count of a file is {@code sizeOfFile} plus a random margin in
 * {@code [0, sizeMargin)} that is drawn once per file. A non-positive {@code sizeMargin}
 * means that no margin is added.
 *
 * @param directory the path of the directory the files are created in
 * @param numOfFiles the number of files to create
 * @param sizeOfFile the minimum number of rows per file
 * @param sizeMargin the exclusive upper bound of the random number of extra rows per file
 * @throws IOException if accessing the file system or writing a file fails
 */
private static void generateText(Path directory, int numOfFiles, int sizeOfFile, int sizeMargin) throws IOException {
    FileSystem fs = FileSystemUtils.get(directory.toUri());
    Random random = new Random(System.currentTimeMillis());
    for (int i = 0; i < numOfFiles; i++) {
        Path file = new Path(directory, generateRandom(10) + ".txt");
        // Draw the margin once per file. Evaluating nextInt() inside the loop condition would
        // re-roll the bound on every row, and nextInt() rejects a non-positive bound.
        int margin = sizeMargin > 0 ? random.nextInt(sizeMargin) : 0;
        int rows = sizeOfFile + margin;
        // try-with-resources closes the writer (and the wrapped stream) even on failure.
        try (PrintWriter pw = new PrintWriter(fs.create(file))) {
            for (int j = 0; j < rows; j++) {
                pw.println(generateRandom(20 + random.nextInt(10)));
            }
            // PrintWriter never throws on write errors; checkError() flushes and reports them.
            if (pw.checkError()) {
                throw new IOException("Failed to write text file " + file);
            }
        }
    }
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) Random(java.util.Random) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) FSDataOutputStream(edu.iu.dsc.tws.api.data.FSDataOutputStream) PrintWriter(java.io.PrintWriter)

Example 13 with FileSystem

use of edu.iu.dsc.tws.api.data.FileSystem in project twister2 by DSC-SPIDAL.

the class KMeansDataGeneratorTest method testUniqueSchedules3.

/**
 * The HDFS data generation test below is commented out for the Travis build.
 */
/* @Test
  public void testUniqueSchedules2() throws IOException {
    Config config = getConfig();

    String hostname = String.valueOf(config.get("twister2.data.hdfs.namenode"));
    String dinputDirectory = "hdfs://" + hostname + ":9000/tmp/testdinput";

    int numFiles = 1;
    int dsize = 20;
    int dimension = 2;
    int parallelismValue = 2;

    KMeansDataGenerator.generateData("txt", new Path(dinputDirectory),
        numFiles, dsize, 100, dimension, config);
    ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
    computeGraphBuilder.setTaskGraphName("kmeans");
    DataObjectSource sourceTask = new DataObjectSource("direct", dinputDirectory);
    DataObjectSink sinkTask = new DataObjectSink();
    computeGraphBuilder.addSource("source", sourceTask, parallelismValue);
    ComputeConnection computeConnection1 = computeGraphBuilder.addSink("sink", sinkTask,
        parallelismValue);
    computeConnection1.direct("source").viaEdge("direct").withDataType(MessageTypes.OBJECT);
    computeGraphBuilder.setMode(OperationMode.BATCH);

    LocalCompleteTextInputPartitioner localCompleteTextInputPartitioner
        = new LocalCompleteTextInputPartitioner(
        new Path(dinputDirectory), parallelismValue, config);

    DataSource<String, ?> source
        = new DataSource<>(config, localCompleteTextInputPartitioner, parallelismValue);
    InputSplit<String> inputSplit;
    for (int i = 0; i < parallelismValue; i++) {
      inputSplit = source.getNextSplit(i);
      Assert.assertNotNull(inputSplit);
    }
  }*/
/**
 * Generates a small "txt" data set under {@code /tmp/testcinput} through
 * {@code KMeansDataGenerator}, registers a {@code DataFileReplicatedReadSource} on a batch
 * compute graph, and then asserts that both the file status of the input path and the data
 * read back by {@code DataFileReader} are non-null.
 */
@Test
public void testUniqueSchedules3() throws IOException {
    final String inputDir = "/tmp/testcinput";
    final int fileCount = 1;
    final int pointCount = 4;
    final int dims = 2;
    final int parallelism = 2;

    Config cfg = getConfig();
    KMeansDataGenerator.generateData("txt", new Path(inputDir), fileCount, pointCount, 100, dims, cfg);

    // Build the graph with a single replicated-read source named "map".
    ComputeGraphBuilder graphBuilder = ComputeGraphBuilder.newBuilder(cfg);
    graphBuilder.setTaskGraphName("kmeans");
    graphBuilder.addSource("map",
        new DataFileReplicatedReadSource(Context.TWISTER2_DIRECT_EDGE, inputDir), parallelism);
    graphBuilder.setMode(OperationMode.BATCH);

    // The generated input must be visible through the file system abstraction.
    Path inputPath = new Path(inputDir);
    FileSystem fileSystem = FileSystemUtils.get(inputPath);
    Assert.assertNotNull(fileSystem.getFileStatus(inputPath));

    // ...and must be readable as a dims-wide matrix of pointCount rows.
    double[][] centroids = new DataFileReader(cfg, "local").readData(inputPath, dims, pointCount);
    Assert.assertNotNull(centroids);
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileStatus(edu.iu.dsc.tws.api.data.FileStatus) DataFileReader(edu.iu.dsc.tws.data.utils.DataFileReader) DataFileReplicatedReadSource(edu.iu.dsc.tws.task.dataobjects.DataFileReplicatedReadSource) Config(edu.iu.dsc.tws.api.config.Config) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) Test(org.junit.Test)

Example 14 with FileSystem

use of edu.iu.dsc.tws.api.data.FileSystem in project twister2 by DSC-SPIDAL.

the class DataGenerator method generate.

/**
 * Writes one file of generated points into the given directory. An existing directory is
 * deleted recursively first. The file is named {@code generateRandom(10) + ".txt"} and its
 * content is the result of {@code generatePoints(size, dimension, 100)}.
 *
 * @param directory the directory to (re)create the data file in
 * @param size passed to {@code generatePoints} (presumably the number of points -- confirm)
 * @param dimension passed to {@code generatePoints} (presumably the point dimension -- confirm)
 * @throws RuntimeException wrapping the {@link IOException} if any file system operation or
 *     the write fails
 */
public void generate(Path directory, int size, int dimension) {
    try {
        FileSystem fs = FileSystemUtils.get(directory.toUri(), config);
        if (fs.exists(directory)) {
            fs.delete(directory, true);
        }
        FSDataOutputStream outputStream = fs.create(new Path(directory, generateRandom(10) + ".txt"));
        // try-with-resources closes the writer (and the wrapped stream) even on failure.
        try (PrintWriter pw = new PrintWriter(outputStream)) {
            pw.print(generatePoints(size, dimension, 100));
            // PrintWriter buffers its output and never throws: checkError() flushes the buffer
            // into the stream and reports any write failure. The flush has to happen before
            // sync(), otherwise the sync runs on a stream that has not received the data yet.
            if (pw.checkError()) {
                throw new IOException("Failed to write generated points to " + directory);
            }
            outputStream.sync();
        }
    } catch (IOException e) {
        throw new RuntimeException("Data Generation Error Occured", e);
    }
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) FSDataOutputStream(edu.iu.dsc.tws.api.data.FSDataOutputStream) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter)

Example 15 with FileSystem

use of edu.iu.dsc.tws.api.data.FileSystem in project twister2 by DSC-SPIDAL.

the class DataNodeLocatorUtils method findDataNodesLocation.

/**
 * Receives the input file name of a vertex and returns the list of data nodes for it,
 * depending on the file system configured under {@code DataObjectConstants.FILE_SYSTEM}:
 * <ul>
 *   <li>HDFS: the result of {@code getDataNodes()};</li>
 *   <li>local file system: the host name of the local machine.</li>
 * </ul>
 * In both cases the nodes are only reported when the path of the file status is not null or
 * empty. For any other configured file system the list stays empty.
 *
 * @param inputFileName the name (path) of the input file
 * @return the data node list, possibly empty
 * @throws RuntimeException wrapping the {@link IOException} if the file system or the file
 *     status cannot be accessed, or the local host name cannot be resolved
 */
public List<String> findDataNodesLocation(String inputFileName) {
    List<String> dataNodes = new ArrayList<>();
    try {
        Path path = new Path(inputFileName);
        FileSystem fileSystem = FileSystemUtils.get(path.toUri(), config);
        // Look the configured file system type up once instead of once per branch.
        Object fileSystemType = config.get(DataObjectConstants.FILE_SYSTEM);
        if (fileSystemType.equals(DataContext.TWISTER2_HDFS_FILESYSTEM)) {
            FileStatus fileStatus = fileSystem.getFileStatus(path);
            if (!fileStatus.getPath().isNullOrEmpty()) {
                dataNodes = getDataNodes();
            }
        } else if (fileSystemType.equals(DataContext.TWISTER2_LOCAL_FILESYSTEM)) {
            FileStatus fileStatus = fileSystem.getFileStatus(path);
            if (!fileStatus.getPath().isNullOrEmpty()) {
                String datanodeName = InetAddress.getLocalHost().getHostName();
                dataNodes.add(datanodeName);
            }
        }
    } catch (IOException ioe) {
        // Keep the original exception as the cause so the failure remains diagnosable.
        throw new RuntimeException("IOException Occured", ioe);
    }
    return dataNodes;
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileStatus(edu.iu.dsc.tws.api.data.FileStatus) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) ArrayList(java.util.ArrayList) IOException(java.io.IOException)

Aggregations

FileSystem (edu.iu.dsc.tws.api.data.FileSystem)26 FileStatus (edu.iu.dsc.tws.api.data.FileStatus)19 Path (edu.iu.dsc.tws.api.data.Path)18 ArrayList (java.util.ArrayList)11 IOException (java.io.IOException)10 BlockLocation (edu.iu.dsc.tws.api.data.BlockLocation)7 FileInputSplit (edu.iu.dsc.tws.data.api.splits.FileInputSplit)7 FSDataOutputStream (edu.iu.dsc.tws.api.data.FSDataOutputStream)4 PrintWriter (java.io.PrintWriter)4 CSVInputSplit (edu.iu.dsc.tws.data.api.splits.CSVInputSplit)3 LocalFileSystem (edu.iu.dsc.tws.data.fs.local.LocalFileSystem)3 HadoopFileSystem (edu.iu.dsc.tws.data.hdfs.HadoopFileSystem)3 File (java.io.File)2 URI (java.net.URI)2 URISyntaxException (java.net.URISyntaxException)2 Random (java.util.Random)2 TaskSchedulerException (edu.iu.dsc.tws.api.compute.exceptions.TaskSchedulerException)1 Config (edu.iu.dsc.tws.api.config.Config)1 BinaryInputSplit (edu.iu.dsc.tws.data.api.splits.BinaryInputSplit)1 DataFileReader (edu.iu.dsc.tws.data.utils.DataFileReader)1