Search in sources :

Example 41 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class DataParallelJob method main.

/**
 * Client entry point: parses the command line, optionally generates the input
 * data locally, and submits the job with {@code DataParallelWorker} as the worker class.
 *
 * @param args command line arguments, parsed against the options declared below
 * @throws ParseException if the arguments cannot be parsed against the declared options
 * @throws IOException declared for the client-side data generation step
 */
public static void main(String[] args) throws ParseException, IOException {
    // Start from the configuration loaded from the command line and config files.
    Config config = ResourceAllocator.loadConfig(new HashMap<>());

    // Declare every option this client understands.
    Options cliOptions = new Options();
    cliOptions.addOption(Constants.ARGS_WORKERS, true, "Workers");
    cliOptions.addOption(Constants.ARGS_SIZE, true, "Size of the file");
    cliOptions.addOption(Constants.ARGS_NUMBER_OF_FILES, true, "Number of files");
    cliOptions.addOption(Constants.ARGS_SHARED_FILE_SYSTEM, false, "Shared file system");
    cliOptions.addOption(Constants.ARGS_PARALLEL, true, "parallelism value");
    // NOTE(review): the trailing `true` presumably marks these options as required - confirm
    // against Utils.createOption.
    cliOptions.addOption(
        Utils.createOption(Constants.ARGS_INPUT_DIRECTORY, true, "Input directory", true));
    cliOptions.addOption(
        Utils.createOption(Constants.ARGS_OUTPUT_DIRECTORY, true, "Output directory", true));

    CommandLineParser parser = new DefaultParser();
    CommandLine parsedArgs = parser.parse(cliOptions, args);

    // Numeric arguments (kept in this order so the first failing argument is unchanged).
    int workerCount = Integer.parseInt(parsedArgs.getOptionValue(Constants.ARGS_WORKERS));
    int fileSize = Integer.parseInt(parsedArgs.getOptionValue(Constants.ARGS_SIZE));
    int fileCount = Integer.parseInt(parsedArgs.getOptionValue(Constants.ARGS_NUMBER_OF_FILES));
    int parallelism = Integer.parseInt(parsedArgs.getOptionValue(Constants.ARGS_PARALLEL));

    // Directory arguments and the shared-file-system flag.
    String inputDir = parsedArgs.getOptionValue(Constants.ARGS_INPUT_DIRECTORY);
    String outputDir = parsedArgs.getOptionValue(Constants.ARGS_OUTPUT_DIRECTORY);
    boolean sharedFileSystem = parsedArgs.hasOption(Constants.ARGS_SHARED_FILE_SYSTEM);

    // With a shared file system the data is generated here, at the client.
    if (sharedFileSystem) {
        DataGenerator.generateData("txt", new Path(inputDir), fileCount, fileSize, 100);
    }

    // Hand the parsed arguments over to the job through its JobConfig.
    JobConfig jobConfig = new JobConfig();
    jobConfig.put(Constants.ARGS_SIZE, String.valueOf(fileSize));
    jobConfig.put(Constants.ARGS_WORKERS, String.valueOf(workerCount));
    jobConfig.put(Constants.ARGS_INPUT_DIRECTORY, inputDir);
    jobConfig.put(Constants.ARGS_OUTPUT_DIRECTORY, outputDir);
    jobConfig.put(Constants.ARGS_NUMBER_OF_FILES, fileCount);
    jobConfig.put(Constants.ARGS_SHARED_FILE_SYSTEM, sharedFileSystem);
    jobConfig.put(Constants.ARGS_PARALLEL, parallelism);

    // Submit the job.
    submitJob(config, workerCount, jobConfig, DataParallelWorker.class.getName());
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) CommandLineParser(org.apache.commons.cli.CommandLineParser) JobConfig(edu.iu.dsc.tws.api.JobConfig) DefaultParser(org.apache.commons.cli.DefaultParser)

Example 42 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class WeightVectorLoad method prepare.

/**
 * Captures the config and parallelism from the context, sizes the local point
 * buffer from the model's feature count, and creates the data source that reads
 * from the weight vector data directory.
 *
 * @param context the TSet context supplying the config and parallelism
 */
@Override
public void prepare(TSetContext context) {
    super.prepare(context);
    this.config = context.getConfig();
    this.parallelism = context.getParallelism();

    // Log the task index next to the configured and the actual parallelism.
    final String summary = String.format("%d, %d, %d",
        this.getTSetContext().getIndex(),
        this.svmJobParameters.getParallelism(),
        context.getParallelism());
    LOG.info(summary);

    // One slot per feature of the model.
    this.dimension = this.binaryBatchModel.getFeatures();
    this.localPoints = new double[this.dimension];

    final Path weightVectorDir = new Path(this.svmJobParameters.getWeightVectorDataDir());
    this.source = new DataSource<double[], InputSplit<double[]>>(
        this.config,
        new LocalFixedInputPartitioner(weightVectorDir, this.parallelism, this.config, 1),
        this.parallelism);
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) LocalFixedInputPartitioner(edu.iu.dsc.tws.data.api.formatters.LocalFixedInputPartitioner) InputSplit(edu.iu.dsc.tws.data.fs.io.InputSplit)

Example 43 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class DataParallelWorker method execute.

/**
 * Builds a batch compute graph with a single {@code DataParallelTask} source named
 * "map" and executes it.
 *
 * <p>When the file system is not shared, the worker with id 0 first generates the
 * input data into the configured input directory.
 *
 * @throws RuntimeException if generating the input data fails; the underlying
 *     {@link IOException} is attached as the cause
 */
@Override
public void execute() {
    ComputeGraphBuilder computeGraphBuilder = ComputeGraphBuilder.newBuilder(config);
    String inputDirectory = config.getStringValue(Constants.ARGS_INPUT_DIRECTORY);
    boolean shared = config.getBooleanValue(Constants.ARGS_SHARED_FILE_SYSTEM);
    // NOTE(review): the second argument is presumably the fallback used when the key is absent.
    int numFiles = config.getIntegerValue(Constants.ARGS_NUMBER_OF_FILES, 4);
    int size = config.getIntegerValue(Constants.ARGS_SIZE, 1000);
    int parallel = config.getIntegerValue(Constants.ARGS_PARALLEL, 2);
    // Without a shared file system the client did not generate data, so worker 0 does it here.
    if (!shared && workerId == 0) {
        try {
            DataGenerator.generateData("txt", new Path(inputDirectory), numFiles, size, 10);
        } catch (IOException e) {
            // Chain the original exception so the root cause and its stack trace are not lost.
            throw new RuntimeException("Failed to create data: " + inputDirectory, e);
        }
    }
    DataParallelTask task = new DataParallelTask();
    computeGraphBuilder.addSource("map", task, parallel);
    computeGraphBuilder.setMode(OperationMode.BATCH);
    ComputeGraph computeGraph = computeGraphBuilder.build();
    ExecutionPlan plan = taskExecutor.plan(computeGraph);
    taskExecutor.execute(computeGraph, plan);
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) ExecutionPlan(edu.iu.dsc.tws.api.compute.executor.ExecutionPlan) ComputeGraph(edu.iu.dsc.tws.api.compute.graph.ComputeGraph) ComputeGraphBuilder(edu.iu.dsc.tws.task.impl.ComputeGraphBuilder) IOException(java.io.IOException)

Example 44 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class LMDBTest method testPrimitives.

/**
 * Stores a single integer through an {@code LMDBMemoryManager} and reads it back,
 * printing "true" or "false" depending on whether the value read matches.
 *
 * <p>The database location is a hard-coded absolute path, so this only runs on a
 * machine where that path is usable.
 *
 * @return {@code true} if the value read back equals the value written,
 *     {@code false} otherwise
 */
public boolean testPrimitives() {
    boolean allPassed = true;
    Path dataPath = new Path("/home/pulasthi/work/twister2/lmdbdatabase");
    MemoryManager memoryManager = new LMDBMemoryManager(dataPath);
    int opID = 1;
    memoryManager.addOperation(opID, DataMessageType.INTEGER);
    // Test single integer operation
    ByteBuffer key = ByteBuffer.allocateDirect(4);
    ByteBuffer value = ByteBuffer.allocateDirect(4);
    key.putInt(1);
    int testInt = 1231212121;
    byte[] val = Ints.toByteArray(testInt);
    value.put(val);
    // NOTE(review): the value is written under operation id 0 but read back under opID (1);
    // this looks unintended - confirm against MemoryManager before changing.
    memoryManager.put(0, key, value);
    ByteBuffer results = memoryManager.get(opID, key);
    int res = results.getInt();
    if (res == testInt) {
        System.out.println("true");
    } else {
        allPassed = false;
        System.out.println("false");
    }
    // Report the real outcome; previously this always returned true and hid failures.
    return allPassed;
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) LMDBMemoryManager(edu.iu.dsc.tws.data.memory.lmdb.LMDBMemoryManager) LMDBMemoryManager(edu.iu.dsc.tws.data.memory.lmdb.LMDBMemoryManager) ByteBuffer(java.nio.ByteBuffer)

Example 45 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class DataGeneratorSource method generateData.

/**
 * Generates one "txt" file of data points and one "txt" file of centroids via
 * {@code KMeansDataGenerator}, into the data and centroid directories respectively.
 *
 * @throws Twister2RuntimeException if either generation step fails with an
 *     {@link IOException}; the original exception is attached as the cause
 */
public void generateData() {
    // Both data sets are written as a single file with the same size margin.
    final int fileCount = 1;
    final int margin = 100;
    try {
        // Data points first, then the centroids.
        KMeansDataGenerator.generateData(
            "txt", new Path(dataDirectory), fileCount, dsize, margin, dim, config);
        KMeansDataGenerator.generateData(
            "txt", new Path(centroidDirectory), fileCount, csize, margin, dim, config);
    } catch (IOException cause) {
        throw new Twister2RuntimeException("Failed to create input data:", cause);
    }
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) IOException(java.io.IOException)

Aggregations

Path (edu.iu.dsc.tws.api.data.Path)61 IOException (java.io.IOException)23 FileSystem (edu.iu.dsc.tws.api.data.FileSystem)19 FileStatus (edu.iu.dsc.tws.api.data.FileStatus)14 ArrayList (java.util.ArrayList)12 Config (edu.iu.dsc.tws.api.config.Config)11 Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)8 FileInputSplit (edu.iu.dsc.tws.data.api.splits.FileInputSplit)8 ExecutionRuntime (edu.iu.dsc.tws.executor.core.ExecutionRuntime)8 BlockLocation (edu.iu.dsc.tws.api.data.BlockLocation)7 FSDataOutputStream (edu.iu.dsc.tws.api.data.FSDataOutputStream)7 PrintWriter (java.io.PrintWriter)7 File (java.io.File)6 LocalTextInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalTextInputPartitioner)5 Test (org.junit.Test)5 ComputeGraph (edu.iu.dsc.tws.api.compute.graph.ComputeGraph)4 LocalCSVInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalCSVInputPartitioner)4 LocalFixedInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalFixedInputPartitioner)4 DataGenerator (edu.iu.dsc.tws.tsched.utils.DataGenerator)4 CSVInputSplit (edu.iu.dsc.tws.data.api.splits.CSVInputSplit)3