Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class TestLocalFileSystem, method main.
/**
 * Manual smoke test for text input splitting: builds a configuration pointing at a
 * sample text file, asks a {@code SharedTextInputPartitioner} for input splits, opens
 * the first split handed out by the assigner and prints its first record.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
  // Single source of truth for the sample file, so the config entry and the
  // partitioner path cannot drift apart.
  String inputFile = "/home/pulasthi/git/twister2/twister2/data/src/test"
      + "/resources/TextInputFormatTestFile.text";

  Config.Builder builder = new Config.Builder();
  builder.put("input.file.path", inputFile);
  Config txtFileConf = builder.build();

  Path path = new Path(inputFile);
  InputPartitioner txtInput = new SharedTextInputPartitioner(path);
  txtInput.configure(txtFileConf);

  int minSplits = 8;
  try {
    InputSplit[] inputSplits = txtInput.createInputSplits(minSplits);
    InputSplitAssigner inputSplitAssigner = txtInput.getInputSplitAssigner(inputSplits);
    InputSplit cur = inputSplitAssigner.getNextInputSplit(null, 0);
    // NOTE(review): the assigner appears to return null when it has no split to
    // hand out; report that explicitly instead of failing with a NullPointerException.
    if (cur == null) {
      System.err.println("No input split was assigned for " + inputFile);
      return;
    }
    cur.open();
    // An empty string is passed as the reusable record object, as before.
    String line = (String) cur.nextRecord("");
    System.out.println(line);
  } catch (Exception e) {
    e.printStackTrace();
  }
}
Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class ArrowTSetSourceExample, method execute.
/**
 * Runs the Arrow round trip for this worker: reads the CSV input, parses the first
 * column of every row as an integer and sinks the values through an
 * {@code ArrowBasedSinkFunction}; afterwards creates an Arrow source over the same
 * directory/file and logs the integers each partition reads back.
 *
 * @param workerEnv the worker environment used to initialise the batch TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
  BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);
  Config cfg = batchEnv.getConfig();

  // Job parameters read back from the configuration.
  String csvDir = cfg.getStringValue(DataObjectConstants.DINPUT_DIRECTORY);
  String arrowDir = cfg.getStringValue(DataObjectConstants.ARROW_DIRECTORY);
  String arrowFile = cfg.getStringValue(DataObjectConstants.FILE_NAME);
  int workerCount = cfg.getIntegerValue(DataObjectConstants.WORKERS);
  int parallelism = cfg.getIntegerValue(DataObjectConstants.PARALLELISM_VALUE);
  int dataSize = cfg.getIntegerValue(DataObjectConstants.DSIZE);

  LOG.info("arrow input file:" + arrowFile
      + "\t" + arrowDir
      + "\t" + csvDir
      + "\t" + workerCount
      + "\t" + parallelism);

  Schema arrowSchema = makeSchema();

  // Stage 1: CSV rows -> first column as Integer -> Arrow sink.
  SourceTSet<String[]> csvRows =
      batchEnv.createCSVSource(csvDir, dataSize, parallelism, "split");
  SinkTSet<Iterator<Integer>> arrowSink = csvRows
      .direct()
      .map((MapFunc<String[], Integer>) row -> Integer.parseInt(row[0]))
      .direct()
      .sink(new ArrowBasedSinkFunction<>(arrowDir, arrowFile, arrowSchema.toJson()));
  batchEnv.run(arrowSink);

  // Stage 2: read the Arrow data back and log what each partition received.
  batchEnv.createArrowSource(arrowDir, arrowFile, parallelism, arrowSchema.toJson())
      .direct()
      .compute((ComputeFunc<Iterator<Object>, List<Integer>>) records -> {
        List<Integer> collected = new ArrayList<>();
        while (records.hasNext()) {
          collected.add((Integer) records.next());
        }
        return collected;
      })
      .direct()
      .forEach(values ->
          LOG.info("Integer Array Size:" + values.size() + "\tvalues:" + values));
}
Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class ArrowTSetSourceExample, method main.
/**
 * Entry point that submits the Arrow example job. Parses the required command line
 * options (parallelism, workers, data size, CSV input directory, Arrow directory and
 * Arrow file name), copies them into the job configuration and submits a job that
 * uses {@code ArrowTSetSourceExample} as its worker class.
 *
 * @param args command line arguments carrying the options listed above
 * @throws Exception if the command line cannot be parsed, a numeric option is not a
 *     valid integer, or anything called here fails
 */
public static void main(String[] args) throws Exception {
  LOG.log(Level.INFO, "Starting Twister2 Arrow Job");
  // first load the configurations from command line and config files
  Config config = ResourceAllocator.loadConfig(new HashMap<>());

  // All options take an argument and are required.
  Options options = new Options();
  options.addOption(Utils.createOption(DataObjectConstants.PARALLELISM_VALUE, true, "Parallelism", true));
  options.addOption(Utils.createOption(DataObjectConstants.WORKERS, true, "Workers", true));
  // The third argument is the option's description (as for every other option here);
  // it previously held the literal "100", which produced misleading help output.
  options.addOption(Utils.createOption(DataObjectConstants.DSIZE, true, "Data Size", true));
  options.addOption(Utils.createOption(DataObjectConstants.DINPUT_DIRECTORY, true, "CSV Input Directory", true));
  options.addOption(Utils.createOption(DataObjectConstants.ARROW_DIRECTORY, true, "Arrow Input Directory", true));
  options.addOption(Utils.createOption(DataObjectConstants.FILE_NAME, true, "Arrow File Name", true));

  CommandLineParser commandLineParser = new DefaultParser();
  CommandLine cmd = commandLineParser.parse(options, args);

  int parallelism = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.PARALLELISM_VALUE));
  int workers = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.WORKERS));
  int dsize = Integer.parseInt(cmd.getOptionValue(DataObjectConstants.DSIZE));
  String csvInputDirectory = cmd.getOptionValue(DataObjectConstants.DINPUT_DIRECTORY);
  String arrowInputDirectory = cmd.getOptionValue(DataObjectConstants.ARROW_DIRECTORY);
  String arrowFileName = cmd.getOptionValue(DataObjectConstants.FILE_NAME);

  // Forward the parsed values to the workers through the job configuration.
  JobConfig jobConfig = new JobConfig();
  jobConfig.put(DataObjectConstants.PARALLELISM_VALUE, parallelism);
  jobConfig.put(DataObjectConstants.WORKERS, workers);
  jobConfig.put(DataObjectConstants.DSIZE, dsize);
  jobConfig.put(DataObjectConstants.DINPUT_DIRECTORY, csvInputDirectory);
  jobConfig.put(DataObjectConstants.ARROW_DIRECTORY, arrowInputDirectory);
  jobConfig.put(DataObjectConstants.FILE_NAME, arrowFileName);

  Twister2Job.Twister2JobBuilder jobBuilder = Twister2Job.newBuilder();
  jobBuilder.setJobName("Arrow Testing Example");
  jobBuilder.setWorkerClass(ArrowTSetSourceExample.class);
  jobBuilder.addComputeResource(1, 512, 2, workers);
  jobBuilder.setConfig(jobConfig);

  // now submit the job
  Twister2Submitter.submitJob(jobBuilder.build(), config);
}
Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class TestBinaryFileFormatter, method main.
/**
 * Manual test for binary input splitting: partitions {@code /tmp/2000.bin} into
 * fixed-length records, decodes every record as big-endian 16-bit values and logs
 * the running sum and the number of values read across all splits.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
  final String inputFile = "/tmp/2000.bin";
  // One record holds this many shorts; every buffer size below is derived from it
  // so the record length, the byte buffer and the short array cannot get out of sync.
  final int shortsPerRecord = 1000;
  final int recordLength = shortsPerRecord * Short.BYTES;

  Config.Builder builder = new Config.Builder();
  builder.put("input.file.path", inputFile);
  builder.put("RECORD_LENGTH", recordLength);
  Config txtFileConf = builder.build();

  Path path = new Path(inputFile);
  InputPartitioner binaryInputPartitioner = new BinaryInputPartitioner(path, recordLength);
  binaryInputPartitioner.configure(txtFileConf);

  int count = 0;
  int minSplits = 4;
  double newSum = 0.0;
  try {
    InputSplit[] inputSplits = binaryInputPartitioner.createInputSplits(minSplits);
    InputSplitAssigner inputSplitAssigner = binaryInputPartitioner.getInputSplitAssigner(inputSplits);
    InputSplit currentSplit;

    // Reused for every record: raw bytes, their big-endian view and the decoded shorts.
    byte[] line = new byte[recordLength];
    ByteBuffer byteBuffer = ByteBuffer.allocate(recordLength);
    byteBuffer.order(ByteOrder.BIG_ENDIAN);
    short[] shortArray = new short[shortsPerRecord];

    // Keep asking for splits until the assigner returns null.
    while ((currentSplit = inputSplitAssigner.getNextInputSplit("localhost", 0)) != null) {
      currentSplit.open(txtFileConf);
      while (currentSplit.nextRecord(line) != null) {
        byteBuffer.clear();
        byteBuffer.put(line);
        byteBuffer.flip();
        ShortBuffer shorts = byteBuffer.asShortBuffer();
        shorts.get(shortArray);
        for (short value : shortArray) {
          newSum += value;
          count++;
        }
      }
    }
    LOG.info("Sum and count values are:" + newSum + "\t" + count);
  } catch (Exception e) {
    e.printStackTrace();
  }
}
Use of edu.iu.dsc.tws.api.config.Config in project twister2 by DSC-SPIDAL.
Class HelloWorld, method execute.
/**
 * Logs a greeting containing this worker's id, the total number of workers and the
 * value stored under {@code "hello-key"} in the configuration, then calls
 * {@code waitSeconds(30)}.
 *
 * @param workerEnvironment the environment supplying the worker id, worker count
 *     and configuration
 */
@Override
public void execute(WorkerEnvironment workerEnvironment) {
  int id = workerEnvironment.getWorkerId();
  Config cfg = workerEnvironment.getConfig();

  // Value that was placed into the job config under "hello-key".
  String message = cfg.getStringValue("hello-key");

  // Announce that this worker is up and running.
  String greeting = String.format(
      "Hello World from Worker %d; there are %d total workers " + "and I got a message: %s",
      id,
      workerEnvironment.getNumberOfWorkers(),
      message);
  LOG.info(greeting);

  waitSeconds(30);
}
Aggregations