Use of org.apache.tez.dag.api.OutputDescriptor in the Apache Tez project.
Class TestDAGRecovery2, method testFailingCommitter.
/**
 * Attaches a data sink backed by {@code MultiAttemptDAG.FailingOutputCommitter} to vertex
 * "v3" of a simple V-shaped test DAG and expects the DAG to end in {@code State.FAILED}.
 */
@Test(timeout = 120000)
public void testFailingCommitter() throws Exception {
  final DAG failingDag = SimpleVTestDAG.createDAG("FailingCommitterDAG", null);

  // The output itself is a no-op; only its payload (the committer config) matters here.
  final OutputDescriptor sinkOutput =
      OutputDescriptor.create(MultiAttemptDAG.NoOpOutput.class.getName());
  final MultiAttemptDAG.FailingOutputCommitter.FailingOutputCommitterConfig committerConfig =
      new MultiAttemptDAG.FailingOutputCommitter.FailingOutputCommitterConfig(true);
  sinkOutput.setUserPayload(
      UserPayload.create(ByteBuffer.wrap(committerConfig.toUserPayload())));

  final OutputCommitterDescriptor sinkCommitter =
      OutputCommitterDescriptor.create(MultiAttemptDAG.FailingOutputCommitter.class.getName());

  final DataSinkDescriptor failingSink = DataSinkDescriptor.create(sinkOutput, sinkCommitter, null);
  failingDag.getVertex("v3").addDataSink("FailingOutput", failingSink);

  runDAGAndVerify(failingDag, State.FAILED);
}
Use of org.apache.tez.dag.api.OutputDescriptor in the Apache Tez project.
Class FilterLinesByWord, method run.
/**
 * Builds and runs the two-stage "FilterLinesByWord" DAG in a Tez session.
 *
 * <p>After generic Hadoop options and the splits-in-client option are consumed, exactly three
 * arguments must remain: the input path, the output path and the word to filter by.
 *
 * @param args raw command-line arguments
 * @return {@code 2} when the options are invalid, the argument count is wrong or the output
 *     path already exists; {@code -1} when polling the running DAG fails with a
 *     {@code TezException}; {@code 0} when the DAG finishes in {@code SUCCEEDED}; {@code 1}
 *     for any other final state
 * @throws Exception if setup, submission or cleanup fails
 */
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  Credentials credentials = new Credentials();

  boolean generateSplitsInClient = false;
  SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
  try {
    generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
    otherArgs = splitCmdLineParser.getRemainingArgs();
  } catch (ParseException e1) {
    System.err.println("Invalid options");
    printUsage();
    return 2;
  }

  if (otherArgs.length != 3) {
    printUsage();
    return 2;
  }

  String inputPath = otherArgs[0];
  String outputPath = otherArgs[1];
  String filterWord = otherArgs[2];

  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(new Path(outputPath))) {
    System.err.println("Output directory : " + outputPath + " already exists");
    return 2;
  }

  TezConfiguration tezConf = new TezConfiguration(conf);

  // Per-run staging directory, named by a random UUID under the working directory.
  Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
  TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);

  String[] vNames = { "stage1", "stage2" };
  // Only assigned once start() has returned, so cleanup never stops an unstarted session.
  TezClient startedSession = null;
  DAGClient dagClient;
  DAGStatus dagStatus;

  // From here on the staging directory exists, so everything runs under the cleanup block:
  // a failure during setup or submission must not leak the directory or a running session.
  try {
    String jarPath = ClassUtil.findContainingJar(FilterLinesByWord.class);
    if (jarPath == null) {
      throw new TezUncheckedException("Could not find any jar containing "
          + FilterLinesByWord.class.getName() + " in the classpath");
    }

    Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
    fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);
    TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, conf);

    Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(
        ConverterUtils.getYarnUrlFromPath(remoteJarPath),
        LocalResourceType.FILE,
        LocalResourceVisibility.APPLICATION,
        remoteJarStatus.getLen(),
        remoteJarStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    TezClient tezSession =
        TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, credentials);
    // Why do I need to start the TezSession.
    tezSession.start();
    startedSession = tezSession;

    Configuration stage1Conf = new JobConf(conf);
    stage1Conf.set(FILTER_PARAM_NAME, filterWord);

    Configuration stage2Conf = new JobConf(conf);
    stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
    stage2Conf.setBoolean("mapred.mapper.new-api", false);

    // The stage1 payload is captured before the input settings below are applied.
    UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);

    // Setup stage1 Vertex
    Vertex stage1Vertex = Vertex.create("stage1",
        ProcessorDescriptor.create(FilterByWordInputProcessor.class.getName())
            .setUserPayload(stage1Payload))
        .addTaskLocalFiles(commonLocalResources);

    DataSourceDescriptor dsd;
    if (generateSplitsInClient) {
      // TODO TEZ-1406. Don't use MRInputLegacy
      stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
      stage1Conf.setBoolean("mapred.mapper.new-api", false);
      dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
    } else {
      dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath)
          .groupSplits(false)
          .build();
    }
    stage1Vertex.addDataSource("MRInput", dsd);

    // Setup stage2 Vertex
    Vertex stage2Vertex = Vertex.create("stage2",
        ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName())
            .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)),
        1);
    stage2Vertex.addTaskLocalFiles(commonLocalResources);

    // Configure the Output for stage2
    OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName())
        .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf));
    OutputCommitterDescriptor ocd =
        OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
    stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create(od, ocd, null));

    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
        .newBuilder(Text.class.getName(), TextLongPair.class.getName())
        .setFromConfiguration(tezConf)
        .build();

    DAG dag = DAG.create("FilterLinesByWord");
    Edge edge =
        Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultBroadcastEdgeProperty());
    dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);

    LOG.info("Submitting DAG to Tez Session");
    dagClient = tezSession.submitDAG(dag);
    LOG.info("Submitted DAG to Tez Session");

    // Phase 1: poll until the DAG is running or has already reached a final state.
    while (true) {
      dagStatus = dagClient.getDAGStatus(null);
      if (dagStatus.getState() == DAGStatus.State.RUNNING
          || dagStatus.getState() == DAGStatus.State.SUCCEEDED
          || dagStatus.getState() == DAGStatus.State.FAILED
          || dagStatus.getState() == DAGStatus.State.KILLED
          || dagStatus.getState() == DAGStatus.State.ERROR) {
        break;
      }
      try {
        Thread.sleep(500);
      } catch (InterruptedException ignored) {
        // Deliberately ignored: polling simply resumes. Re-setting the interrupt flag here
        // would make every subsequent sleep() throw immediately and turn this into a spin.
      }
    }

    // Phase 2: report progress while the DAG is running.
    while (dagStatus.getState() == DAGStatus.State.RUNNING) {
      try {
        ExampleDriver.printDAGStatus(dagClient, vNames);
        try {
          Thread.sleep(1000);
        } catch (InterruptedException ignored) {
          // Deliberately ignored for the same reason as in the first polling loop.
        }
        dagStatus = dagClient.getDAGStatus(null);
      } catch (TezException e) {
        LOG.error("Failed to get application progress. Exiting", e);
        return -1;
      }
    }

    // Final status, this time including counters.
    dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
  } finally {
    // Same order as before (delete, then stop), but a failing delete no longer skips stop().
    try {
      fs.delete(stagingDir, true);
    } finally {
      if (startedSession != null) {
        startedSession.stop();
      }
    }
  }

  ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
  LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
  return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
Use of org.apache.tez.dag.api.OutputDescriptor in the Apache Tez project.
Class TestWeightedScalingMemoryDistributor, method testWeightedScalingNonConcurrent.
/**
 * Checks the weighted-scaling allocations when inputs and outputs are configured as
 * non-concurrent and non-concurrent input handling is enabled, with a reserve fraction of 0.2.
 */
@Test(timeout = 5000)
public void testWeightedScalingNonConcurrent() throws TezException {
  final Configuration scalingConf = new Configuration(this.conf);
  scalingConf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_INPUT_OUTPUT_CONCURRENT, false);
  scalingConf.setBoolean(
      TezConfiguration.TEZ_TASK_SCALE_MEMORY_NON_CONCURRENT_INPUTS_ENABLED, true);
  scalingConf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION, 0.2);
  scalingConf.setStrings(
      TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS,
      WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 1, 2, 3, 1, 1));

  // Echo the configured ratios to stderr.
  final String joinedRatios = Joiner.on(",").join(
      scalingConf.getStringCollection(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS));
  System.err.println(joinedRatios);

  final MemoryDistributor distributor = new MemoryDistributor(2, 2, scalingConf);
  distributor.setJvmMemory(10000L);

  // Every requester asks for the same amount.
  final int requestedSize = 10000;

  // Request 1: OrderedGroupedKVInput (ScatterGatherShuffleInput)
  final MemoryUpdateCallbackForTest shuffleInputCallback = new MemoryUpdateCallbackForTest();
  distributor.requestMemory(requestedSize, shuffleInputCallback,
      createTestInputContext(), createTestInputDescriptor(OrderedGroupedKVInput.class));

  // Request 2: UnorderedKVInput (BroadcastInput)
  final MemoryUpdateCallbackForTest broadcastInputCallback = new MemoryUpdateCallbackForTest();
  distributor.requestMemory(requestedSize, broadcastInputCallback,
      createTestInputContext(), createTestInputDescriptor(UnorderedKVInput.class));

  // Request 3: default test output descriptor (simulates MROutput)
  final MemoryUpdateCallbackForTest genericOutputCallback = new MemoryUpdateCallbackForTest();
  distributor.requestMemory(requestedSize, genericOutputCallback,
      createTestOutputContext(), createTestOutputDescriptor());

  // Request 4: OrderedPartitionedKVOutput (OnFileSortedOutput)
  final MemoryUpdateCallbackForTest sortedOutputCallback = new MemoryUpdateCallbackForTest();
  distributor.requestMemory(requestedSize, sortedOutputCallback,
      createTestOutputContext(), createTestOutputDescriptor(OrderedPartitionedKVOutput.class));

  // Request 5: processor
  final MemoryUpdateCallbackForTest processorCallback = new MemoryUpdateCallbackForTest();
  distributor.requestMemory(requestedSize, processorCallback,
      createTestProcessortContext(), createTestProcessorDescriptor());

  distributor.makeInitialAllocations();

  // Available memory: 80% of 10K = 8000.
  // Five 10K requests with weights 3, 1, 1, 2, 1 (in request order).
  // Input and output memory overlap.
  assertEquals(5250, shuffleInputCallback.assigned);
  assertEquals(1750, broadcastInputCallback.assigned);
  assertEquals(2333, genericOutputCallback.assigned);
  assertEquals(4666, sortedOutputCallback.assigned);
  assertEquals(1000, processorCallback.assigned);
}
Use of org.apache.tez.dag.api.OutputDescriptor in the Apache Tez project.
Class TestWeightedScalingMemoryDistributor, method testAdditionalReserveFractionWeightedScalingNonConcurrent.
/**
 * Checks the weighted-scaling allocations for non-concurrent inputs and outputs when an
 * additional per-IO reservation fraction (0.025, capped at 0.2) is configured.
 */
@Test(timeout = 5000)
public void testAdditionalReserveFractionWeightedScalingNonConcurrent() throws TezException {
  final Configuration scalingConf = new Configuration(this.conf);
  scalingConf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_INPUT_OUTPUT_CONCURRENT, false);
  scalingConf.setBoolean(
      TezConfiguration.TEZ_TASK_SCALE_MEMORY_NON_CONCURRENT_INPUTS_ENABLED, true);
  scalingConf.setStrings(
      TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS,
      WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 2, 3, 6, 1, 1));
  scalingConf.setDouble(
      TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_PER_IO, 0.025d);
  scalingConf.setDouble(
      TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_MAX, 0.2d);

  final MemoryDistributor distributor = new MemoryDistributor(2, 2, scalingConf);
  distributor.setJvmMemory(10000L);

  // Every requester asks for the same amount.
  final int requestedSize = 10000;

  // Request 1: OrderedGroupedKVInput (ScatterGatherShuffleInput) [weight 6]
  final MemoryUpdateCallbackForTest shuffleInputCallback = new MemoryUpdateCallbackForTest();
  distributor.requestMemory(requestedSize, shuffleInputCallback,
      createTestInputContext(), createTestInputDescriptor(OrderedGroupedKVInput.class));

  // Request 2: UnorderedKVInput (BroadcastInput) [weight 2]
  final MemoryUpdateCallbackForTest broadcastInputCallback = new MemoryUpdateCallbackForTest();
  distributor.requestMemory(requestedSize, broadcastInputCallback,
      createTestInputContext(), createTestInputDescriptor(UnorderedKVInput.class));

  // Request 3: default test output descriptor (simulates MROutput) [weight 1]
  final MemoryUpdateCallbackForTest genericOutputCallback = new MemoryUpdateCallbackForTest();
  distributor.requestMemory(requestedSize, genericOutputCallback,
      createTestOutputContext(), createTestOutputDescriptor());

  // Request 4: OrderedPartitionedKVOutput (OnFileSortedOutput) [weight 3]
  final MemoryUpdateCallbackForTest sortedOutputCallback = new MemoryUpdateCallbackForTest();
  distributor.requestMemory(requestedSize, sortedOutputCallback,
      createTestOutputContext(), createTestOutputDescriptor(OrderedPartitionedKVOutput.class));

  distributor.makeInitialAllocations();

  // Available memory: 60% of 10K = 6000.
  // Four 10K requests with weights 6, 2, 1, 3 (in request order).
  // Input and output memory overlap.
  assertEquals(4500, shuffleInputCallback.assigned);
  assertEquals(1500, broadcastInputCallback.assigned);
  assertEquals(1500, genericOutputCallback.assigned);
  assertEquals(4500, sortedOutputCallback.assigned);
}
Use of org.apache.tez.dag.api.OutputDescriptor in the Apache Tez project.
Class TestWeightedScalingMemoryDistributor, method testWeightedScalingNonConcurrentInputsDisabled.
/**
 * Checks the weighted-scaling allocations when inputs and outputs are configured as
 * non-concurrent but non-concurrent input handling is disabled, with a reserve fraction of 0.2.
 */
@Test(timeout = 5000)
public void testWeightedScalingNonConcurrentInputsDisabled() throws TezException {
  final Configuration scalingConf = new Configuration(this.conf);
  scalingConf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_INPUT_OUTPUT_CONCURRENT, false);
  scalingConf.setBoolean(
      TezConfiguration.TEZ_TASK_SCALE_MEMORY_NON_CONCURRENT_INPUTS_ENABLED, false);
  scalingConf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION, 0.2);
  scalingConf.setStrings(
      TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS,
      WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 1, 2, 3, 1, 1));

  // Echo the configured ratios to stderr.
  final String joinedRatios = Joiner.on(",").join(
      scalingConf.getStringCollection(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS));
  System.err.println(joinedRatios);

  final MemoryDistributor distributor = new MemoryDistributor(2, 2, scalingConf);
  distributor.setJvmMemory(10000L);

  // Every requester asks for the same amount.
  final int requestedSize = 10000;

  // Request 1: OrderedGroupedKVInput (ScatterGatherShuffleInput)
  final MemoryUpdateCallbackForTest shuffleInputCallback = new MemoryUpdateCallbackForTest();
  distributor.requestMemory(requestedSize, shuffleInputCallback,
      createTestInputContext(), createTestInputDescriptor(OrderedGroupedKVInput.class));

  // Request 2: UnorderedKVInput (BroadcastInput)
  final MemoryUpdateCallbackForTest broadcastInputCallback = new MemoryUpdateCallbackForTest();
  distributor.requestMemory(requestedSize, broadcastInputCallback,
      createTestInputContext(), createTestInputDescriptor(UnorderedKVInput.class));

  // Request 3: default test output descriptor (simulates MROutput)
  final MemoryUpdateCallbackForTest genericOutputCallback = new MemoryUpdateCallbackForTest();
  distributor.requestMemory(requestedSize, genericOutputCallback,
      createTestOutputContext(), createTestOutputDescriptor());

  // Request 4: OrderedPartitionedKVOutput (OnFileSortedOutput)
  final MemoryUpdateCallbackForTest sortedOutputCallback = new MemoryUpdateCallbackForTest();
  distributor.requestMemory(requestedSize, sortedOutputCallback,
      createTestOutputContext(), createTestOutputDescriptor(OrderedPartitionedKVOutput.class));

  // Request 5: processor
  final MemoryUpdateCallbackForTest processorCallback = new MemoryUpdateCallbackForTest();
  distributor.requestMemory(requestedSize, processorCallback,
      createTestProcessortContext(), createTestProcessorDescriptor());

  distributor.makeInitialAllocations();

  // Available memory: 80% of 10K = 8000.
  // Five 10K requests with weights 3, 1, 1, 2, 1 (in request order).
  // Input and output memory overlap.
  assertEquals(3000, shuffleInputCallback.assigned);
  assertEquals(1000, broadcastInputCallback.assigned);
  assertEquals(2333, genericOutputCallback.assigned);
  assertEquals(4666, sortedOutputCallback.assigned);
  assertEquals(1000, processorCallback.assigned);
}
Aggregations