Search in sources :

Example 6 with UnorderedKVEdgeConfig

use of org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig in project tez by apache.

the class CartesianProduct method createDAG.

/**
 * Assembles the "CartesianProduct" DAG: three token-producing source vertices
 * (one per input path) that all feed a single join vertex writing to
 * {@code outputPath}.
 *
 * <p>The first two sources are connected to the join vertex through a custom
 * edge managed by {@code CartesianProductEdgeManager}; the third source is
 * connected through a broadcast edge. The same cartesian product user payload
 * is handed to both the vertex manager of the join vertex and the edge manager.
 *
 * @param tezConf       configuration copied into every input/output builder and
 *                      used to serialize the cartesian product config
 * @param inputPath1    input path read by the first source vertex
 * @param inputPath2    input path read by the second source vertex
 * @param inputPath3    input path read by the third (broadcast) source vertex
 * @param outputPath    path the join vertex writes its output to
 * @param isPartitioned when true, the cartesian product config is built from a
 *                      vertex-to-partition-count map and the custom edge uses the
 *                      partitioned unordered KV config; otherwise the config is
 *                      built from the plain source list and the unpartitioned
 *                      unordered KV config is used
 * @return the fully wired DAG
 * @throws IOException declared by the original signature; raised by the
 *                     builders/serialization calls used here
 */
private DAG createDAG(TezConfiguration tezConf, String inputPath1, String inputPath2, String inputPath3, String outputPath, boolean isPartitioned) throws IOException {
    final String tokenProcessorName = TokenProcessor.class.getName();
    final String keyClassName = Text.class.getName();
    final String valueClassName = IntWritable.class.getName();

    // Split grouping is switched off on every source so that each input file incurs one task.
    Vertex firstSource = Vertex.create(VERTEX1, ProcessorDescriptor.create(tokenProcessorName));
    firstSource.addDataSource(INPUT, MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath1).groupSplits(false).build());

    Vertex secondSource = Vertex.create(VERTEX2, ProcessorDescriptor.create(tokenProcessorName));
    secondSource.addDataSource(INPUT, MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath2).groupSplits(false).build());

    Vertex thirdSource = Vertex.create(VERTEX3, ProcessorDescriptor.create(tokenProcessorName));
    thirdSource.addDataSource(INPUT, MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath3).groupSplits(false).build());

    // Describe the cartesian product, either per-vertex partition counts or just the source list.
    CartesianProductConfig cpConfig;
    if (!isPartitioned) {
        cpConfig = new CartesianProductConfig(Arrays.asList(cpSources));
    } else {
        Map<String, Integer> partitionsByVertex = new HashMap<>();
        for (String sourceName : cpSources) {
            partitionsByVertex.put(sourceName, numPartition);
        }
        cpConfig = new CartesianProductConfig(partitionsByVertex);
    }
    UserPayload cpPayload = cpConfig.toUserPayload(tezConf);

    // Custom edge used by the cartesian product sources; it carries the same payload as the vertex manager.
    EdgeManagerPluginDescriptor cpEdgeManager = EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
    cpEdgeManager.setUserPayload(cpPayload);
    EdgeProperty cpEdgeProperty = isPartitioned
        ? UnorderedPartitionedKVEdgeConfig.newBuilder(keyClassName, valueClassName, CustomPartitioner.class.getName()).build().createDefaultCustomEdgeProperty(cpEdgeManager)
        : UnorderedKVEdgeConfig.newBuilder(keyClassName, valueClassName).build().createDefaultCustomEdgeProperty(cpEdgeManager);

    // Broadcast edge used by the third source.
    EdgeProperty broadcastEdgeProperty = UnorderedKVEdgeConfig.newBuilder(keyClassName, valueClassName).build().createDefaultBroadcastEdgeProperty();

    // Join vertex: writes the output and is driven by the cartesian product vertex manager.
    Vertex joinVertex = Vertex.create(VERTEX4, ProcessorDescriptor.create(JoinProcessor.class.getName()));
    joinVertex.addDataSink(OUTPUT, MROutput.createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, outputPath).build());
    joinVertex.setVertexManagerPlugin(VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName()).setUserPayload(cpPayload));

    DAG dag = DAG.create("CartesianProduct");
    dag.addVertex(firstSource).addVertex(secondSource).addVertex(thirdSource).addVertex(joinVertex);
    dag.addEdge(Edge.create(firstSource, joinVertex, cpEdgeProperty));
    dag.addEdge(Edge.create(secondSource, joinVertex, cpEdgeProperty));
    dag.addEdge(Edge.create(thirdSource, joinVertex, broadcastEdgeProperty));
    return dag;
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) UserPayload(org.apache.tez.dag.api.UserPayload) HashMap(java.util.HashMap) CartesianProductVertexManager(org.apache.tez.runtime.library.cartesianproduct.CartesianProductVertexManager) UnorderedKVEdgeConfig(org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig) EdgeManagerPluginDescriptor(org.apache.tez.dag.api.EdgeManagerPluginDescriptor) TextInputFormat(org.apache.hadoop.mapreduce.lib.input.TextInputFormat) CartesianProductEdgeManager(org.apache.tez.runtime.library.cartesianproduct.CartesianProductEdgeManager) EdgeProperty(org.apache.tez.dag.api.EdgeProperty) UnorderedPartitionedKVEdgeConfig(org.apache.tez.runtime.library.conf.UnorderedPartitionedKVEdgeConfig) CartesianProductConfig(org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig)

Example 7 with UnorderedKVEdgeConfig

use of org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig in project tez by apache.

the class FilterLinesByWord method run.

/**
 * Runs the FilterLinesByWord example as a two-stage Tez DAG ("stage1" feeding
 * "stage2" over a broadcast edge) inside a Tez session and waits for it to finish.
 *
 * <p>Expected remaining arguments (after generic and split-generation options
 * are consumed): {@code <inputPath> <outputPath> <filterWord>}.
 *
 * <p>A uniquely named staging directory is created under the file system's
 * working directory and the jar containing this class is uploaded into it.
 * Once the session has been started, the staging directory is deleted and the
 * session is stopped on every exit path, including failures while building or
 * submitting the DAG.
 *
 * @param args raw command line arguments
 * @return {@code 0} if the DAG succeeded, {@code 1} if it finished in any other
 *         state, {@code 2} for invalid usage or an already existing output
 *         directory, {@code -1} if the DAG status could not be fetched while
 *         the DAG was running
 * @throws Exception on any unrecoverable failure during setup, submission or cleanup
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Credentials credentials = new Credentials();
    boolean generateSplitsInClient = false;
    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }
    if (otherArgs.length != 3) {
        printUsage();
        return 2;
    }
    String inputPath = otherArgs[0];
    String outputPath = otherArgs[1];
    String filterWord = otherArgs[2];
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(outputPath))) {
        System.err.println("Output directory : " + outputPath + " already exists");
        return 2;
    }
    TezConfiguration tezConf = new TezConfiguration(conf);
    Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
    String jarPath = ClassUtil.findContainingJar(FilterLinesByWord.class);
    if (jarPath == null) {
        throw new TezUncheckedException("Could not find any jar containing " + FilterLinesByWord.class.getName() + " in the classpath");
    }
    // Upload the jar holding this class and register it as a local resource shared by both vertices.
    Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
    fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);
    TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, conf);
    Map<String, LocalResource> commonLocalResources = new TreeMap<>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(), remoteJarStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);
    TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, credentials);
    // Why do I need to start the TezSession.
    tezSession.start();

    DAGClient dagClient;
    DAGStatus dagStatus = null;
    String[] vNames = { "stage1", "stage2" };
    // Everything after start() runs under the cleanup below so that a failure while
    // building or submitting the DAG does not leak the session or the staging directory.
    try {
        Configuration stage1Conf = new JobConf(conf);
        stage1Conf.set(FILTER_PARAM_NAME, filterWord);
        Configuration stage2Conf = new JobConf(conf);
        stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
        stage2Conf.setBoolean("mapred.mapper.new-api", false);
        UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
        // Setup stage1 Vertex
        Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor.create(FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload)).addTaskLocalFiles(commonLocalResources);
        DataSourceDescriptor dsd;
        if (generateSplitsInClient) {
            // TODO TEZ-1406. Don't use MRInputLegacy
            stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
            stage1Conf.setBoolean("mapred.mapper.new-api", false);
            dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
        } else {
            dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath).groupSplits(false).build();
        }
        stage1Vertex.addDataSource("MRInput", dsd);
        // Setup stage2 Vertex
        Vertex stage2Vertex = Vertex.create("stage2", ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)), 1);
        stage2Vertex.addTaskLocalFiles(commonLocalResources);
        // Configure the Output for stage2
        OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf));
        OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
        stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create(od, ocd, null));
        UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), TextLongPair.class.getName()).setFromConfiguration(tezConf).build();
        DAG dag = DAG.create("FilterLinesByWord");
        Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultBroadcastEdgeProperty());
        dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);
        LOG.info("Submitting DAG to Tez Session");
        dagClient = tezSession.submitDAG(dag);
        LOG.info("Submitted DAG to Tez Session");

        // Phase 1: poll until the DAG is running or has already reached a final state.
        while (true) {
            dagStatus = dagClient.getDAGStatus(null);
            if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) {
                break;
            }
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
                // Best effort: ignore the interrupt and keep polling.
            }
        }
        // Phase 2: report progress once per second while the DAG is running.
        while (dagStatus.getState() == DAGStatus.State.RUNNING) {
            try {
                ExampleDriver.printDAGStatus(dagClient, vNames);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // Best effort: ignore the interrupt and keep polling.
                }
                dagStatus = dagClient.getDAGStatus(null);
            } catch (TezException e) {
                LOG.error("Failed to get application progress. Exiting", e);
                return -1;
            }
        }
        dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
    } finally {
        // Stop the session even when removing the staging directory fails.
        try {
            fs.delete(stagingDir, true);
        } finally {
            tezSession.stop();
        }
    }
    ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
    LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
Also used : TezException(org.apache.tez.dag.api.TezException) Vertex(org.apache.tez.dag.api.Vertex) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) FilterByWordOutputProcessor(org.apache.tez.mapreduce.examples.processor.FilterByWordOutputProcessor) TezClient(org.apache.tez.client.TezClient) UnorderedKVEdgeConfig(org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) FileSystem(org.apache.hadoop.fs.FileSystem) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) JobConf(org.apache.hadoop.mapred.JobConf) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) DataSourceDescriptor(org.apache.tez.dag.api.DataSourceDescriptor) Path(org.apache.hadoop.fs.Path) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) UserPayload(org.apache.tez.dag.api.UserPayload) OutputCommitterDescriptor(org.apache.tez.dag.api.OutputCommitterDescriptor) Text(org.apache.hadoop.io.Text) DAG(org.apache.tez.dag.api.DAG) TreeMap(java.util.TreeMap) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) SplitsInClientOptionParser(org.apache.tez.mapreduce.examples.helpers.SplitsInClientOptionParser) DAGClient(org.apache.tez.dag.api.client.DAGClient) ParseException(org.apache.commons.cli.ParseException) MROutputCommitter(org.apache.tez.mapreduce.committer.MROutputCommitter) Edge(org.apache.tez.dag.api.Edge) Credentials(org.apache.hadoop.security.Credentials) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 8 with UnorderedKVEdgeConfig

use of org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig in project tez by apache.

the class BroadcastAndOneToOneExample method createDAG.

/**
 * Builds the "BroadcastAndOneToOneExample" DAG: an "Input" vertex connected to
 * the "OneToOne" vertex through a one-to-one edge, and a "Broadcast" vertex
 * connected to the same "OneToOne" vertex through a broadcast edge.
 *
 * <p>When {@code doLocalityCheck} is set, the number of running node managers
 * is queried from YARN and the "Input" vertex gets
 * {@code numNMs - numBroadcastTasks - 1} tasks (the extra one accounts for the
 * AM), but never fewer than one. Otherwise three tasks are used.
 *
 * @param fs              not used by this method
 * @param tezConf         configuration used to initialise the YARN client and
 *                        to seed the edge configuration
 * @param stagingDir      not used by this method
 * @param doLocalityCheck whether to size the "Input" vertex from the cluster
 *                        and flag the check in the processor payload
 * @return the assembled DAG
 * @throws IOException   if querying YARN fails with an I/O error
 * @throws YarnException if querying YARN fails
 */
private DAG createDAG(FileSystem fs, TezConfiguration tezConf, Path stagingDir, boolean doLocalityCheck) throws IOException, YarnException {
    int numBroadcastTasks = 2;
    int numOneToOneTasks = 3;
    if (doLocalityCheck) {
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(tezConf);
        yarnClient.start();
        int numNMs;
        try {
            numNMs = yarnClient.getNodeReports(NodeState.RUNNING).size();
        } finally {
            // Always release the client, even when the node report query fails.
            yarnClient.stop();
        }
        // create enough 1-1 tasks to run in parallel
        // 1 AM
        numOneToOneTasks = Math.max(1, numNMs - numBroadcastTasks - 1);
    }
    // First payload byte carries the locality-check flag.
    byte[] procByte = { (byte) (doLocalityCheck ? 1 : 0), 1 };
    UserPayload procPayload = UserPayload.create(ByteBuffer.wrap(procByte));
    System.out.println("Using " + numOneToOneTasks + " 1-1 tasks");
    Vertex broadcastVertex = Vertex.create("Broadcast", ProcessorDescriptor.create(InputProcessor.class.getName()), numBroadcastTasks);
    Vertex inputVertex = Vertex.create("Input", ProcessorDescriptor.create(InputProcessor.class.getName()).setUserPayload(procPayload), numOneToOneTasks);
    // No task count is given for the OneToOne vertex; it is assigned InputReadyVertexManager instead.
    Vertex oneToOneVertex = Vertex.create("OneToOne", ProcessorDescriptor.create(OneToOneProcessor.class.getName()).setUserPayload(procPayload));
    oneToOneVertex.setVertexManagerPlugin(VertexManagerPluginDescriptor.create(InputReadyVertexManager.class.getName()));
    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName()).setFromConfiguration(tezConf).build();
    DAG dag = DAG.create("BroadcastAndOneToOneExample");
    dag.addVertex(inputVertex).addVertex(broadcastVertex).addVertex(oneToOneVertex).addEdge(Edge.create(inputVertex, oneToOneVertex, edgeConf.createDefaultOneToOneEdgeProperty())).addEdge(Edge.create(broadcastVertex, oneToOneVertex, edgeConf.createDefaultBroadcastEdgeProperty()));
    return dag;
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) UnorderedKVEdgeConfig(org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig) UserPayload(org.apache.tez.dag.api.UserPayload) Text(org.apache.hadoop.io.Text) DAG(org.apache.tez.dag.api.DAG) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) IntWritable(org.apache.hadoop.io.IntWritable)

Aggregations

UnorderedKVEdgeConfig (org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig)8 Vertex (org.apache.tez.dag.api.Vertex)6 DAG (org.apache.tez.dag.api.DAG)5 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)5 Configuration (org.apache.hadoop.conf.Configuration)4 Text (org.apache.hadoop.io.Text)4 UserPayload (org.apache.tez.dag.api.UserPayload)4 UnorderedPartitionedKVEdgeConfig (org.apache.tez.runtime.library.conf.UnorderedPartitionedKVEdgeConfig)4 Edge (org.apache.tez.dag.api.Edge)3 EdgeManagerPluginDescriptor (org.apache.tez.dag.api.EdgeManagerPluginDescriptor)3 TreeMap (java.util.TreeMap)2 ParseException (org.apache.commons.cli.ParseException)2 FileStatus (org.apache.hadoop.fs.FileStatus)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 Path (org.apache.hadoop.fs.Path)2 EdgeType (org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType)2 DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer)2 IntWritable (org.apache.hadoop.io.IntWritable)2 NullWritable (org.apache.hadoop.io.NullWritable)2 JobConf (org.apache.hadoop.mapred.JobConf)2