Example 1 with Metrics

Use of co.cask.cdap.api.metrics.Metrics in project cdap by caskdata.

The class CustomActionExecutor, method createAction.

@SuppressWarnings("unchecked")
@Deprecated
private WorkflowAction createAction(BasicWorkflowContext context, InstantiatorFactory instantiator, ClassLoader classLoader) throws Exception {
    Class<?> clz = Class.forName(context.getSpecification().getClassName(), true, classLoader);
    Preconditions.checkArgument(WorkflowAction.class.isAssignableFrom(clz), "%s is not a WorkflowAction.", clz);
    WorkflowAction action = instantiator.get(TypeToken.of((Class<? extends WorkflowAction>) clz)).create();
    // Scope user metrics to this workflow node by tagging the program's metrics context with the node name.
    Metrics metrics = new ProgramUserMetrics(context.getProgramMetrics().childContext(Constants.Metrics.Tag.NODE, context.getSpecification().getName()));
    // Inject properties, datasets, and the Metrics instance into the action's fields via reflection.
    Reflections.visit(action, action.getClass(), new PropertyFieldSetter(context.getSpecification().getProperties()), new DataSetFieldSetter(context), new MetricsFieldSetter(metrics));
    return action;
}
Also used : ProgramUserMetrics(co.cask.cdap.app.metrics.ProgramUserMetrics) Metrics(co.cask.cdap.api.metrics.Metrics) PropertyFieldSetter(co.cask.cdap.common.lang.PropertyFieldSetter) MetricsFieldSetter(co.cask.cdap.internal.app.runtime.MetricsFieldSetter) WorkflowAction(co.cask.cdap.api.workflow.WorkflowAction) DataSetFieldSetter(co.cask.cdap.internal.app.runtime.DataSetFieldSetter)
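
For reference, the injection target of MetricsFieldSetter is simply a field of type Metrics declared on the action class. A minimal sketch, assuming a hypothetical action extending CDAP's AbstractWorkflowAction (the class name and metric name are illustrative, not from the source above):

import co.cask.cdap.api.metrics.Metrics;
import co.cask.cdap.api.workflow.AbstractWorkflowAction;

public class NotifyAction extends AbstractWorkflowAction {

    // Hypothetical example: this field is populated by MetricsFieldSetter in createAction() above.
    private Metrics metrics;

    @Override
    public void run() {
        // Counts are emitted under the workflow node's metrics context (see childContext above).
        metrics.count("notifications.sent", 1);
    }
}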

Example 2 with Metrics

Use of co.cask.cdap.api.metrics.Metrics in project cdap by caskdata.

The class SparkPageRankProgram, method run.

@Override
public void run(JavaSparkExecutionContext sec) throws Exception {
    JavaSparkContext jsc = new JavaSparkContext();
    LOG.info("Processing backlinkURLs data");
    JavaPairRDD<Long, String> backlinkURLs = sec.fromStream("backlinkURLStream", String.class);
    int iterationCount = getIterationCount(sec);
    LOG.info("Grouping data by key");
    // Grouping backlinks by unique URL in key
    JavaPairRDD<String, Iterable<String>> links = backlinkURLs.values().mapToPair(new PairFunction<String, String, String>() {

        @Override
        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();
    // Initialize default rank for each key URL
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {

        @Override
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });
    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < iterationCount; current++) {
        LOG.debug("Processing data with PageRank algorithm. Iteration {}/{}", current + 1, (iterationCount));
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values().flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {

            @Override
            public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                LOG.debug("Processing {} with rank {}", s._1(), s._2());
                int urlCount = Iterables.size(s._1());
                List<Tuple2<String, Double>> results = new ArrayList<>();
                for (String n : s._1()) {
                    results.add(new Tuple2<>(n, s._2() / urlCount));
                }
                return results;
            }
        });
        // Re-calculates URL ranks based on backlink contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {

            @Override
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        });
    }
    LOG.info("Writing ranks data");
    final ServiceDiscoverer discoveryServiceContext = sec.getServiceDiscoverer();
    final Metrics sparkMetrics = sec.getMetrics();
    JavaPairRDD<byte[], Integer> ranksRaw = ranks.mapToPair(new PairFunction<Tuple2<String, Double>, byte[], Integer>() {

        @Override
        public Tuple2<byte[], Integer> call(Tuple2<String, Double> tuple) throws Exception {
            LOG.debug("URL {} has rank {}", Arrays.toString(tuple._1().getBytes(Charsets.UTF_8)), tuple._2());
            URL serviceURL = discoveryServiceContext.getServiceURL(SparkPageRankApp.SERVICE_HANDLERS);
            if (serviceURL == null) {
                throw new RuntimeException("Failed to discover service: " + SparkPageRankApp.SERVICE_HANDLERS);
            }
            try {
                URLConnection connection = new URL(serviceURL, String.format("%s/%s", SparkPageRankApp.SparkPageRankServiceHandler.TRANSFORM_PATH, tuple._2().toString())).openConnection();
                try (BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charsets.UTF_8))) {
                    String pr = reader.readLine();
                    // Parse the returned rank once rather than re-parsing it for each comparison.
                    int rank = Integer.parseInt(pr);
                    if (rank == POPULAR_PAGE_THRESHOLD) {
                        sparkMetrics.count(POPULAR_PAGES, 1);
                    } else if (rank <= UNPOPULAR_PAGE_THRESHOLD) {
                        sparkMetrics.count(UNPOPULAR_PAGES, 1);
                    } else {
                        sparkMetrics.count(REGULAR_PAGES, 1);
                    }
                    return new Tuple2<>(tuple._1().getBytes(Charsets.UTF_8), rank);
                }
            } catch (Exception e) {
                LOG.warn("Failed to read the Stream for service {}", SparkPageRankApp.SERVICE_HANDLERS, e);
                throw Throwables.propagate(e);
            }
        }
    });
    // Store calculated results in output Dataset.
    // All calculated results are stored in one row.
    // Each result, the calculated URL rank based on backlink contributions, is an entry of the row.
    // The value of the entry is the URL rank.
    sec.saveAsDataset(ranksRaw, "ranks");
    LOG.info("PageRanks successfuly computed and written to \"ranks\" dataset");
}
Also used : URL(java.net.URL) PairFlatMapFunction(org.apache.spark.api.java.function.PairFlatMapFunction) Function(org.apache.spark.api.java.function.Function) PairFunction(org.apache.spark.api.java.function.PairFunction) Metrics(co.cask.cdap.api.metrics.Metrics) ArrayList(java.util.ArrayList) List(java.util.List) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) ServiceDiscoverer(co.cask.cdap.api.ServiceDiscoverer) InputStreamReader(java.io.InputStreamReader) URLConnection(java.net.URLConnection) Tuple2(scala.Tuple2) BufferedReader(java.io.BufferedReader)
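
Note that reduceByKey(new Sum()) above refers to a Sum function that is not included in the snippet. A minimal sketch consistent with that call site (only the name comes from the snippet; the body is the assumed pairwise addition of rank contributions):

import org.apache.spark.api.java.function.Function2;

// Sums two partial rank contributions for the same URL key.
private static final class Sum implements Function2<Double, Double, Double> {

    @Override
    public Double call(Double a, Double b) {
        return a + b;
    }
}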

Example 3 with Metrics

Use of co.cask.cdap.api.metrics.Metrics in project cdap by caskdata.

The class MetricsHandlerTestRun, method setupMetrics.

private static void setupMetrics() throws Exception {
    // Adding metrics for app "WordCount1" in namespace "myspace", "WCount1" in "yourspace"
    MetricsContext collector = collectionService.getContext(getFlowletContext("myspace", "WordCount1", "WordCounter", "run1", "splitter"));
    collector.increment("reads", 1);
    collector.increment("writes", 1);
    collector = collectionService.getContext(getFlowletContext("yourspace", "WCount1", "WordCounter", "run1", "splitter"));
    collector.increment("reads", 1);
    collector = collectionService.getContext(getFlowletContext("yourspace", "WCount1", "WCounter", "run1", "splitter"));
    emitTs = System.currentTimeMillis();
    // we want to emit in two different seconds
    // todo : figure out why we need this
    TimeUnit.SECONDS.sleep(1);
    collector.increment("reads", 1);
    TimeUnit.MILLISECONDS.sleep(2000);
    collector.increment("reads", 2);
    collector = collectionService.getContext(getFlowletContext("yourspace", "WCount1", "WCounter", "run1", "counter"));
    collector.increment("reads", 1);
    collector = collectionService.getContext(getMapReduceTaskContext("yourspace", "WCount1", "ClassicWordCount", MapReduceMetrics.TaskType.Mapper, "run1", "task1"));
    collector.increment("reads", 1);
    collector = collectionService.getContext(getMapReduceTaskContext("yourspace", "WCount1", "ClassicWordCount", MapReduceMetrics.TaskType.Reducer, "run1", "task2"));
    collector.increment("reads", 1);
    collector = collectionService.getContext(getFlowletContext("myspace", "WordCount1", "WordCounter", "run1", "splitter"));
    collector.increment("reads", 1);
    collector.increment("writes", 1);
    collector = collectionService.getContext(getFlowletContext("myspace", "WordCount1", "WordCounter", "run1", "collector"));
    collector.increment("aa", 1);
    collector.increment("zz", 1);
    collector.increment("ab", 1);
    collector = collectionService.getContext(getWorkerContext("yourspace", "WCount1", "WorkerWordCount", "run1", "task1"));
    collector.increment("workerreads", 5);
    collector.increment("workerwrites", 6);
    collector = collectionService.getContext(getWorkerContext("yourspace", "WCount1", "WorkerWordCount", "run2", "task1"));
    collector.increment("workerreads", 5);
    collector.increment("workerwrites", 6);
    // also: user metrics
    Metrics userMetrics = new ProgramUserMetrics(collectionService.getContext(getFlowletContext("myspace", "WordCount1", "WordCounter", "run1", "splitter")));
    userMetrics.count("reads", 1);
    userMetrics.count("writes", 2);
    collector = collectionService.getContext(new HashMap<String, String>());
    collector.increment("resources.total.storage", 10);
    // need a better way to do this
    TimeUnit.SECONDS.sleep(2);
}
Also used : MapReduceMetrics(co.cask.cdap.app.metrics.MapReduceMetrics) ProgramUserMetrics(co.cask.cdap.app.metrics.ProgramUserMetrics) Metrics(co.cask.cdap.api.metrics.Metrics) HashMap(java.util.HashMap) MetricsContext(co.cask.cdap.api.metrics.MetricsContext)
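
The test above emits system metrics through MetricsContext.increment and user metrics through a hand-built ProgramUserMetrics. In application code the user-metrics side is normally not constructed manually: the program declares a Metrics field and the runtime injects it. A minimal sketch, assuming a hypothetical flowlet (AbstractFlowlet and @ProcessInput are the standard CDAP flow APIs; the class and metric names are illustrative):

import co.cask.cdap.api.annotation.ProcessInput;
import co.cask.cdap.api.flow.flowlet.AbstractFlowlet;
import co.cask.cdap.api.metrics.Metrics;

public class SplitterFlowlet extends AbstractFlowlet {

    // Injected by the runtime; equivalent to the ProgramUserMetrics built manually in the test.
    private Metrics metrics;

    @ProcessInput
    public void process(String line) {
        metrics.count("reads", 1);
    }
}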

Example 4 with Metrics

Use of co.cask.cdap.api.metrics.Metrics in project cdap by caskdata.

The class SparkCSVToSpaceProgram, method run.

@Override
public void run(final JavaSparkExecutionContext sec) throws Exception {
    JavaSparkContext jsc = new JavaSparkContext();
    Map<String, String> fileSetArgs = new HashMap<>();
    final Metrics metrics = sec.getMetrics();
    FileSetArguments.addInputPath(fileSetArgs, sec.getRuntimeArguments().get("input.path"));
    JavaPairRDD<LongWritable, Text> input = sec.fromDataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET, fileSetArgs);
    final List<String> converted = input.values().map(new Function<Text, String>() {

        @Override
        public String call(Text input) throws Exception {
            String line = input.toString();
            metrics.count("num.lines", 1);
            return line.replaceAll(",", " ");
        }
    }).collect();
    sec.execute(new TxRunnable() {

        @Override
        public void run(DatasetContext context) throws Exception {
            Map<String, String> args = sec.getRuntimeArguments();
            String outputPath = args.get("output.path");
            Map<String, String> fileSetArgs = new HashMap<>();
            FileSetArguments.setOutputPath(fileSetArgs, outputPath);
            FileSet fileSet = context.getDataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET, fileSetArgs);
            try (PrintWriter writer = new PrintWriter(fileSet.getOutputLocation().getOutputStream())) {
                for (String line : converted) {
                    writer.write(line);
                    writer.println();
                }
            }
        }
    });
}
Also used : FileSet(co.cask.cdap.api.dataset.lib.FileSet) HashMap(java.util.HashMap) Text(org.apache.hadoop.io.Text) Function(org.apache.spark.api.java.function.Function) Metrics(co.cask.cdap.api.metrics.Metrics) TxRunnable(co.cask.cdap.api.TxRunnable) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) LongWritable(org.apache.hadoop.io.LongWritable) DatasetContext(co.cask.cdap.api.data.DatasetContext) Map(java.util.Map) PrintWriter(java.io.PrintWriter)
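
The program reads "input.path" and "output.path" from its runtime arguments. A minimal sketch of supplying them when starting the program from a test, assuming CDAP's test framework with an already-deployed application (the paths and the sparkManager variable are illustrative):

Map<String, String> args = new HashMap<>();
// Consumed via sec.getRuntimeArguments() in run() above.
args.put("input.path", "input/records.csv");
args.put("output.path", "output/converted");
sparkManager.start(args);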

Aggregations

Metrics (co.cask.cdap.api.metrics.Metrics) 4
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext) 2
Function (org.apache.spark.api.java.function.Function) 2
ProgramUserMetrics (co.cask.cdap.app.metrics.ProgramUserMetrics) 2
HashMap (java.util.HashMap) 2
ServiceDiscoverer (co.cask.cdap.api.ServiceDiscoverer) 1
BufferedReader (java.io.BufferedReader) 1
InputStreamReader (java.io.InputStreamReader) 1
URL (java.net.URL) 1
URLConnection (java.net.URLConnection) 1
ArrayList (java.util.ArrayList) 1
List (java.util.List) 1
PairFlatMapFunction (org.apache.spark.api.java.function.PairFlatMapFunction) 1
PairFunction (org.apache.spark.api.java.function.PairFunction) 1
Tuple2 (scala.Tuple2) 1
TxRunnable (co.cask.cdap.api.TxRunnable) 1
DatasetContext (co.cask.cdap.api.data.DatasetContext) 1
FileSet (co.cask.cdap.api.dataset.lib.FileSet) 1
MetricsContext (co.cask.cdap.api.metrics.MetricsContext) 1
WorkflowAction (co.cask.cdap.api.workflow.WorkflowAction) 1