
Example 6 with Metrics

use of io.cdap.cdap.api.metrics.Metrics in project cdap by caskdata.

the class BatchSQLEngineAdapter method tryRelationalTransform.

/**
 * This method is called when an engine is present and willing to try performing a relational transform.
 *
 * @param stageSpec stage specification
 * @param transform transform plugin
 * @param input     input collections
 * @return the resulting collection, or an empty optional if the transform can't be done with this engine
 */
public Optional<SQLEngineJob<SQLDataset>> tryRelationalTransform(StageSpec stageSpec,
                                                                 RelationalTransform transform,
                                                                 Map<String, SparkCollection<Object>> input) {
    String stageName = stageSpec.getName();
    Map<String, Relation> inputRelations = input.entrySet().stream()
        .collect(Collectors.toMap(
            Map.Entry::getKey,
            e -> sqlEngine.getRelation(new SQLRelationDefinition(
                e.getKey(), stageSpec.getInputSchemas().get(e.getKey())))));
    BasicRelationalTransformContext pluginContext = new BasicRelationalTransformContext(
        getSQLRelationalEngine(), inputRelations, stageSpec.getInputSchemas(), stageSpec.getOutputSchema());
    if (!transform.transform(pluginContext)) {
        // Plugin was not able to do a relational transform with this engine
        return Optional.empty();
    }
    if (pluginContext.getOutputRelation() == null) {
        // Plugin reported the transformation as successful but failed to set an output
        throw new IllegalStateException("Plugin " + transform + " did not produce a relational output");
    }
    if (!pluginContext.getOutputRelation().isValid()) {
        // The output is set to an invalid relation; some of the transforms are probably not supported by the engine
        return Optional.empty();
    }
    // Ensure input and output schemas for this stage are supported by the engine
    if (stageSpec.getInputSchemas().values().stream().anyMatch(s -> !sqlEngine.supportsInputSchema(s))) {
        return Optional.empty();
    }
    if (!sqlEngine.supportsOutputSchema(stageSpec.getOutputSchema())) {
        return Optional.empty();
    }
    // Validate transformation definition with engine
    SQLTransformDefinition transformDefinition = new SQLTransformDefinition(
        stageName, pluginContext.getOutputRelation(), stageSpec.getOutputSchema(),
        Collections.emptyMap(), Collections.emptyMap());
    if (!sqlEngine.canTransform(transformDefinition)) {
        return Optional.empty();
    }
    return Optional.of(runJob(stageSpec.getName(), SQLEngineJobType.EXECUTE, () -> {
        // Push all input stages that still need to be pushed to the engine before executing this transform
        input.forEach((name, collection) -> {
            if (!exists(name)) {
                push(name, stageSpec.getInputSchemas().get(name), collection);
            }
        });
        // Initialize metrics collector
        DefaultStageMetrics stageMetrics = new DefaultStageMetrics(metrics, stageName);
        StageStatisticsCollector statisticsCollector = statsCollectors.get(stageName);
        // Collect input datasets and execute transformation
        Map<String, SQLDataset> inputDatasets = input.keySet().stream()
            .collect(Collectors.toMap(Function.identity(), this::getDatasetForStage));
        // Count input records
        for (SQLDataset inputDataset : inputDatasets.values()) {
            countRecordsIn(inputDataset, statisticsCollector, stageMetrics);
        }
        // Execute transform
        SQLTransformRequest sqlContext = new SQLTransformRequest(
            inputDatasets, stageSpec.getName(), pluginContext.getOutputRelation(), stageSpec.getOutputSchema());
        SQLDataset transformed = sqlEngine.transform(sqlContext);
        // Count output records
        countRecordsOut(transformed, statisticsCollector, stageMetrics);
        return transformed;
    }));
}
Also used : RelationalTransform(io.cdap.cdap.etl.api.relational.RelationalTransform) SQLTransformRequest(io.cdap.cdap.etl.api.engine.sql.request.SQLTransformRequest) DataFrames(io.cdap.cdap.api.spark.sql.DataFrames) Relation(io.cdap.cdap.etl.api.relational.Relation) SQLPullRequest(io.cdap.cdap.etl.api.engine.sql.request.SQLPullRequest) LoggerFactory(org.slf4j.LoggerFactory) Constants(io.cdap.cdap.etl.common.Constants) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) SQLEngineJob(io.cdap.cdap.etl.engine.SQLEngineJob) SQLRelationDefinition(io.cdap.cdap.etl.api.engine.sql.request.SQLRelationDefinition) PullCapability(io.cdap.cdap.etl.api.engine.sql.capability.PullCapability) JavaSparkExecutionContext(io.cdap.cdap.api.spark.JavaSparkExecutionContext) StageMetrics(io.cdap.cdap.etl.api.StageMetrics) Map(java.util.Map) Objects(com.google.common.base.Objects) SQLPullDataset(io.cdap.cdap.etl.api.engine.sql.dataset.SQLPullDataset) StructType(org.apache.spark.sql.types.StructType) SQLJoinDefinition(io.cdap.cdap.etl.api.engine.sql.request.SQLJoinDefinition) Threads(org.apache.twill.common.Threads) CancellationException(java.util.concurrent.CancellationException) Engine(io.cdap.cdap.etl.api.relational.Engine) Collection(java.util.Collection) SQLTransformDefinition(io.cdap.cdap.etl.api.engine.sql.request.SQLTransformDefinition) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Metrics(io.cdap.cdap.api.metrics.Metrics) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) RecordCollection(io.cdap.cdap.etl.api.engine.sql.dataset.RecordCollection) SQLPushDataset(io.cdap.cdap.etl.api.engine.sql.dataset.SQLPushDataset) StageStatisticsCollector(io.cdap.cdap.etl.common.StageStatisticsCollector) SparkCollection(io.cdap.cdap.etl.spark.SparkCollection) List(java.util.List) DefaultStageMetrics(io.cdap.cdap.etl.common.DefaultStageMetrics) SQLDataset(io.cdap.cdap.etl.api.engine.sql.dataset.SQLDataset) SQLWriteRequest(io.cdap.cdap.etl.api.engine.sql.request.SQLWriteRequest) PushCapability(io.cdap.cdap.etl.api.engine.sql.capability.PushCapability) Optional(java.util.Optional) TransformToPairFunction(io.cdap.cdap.etl.spark.function.TransformToPairFunction) SparkRecordCollection(io.cdap.cdap.etl.api.sql.engine.dataset.SparkRecordCollection) TransformFromPairFunction(io.cdap.cdap.etl.spark.function.TransformFromPairFunction) Dataset(org.apache.spark.sql.Dataset) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SQLPushRequest(io.cdap.cdap.etl.api.engine.sql.request.SQLPushRequest) SQLDatasetConsumer(io.cdap.cdap.etl.api.engine.sql.dataset.SQLDatasetConsumer) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Function(java.util.function.Function) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) SQLEngineJobKey(io.cdap.cdap.etl.engine.SQLEngineJobKey) SQLEngineWriteJobKey(io.cdap.cdap.etl.engine.SQLEngineWriteJobKey) HashSet(java.util.HashSet) SQLEngineJobType(io.cdap.cdap.etl.engine.SQLEngineJobType) SQLDatasetProducer(io.cdap.cdap.etl.api.engine.sql.dataset.SQLDatasetProducer) JavaRDD(org.apache.spark.api.java.JavaRDD) ExecutorService(java.util.concurrent.ExecutorService) Nullable(javax.annotation.Nullable) SQLWriteResult(io.cdap.cdap.etl.api.engine.sql.request.SQLWriteResult) Logger(org.slf4j.Logger) JoinStage(io.cdap.cdap.etl.api.join.JoinStage) SQLEngineException(io.cdap.cdap.etl.api.engine.sql.SQLEngineException) 
SQLJoinRequest(io.cdap.cdap.etl.api.engine.sql.request.SQLJoinRequest) SQLContext(org.apache.spark.sql.SQLContext) Row(org.apache.spark.sql.Row) Schema(io.cdap.cdap.api.data.schema.Schema) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) Closeable(java.io.Closeable) SQLEngineOutput(io.cdap.cdap.etl.api.engine.sql.SQLEngineOutput) SQLEngine(io.cdap.cdap.etl.api.engine.sql.SQLEngine) JoinDefinition(io.cdap.cdap.etl.api.join.JoinDefinition) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) Collections(java.util.Collections) SparkRecordCollectionImpl(io.cdap.cdap.etl.api.sql.engine.dataset.SparkRecordCollectionImpl)
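
The empty optional at each bail-out point above is the adapter's signal to the pipeline planner that this stage should fall back to regular Spark execution. A minimal caller sketch, assuming hypothetical helpers runWithSqlEngine and runOnSpark (illustrative names, not CDAP APIs):

// Hypothetical caller: "engineAdapter", "runWithSqlEngine" and "runOnSpark"
// are illustrative names, not part of the CDAP API shown above.
Optional<SQLEngineJob<SQLDataset>> job =
    engineAdapter.tryRelationalTransform(stageSpec, transform, input);
if (job.isPresent()) {
    // The engine accepted the transform; downstream stages consume the SQL job.
    runWithSqlEngine(job.get());
} else {
    // The engine declined (unsupported schema, invalid relation, ...);
    // fall back to the regular Spark execution path.
    runOnSpark(stageSpec, input);
}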

Example 7 with Metrics

use of io.cdap.cdap.api.metrics.Metrics in project cdap by caskdata.

the class ConnectionHandler method browse.

/**
 * Browse the connection on a given path.
 */
@POST
@TransactionPolicy(value = TransactionControl.EXPLICIT)
@Path(API_VERSION + "/contexts/{context}/connections/{connection}/browse")
public void browse(HttpServiceRequest request, HttpServiceResponder responder,
                   @PathParam("context") String namespace,
                   @PathParam("connection") String connection) {
    respond(namespace, responder, namespaceSummary -> {
        if (namespaceSummary.getName().equalsIgnoreCase(NamespaceId.SYSTEM.getNamespace())) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "Browsing connection in system namespace is currently not supported");
            return;
        }
        contextAccessEnforcer.enforce(new ConnectionEntityId(namespace, ConnectionId.getConnectionId(connection)), StandardPermission.USE);
        String browseRequestString = StandardCharsets.UTF_8.decode(request.getContent()).toString();
        BrowseRequest browseRequest = GSON.fromJson(browseRequestString, BrowseRequest.class);
        if (browseRequest == null) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "The request body is empty");
            return;
        }
        if (browseRequest.getPath() == null) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "Path is not provided in the browse request");
            return;
        }
        Connection conn = store.getConnection(new ConnectionId(namespaceSummary, connection));
        if (getContext().isRemoteTaskEnabled()) {
            executeRemotely(namespace, browseRequestString, conn, RemoteConnectionBrowseTask.class, responder);
        } else {
            browseLocally(namespaceSummary.getName(), browseRequest, conn, responder);
        }
        Metrics child = metrics.child(ImmutableMap.of(
            Constants.Metrics.Tag.APP_ENTITY_TYPE, Constants.CONNECTION_SERVICE_NAME,
            Constants.Metrics.Tag.APP_ENTITY_TYPE_NAME, conn.getConnectionType()));
        child.count(Constants.Metrics.Connection.CONNECTION_BROWSE_COUNT, 1);
    });
}
Also used : ConnectionId(io.cdap.cdap.etl.proto.connection.ConnectionId) Metrics(io.cdap.cdap.api.metrics.Metrics) HttpURLConnection(java.net.HttpURLConnection) Connection(io.cdap.cdap.etl.proto.connection.Connection) BrowseRequest(io.cdap.cdap.etl.api.connector.BrowseRequest) ConnectionEntityId(io.cdap.cdap.proto.id.ConnectionEntityId) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST) TransactionPolicy(io.cdap.cdap.api.annotation.TransactionPolicy)
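
The two request-body checks above rely on Gson's deserialization behavior: fromJson returns null for an empty input, and a JSON object that omits "path" deserializes with a null field. A runnable sketch using a stand-in class (the real BrowseRequest lives in io.cdap.cdap.etl.api.connector):

import com.google.gson.Gson;

public class BrowseRequestChecksDemo {

    // Stand-in that models only the field the handler validates.
    static class BrowseRequest {
        String path;
    }

    public static void main(String[] args) {
        Gson gson = new Gson();
        // Empty body -> null, which triggers "The request body is empty"
        System.out.println(gson.fromJson("", BrowseRequest.class)); // null
        // Body without "path" -> null field, triggering "Path is not provided"
        System.out.println(gson.fromJson("{}", BrowseRequest.class).path); // null
        // Well-formed request passes both checks
        System.out.println(gson.fromJson("{\"path\":\"/data\"}", BrowseRequest.class).path); // /data
    }
}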

Example 8 with Metrics

use of io.cdap.cdap.api.metrics.Metrics in project cdap by caskdata.

the class ConnectionHandler method sample.

/**
 * Retrieve the sample result for the connection.
 */
@POST
@TransactionPolicy(value = TransactionControl.EXPLICIT)
@Path(API_VERSION + "/contexts/{context}/connections/{connection}/sample")
public void sample(HttpServiceRequest request, HttpServiceResponder responder,
                   @PathParam("context") String namespace,
                   @PathParam("connection") String connection) {
    respond(namespace, responder, namespaceSummary -> {
        if (namespaceSummary.getName().equalsIgnoreCase(NamespaceId.SYSTEM.getNamespace())) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "Sampling connection in system namespace is currently not supported");
            return;
        }
        contextAccessEnforcer.enforce(new ConnectionEntityId(namespace, ConnectionId.getConnectionId(connection)), StandardPermission.USE);
        String sampleRequestString = StandardCharsets.UTF_8.decode(request.getContent()).toString();
        SampleRequest sampleRequest = GSON.fromJson(sampleRequestString, SampleRequest.class);
        if (sampleRequest == null) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "The request body is empty");
            return;
        }
        if (sampleRequest.getPath() == null) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "Path is not provided in the sample request");
            return;
        }
        if (sampleRequest.getLimit() <= 0) {
            responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "Limit should be greater than 0");
            return;
        }
        Connection conn = store.getConnection(new ConnectionId(namespaceSummary, connection));
        if (getContext().isRemoteTaskEnabled()) {
            executeRemotely(namespace, sampleRequestString, conn, RemoteConnectionSampleTask.class, responder);
        } else {
            sampleLocally(namespaceSummary.getName(), sampleRequestString, conn, responder);
        }
        Metrics child = metrics.child(ImmutableMap.of(
            Constants.Metrics.Tag.APP_ENTITY_TYPE, Constants.CONNECTION_SERVICE_NAME,
            Constants.Metrics.Tag.APP_ENTITY_TYPE_NAME, conn.getConnectionType()));
        child.count(Constants.Metrics.Connection.CONNECTION_SAMPLE_COUNT, 1);
        // sample will also generate the spec, so add the metric for it
        child.count(Constants.Metrics.Connection.CONNECTION_SPEC_COUNT, 1);
    });
}
Also used : ConnectionId(io.cdap.cdap.etl.proto.connection.ConnectionId) Metrics(io.cdap.cdap.api.metrics.Metrics) HttpURLConnection(java.net.HttpURLConnection) Connection(io.cdap.cdap.etl.proto.connection.Connection) SampleRequest(io.cdap.cdap.etl.api.connector.SampleRequest) ConnectionEntityId(io.cdap.cdap.proto.id.ConnectionEntityId) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST) TransactionPolicy(io.cdap.cdap.api.annotation.TransactionPolicy)
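
Both handlers finish with the same tagging idiom: metrics.child(...) derives a new Metrics instance whose tags are appended to the parent's context, so every count stays attributable to a connection type. A short sketch of the idiom with illustrative tag keys and metric names (the handler itself uses the Constants class shown above):

// Sketch only: "conn_type" and the metric names are illustrative,
// not the constants the handler uses.
Metrics perType = metrics.child(ImmutableMap.of("conn_type", "bigquery"));
perType.count("connection.sample.count", 1);
// sample() also generates a spec, hence the second counter in the handler
perType.count("connection.spec.count", 1);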

Example 9 with Metrics

use of io.cdap.cdap.api.metrics.Metrics in project cdap by caskdata.

the class BatchSQLEngineAdapterTest method setUp.

@Before
public void setUp() {
    invocationCounts = new HashMap<>();
    stageMetrics = new StageMetrics() {

        @Override
        public void count(String metricName, int delta) {
            throw new UnsupportedOperationException("not implemented");
        }

        @Override
        public void countLong(String metricName, long delta) {
            invocationCounts.compute(delta, (k, v) -> (v == null) ? 1 : v + 1);
        }

        @Override
        public void gauge(String metricName, long value) {
            throw new UnsupportedOperationException("not implemented");
        }

        @Override
        public void pipelineCount(String metricName, int delta) {
            throw new UnsupportedOperationException("not implemented");
        }

        @Override
        public void pipelineGauge(String metricName, long value) {
            throw new UnsupportedOperationException("not implemented");
        }

        @Override
        public Metrics child(Map<String, String> tags) {
            throw new UnsupportedOperationException("not implemented");
        }

        @Override
        public Map<String, String> getTags() {
            throw new UnsupportedOperationException("not implemented");
        }
    };
}
Also used : StageMetrics(io.cdap.cdap.etl.api.StageMetrics) Map(java.util.Map) Test(org.junit.Test) HashMap(java.util.HashMap) Metrics(io.cdap.cdap.api.metrics.Metrics) Assert(org.junit.Assert) Before(org.junit.Before)
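
Note that the stub keys invocationCounts by the delta value rather than the metric name, so a test can assert how many times each record count was emitted. A hedged usage sketch, assuming invocationCounts is a Map<Long, Integer> as the compute lambda implies:

// Two emissions of 5 records, one emission of 3:
stageMetrics.countLong("records.in", 5L);
stageMetrics.countLong("records.in", 5L);
stageMetrics.countLong("records.out", 3L);
// invocationCounts now maps delta -> number of emissions: {5=2, 3=1}
Assert.assertEquals(Integer.valueOf(2), invocationCounts.get(5L));
Assert.assertEquals(Integer.valueOf(1), invocationCounts.get(3L));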

Example 10 with Metrics

use of io.cdap.cdap.api.metrics.Metrics in project cdap by caskdata.

the class MetricsHandlerTest method setupMetrics.

private static void setupMetrics() throws Exception {
    // Adding metrics for app "WordCount1" in namespace "myspace", "WCount1" in "yourspace"
    MetricsContext collector = collectionService.getContext(getServiceContext("myspace", "WordCount1", "WordCounter", "run1", "splitter"));
    collector.increment("reads", 1);
    collector.increment("writes", 1);
    collector = collectionService.getContext(getServiceContext("yourspace", "WCount1", "WordCounter", "run1", "splitter"));
    collector.increment("reads", 1);
    collector = collectionService.getContext(getServiceContext("yourspace", "WCount1", "WCounter", "run1", "splitter"));
    emitTs = System.currentTimeMillis();
    // we want to emit in two different seconds
    // todo : figure out why we need this
    TimeUnit.SECONDS.sleep(1);
    collector.increment("reads", 1);
    TimeUnit.MILLISECONDS.sleep(2000);
    collector.increment("reads", 2);
    collector = collectionService.getContext(getServiceContext("yourspace", "WCount1", "WCounter", "run1", "counter"));
    collector.increment("reads", 1);
    collector = collectionService.getContext(getMapReduceTaskContext("yourspace", "WCount1", "ClassicWordCount", MapReduceMetrics.TaskType.Mapper, "run1", "task1"));
    collector.increment("reads", 1);
    collector = collectionService.getContext(getMapReduceTaskContext("yourspace", "WCount1", "ClassicWordCount", MapReduceMetrics.TaskType.Reducer, "run1", "task2"));
    collector.increment("reads", 1);
    collector = collectionService.getContext(getServiceContext("myspace", "WordCount1", "WordCounter", "run1", "splitter"));
    collector.increment("reads", 1);
    collector.increment("writes", 1);
    collector = collectionService.getContext(getServiceContext("myspace", "WordCount1", "WordCounter", "run1", "collector"));
    collector.increment("aa", 1);
    collector.increment("zz", 1);
    collector.increment("ab", 1);
    collector = collectionService.getContext(getWorkerContext("yourspace", "WCount1", "WorkerWordCount", "run1", "task1"));
    collector.increment("workerreads", 5);
    collector.increment("workerwrites", 6);
    collector = collectionService.getContext(getWorkerContext("yourspace", "WCount1", "WorkerWordCount", "run2", "task1"));
    collector.increment("workerreads", 5);
    collector.increment("workerwrites", 6);
    // also: user metrics
    Metrics userMetrics = new ProgramUserMetrics(collectionService.getContext(getServiceContext("myspace", "WordCount1", "WordCounter", "run1", "splitter")));
    userMetrics.count("reads", 1);
    userMetrics.count("writes", 2);
    collector = collectionService.getContext(new HashMap<String, String>());
    collector.increment("resources.total.storage", 10);
    Metrics replicatorMetrics = new ProgramUserMetrics(collectionService.getContext(getWorkerContext("childctx", "Replicator", "ReplicatorWorker", "somerunid", "instance1")));
    Metrics tableMetrics = replicatorMetrics.child(ImmutableMap.of("ent", "mytable"));
    tableMetrics.count("inserts", 10);
    tableMetrics.count("ddls", 1);
    try {
        replicatorMetrics.child(ImmutableMap.of("ns", "anothernamespace"));
        Assert.fail("Creating child Metrics with duplicate tag name 'ns' should have failed.");
    } catch (IllegalArgumentException ignored) {
    }
    // need a better way to do this
    TimeUnit.SECONDS.sleep(2);
}
Also used : MapReduceMetrics(io.cdap.cdap.app.metrics.MapReduceMetrics) ProgramUserMetrics(io.cdap.cdap.app.metrics.ProgramUserMetrics) Metrics(io.cdap.cdap.api.metrics.Metrics) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) MetricsContext(io.cdap.cdap.api.metrics.MetricsContext)
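
The replicator block at the end exercises both rules of child(): a child merges its tags into the parent's context (so the "ent" tag rides along with every inserts/ddls count), while a tag name already present in the context, such as the namespace tag "ns", is rejected. A condensed sketch, assuming parentMetrics already carries an "ns" tag:

Metrics perTable = parentMetrics.child(ImmutableMap.of("ent", "orders")); // new tag: accepted
perTable.count("inserts", 1); // emitted with the parent's tags plus "ent"
try {
    parentMetrics.child(ImmutableMap.of("ns", "other")); // duplicate tag name
    Assert.fail("expected IllegalArgumentException");
} catch (IllegalArgumentException expected) {
    // child() must not shadow a tag already present in the context
}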

Aggregations

Metrics (io.cdap.cdap.api.metrics.Metrics) 10
TransactionPolicy (io.cdap.cdap.api.annotation.TransactionPolicy) 6
Connection (io.cdap.cdap.etl.proto.connection.Connection) 6
ConnectionId (io.cdap.cdap.etl.proto.connection.ConnectionId) 6
HttpURLConnection (java.net.HttpURLConnection) 6
Path (javax.ws.rs.Path) 6
ConnectionEntityId (io.cdap.cdap.proto.id.ConnectionEntityId) 5
HashMap (java.util.HashMap) 4
Map (java.util.Map) 3
POST (javax.ws.rs.POST) 3
StageMetrics (io.cdap.cdap.etl.api.StageMetrics) 2
Objects (com.google.common.base.Objects) 1
TxRunnable (io.cdap.cdap.api.TxRunnable) 1
DatasetContext (io.cdap.cdap.api.data.DatasetContext) 1
StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord) 1
Schema (io.cdap.cdap.api.data.schema.Schema) 1
FileSet (io.cdap.cdap.api.dataset.lib.FileSet) 1
MetricsContext (io.cdap.cdap.api.metrics.MetricsContext) 1
JavaSparkExecutionContext (io.cdap.cdap.api.spark.JavaSparkExecutionContext) 1
DataFrames (io.cdap.cdap.api.spark.sql.DataFrames) 1