Search in sources :

Example 1 with TransformResult

use of com.thinkbiganalytics.spark.model.TransformResult in project kylo by Teradata.

the class TransformService method submitTransformJob.

/**
 * Submits the specified task to be executed and returns the result.
 *
 * <p>The task is first composed with a {@link ValidationStage} (only when field policies are supplied and a
 * validator is configured) and then with a {@link ProfileStage} (only when a profiler is configured). The composed
 * supplier is wrapped in a {@link TransformJob} keyed by a new table name and handed to the job tracker.</p>
 *
 * <p>The method waits at most 500 milliseconds for the job. If the job finishes in time it is removed from the
 * tracker and its response is returned. If the wait times out or is interrupted, a {@code PENDING} response with
 * zero progress and the table name is returned and the job is left registered with the tracker.</p>
 *
 * @param task     supplies the initial transform result
 * @param policies field policies used for validation, or {@code null} to skip the validation stage
 * @return the completed job's response, or a pending response if the job did not finish within the wait
 * @throws ScriptException if the job's execution fails; wraps the {@link ExecutionException}
 */
@Nonnull
private TransformResponse submitTransformJob(@Nonnull final Supplier<TransformResult> task, @Nullable final FieldPolicy[] policies) throws ScriptException {
    log.entry(task, policies);
    // Prepare script
    Supplier<TransformResult> result = task;
    if (policies != null && policies.length > 0 && validator != null) {
        result = Suppliers.compose(new ValidationStage(policies, validator), result);
    }
    if (profiler != null) {
        result = Suppliers.compose(new ProfileStage(profiler), result);
    }
    // Execute script
    final String table = newTableName();
    final TransformJob job = new TransformJob(table, Suppliers.compose(new ResponseStage(table), result), engine.getSparkContext());
    tracker.submitJob(job);
    // Build response
    TransformResponse response;
    try {
        response = job.get(500, TimeUnit.MILLISECONDS);
        tracker.removeJob(table);
    } catch (final ExecutionException cause) {
        throw log.throwing(new ScriptException(cause));
    } catch (final InterruptedException | TimeoutException e) {
        if (e instanceof InterruptedException) {
            // Restore the interrupt flag so code further up the stack can still observe the interruption;
            // catching InterruptedException clears it.
            Thread.currentThread().interrupt();
        }
        log.trace("Timeout waiting for script result", e);
        // The job is still tracked under the table name; report it as pending.
        response = new TransformResponse();
        response.setProgress(0.0);
        response.setStatus(TransformResponse.Status.PENDING);
        response.setTable(table);
    }
    return log.exit(response);
}
Also used : ProfileStage(com.thinkbiganalytics.spark.metadata.ProfileStage) TransformResult(com.thinkbiganalytics.spark.model.TransformResult) ScriptException(javax.script.ScriptException) TransformJob(com.thinkbiganalytics.spark.metadata.TransformJob) ResponseStage(com.thinkbiganalytics.spark.metadata.ResponseStage) TransformResponse(com.thinkbiganalytics.spark.rest.model.TransformResponse) ExecutionException(java.util.concurrent.ExecutionException) ValidationStage(com.thinkbiganalytics.spark.metadata.ValidationStage) TimeoutException(java.util.concurrent.TimeoutException) Nonnull(javax.annotation.Nonnull)

Example 2 with TransformResult

use of com.thinkbiganalytics.spark.model.TransformResult in project kylo by Teradata.

the class TransformService method submitTransformJob.

/**
 * Builds the processing pipeline for a transform request and submits it for execution.
 *
 * <p>A {@link ValidationStage} is added when the request asks for validation, field policies are present and a
 * validator is configured. A {@link ProfileStage} is added when the request asks for profiling and a profiler is
 * configured. The resulting supplier is passed, together with the request's page spec, to the
 * {@code submitTransformJob} overload that accepts a {@link PageSpec}.</p>
 *
 * @param task    supplies the initial transform result
 * @param request the transform request providing the policies, page spec, and validate/profile flags
 * @return the response produced by the delegate overload
 * @throws ScriptException presumably propagated from the delegate overload, which is not shown here
 */
@Nonnull
private TransformResponse submitTransformJob(final Supplier<TransformResult> task, @Nonnull final TransformRequest request) throws ScriptException {
    final FieldPolicy[] fieldPolicies = getPolicies(request);
    final PageSpec page = request.getPageSpec();
    log.entry(task, fieldPolicies);

    Supplier<TransformResult> pipeline = task;

    // Optional validation stage
    final boolean validationRequested = request.isDoValidate();
    final boolean hasPolicies = fieldPolicies != null && fieldPolicies.length > 0;
    if (validationRequested && hasPolicies && validator != null) {
        pipeline = Suppliers.compose(new ValidationStage(fieldPolicies, validator), pipeline);
    }

    // Optional profile stage
    final boolean profilingRequested = request.isDoProfile();
    if (profilingRequested && profiler != null) {
        pipeline = Suppliers.compose(new ProfileStage(profiler), pipeline);
    }

    return submitTransformJob(pipeline, page);
}
Also used : ProfileStage(com.thinkbiganalytics.spark.metadata.ProfileStage) TransformResult(com.thinkbiganalytics.spark.model.TransformResult) FieldPolicy(com.thinkbiganalytics.policy.rest.model.FieldPolicy) PageSpec(com.thinkbiganalytics.spark.rest.model.PageSpec) ValidationStage(com.thinkbiganalytics.spark.metadata.ValidationStage) Nonnull(javax.annotation.Nonnull)

Example 3 with TransformResult

use of com.thinkbiganalytics.spark.model.TransformResult in project kylo by Teradata.

the class ShellTransformStage method get.

/**
 * Creates a transform result backed by this stage's data set.
 *
 * <p>The data set is persisted with the {@code MEMORY_ONLY} storage level and stored on the result; the result's
 * columns are then derived from that data set's schema through a {@link QueryResultRowTransform}.</p>
 *
 * @return a new transform result with its data set and columns populated
 */
@Override
public TransformResult get() {
    final TransformResult transformResult = new TransformResult();
    transformResult.setDataSet(dataSet.persist(StorageLevel.MEMORY_ONLY()));

    // Columns come from the schema of the persisted data set that was just stored on the result.
    final QueryResultRowTransform rowTransform = new QueryResultRowTransform(transformResult.getDataSet().schema(), "", converterService);
    transformResult.setColumns(Arrays.<QueryResultColumn>asList(rowTransform.columns()));

    return transformResult;
}
Also used : TransformResult(com.thinkbiganalytics.spark.model.TransformResult)

Example 4 with TransformResult

use of com.thinkbiganalytics.spark.model.TransformResult in project kylo by Teradata.

the class SqlTransformStage method extractData.

/**
 * Builds a transform result from a JDBC result set.
 *
 * <p>The schema is extracted from the result set's metadata, and the data set is created from a {@link JdbcRDD}
 * that runs this stage's SQL as a sub-query against connections obtained from the data source.</p>
 *
 * @param rs the result set whose metadata describes the query's columns
 * @return a new transform result with its data set populated
 * @throws SQLException if the result set metadata cannot be read
 */
@Override
public TransformResult extractData(@Nonnull final ResultSet rs) throws SQLException {
    final ResultSetMetaData resultSetMetaData = rs.getMetaData();
    final TransformResult transformResult = new TransformResult();
    final StructType structType = extractSchema(resultSetMetaData, transformResult);

    // Scala adapters for opening connections and mapping each JDBC row to a Spark Row
    final Function0<Connection> connectionFactory = ScalaUtil.wrap(Suppliers.compose(JdbcUtil.getDataSourceConnection(), dataSource));
    final Function1<ResultSet, Row> rowMapper = ScalaUtil.wrap(new RowTransform());
    // noinspection RedundantCast,unchecked
    final ClassTag<Row> rowTag = (ClassTag) ClassTag$.MODULE$.apply(Row.class);

    // The two '?' placeholders are filled from the bounds passed below (1 and 1) over a single partition.
    // NOTE(review): presumably this makes the predicate "1 = 1" so every row is kept - confirm against JdbcRDD docs.
    final String query = "SELECT * FROM (" + sql + ") rdd WHERE ? = ?";
    final RDD<Row> rowRdd = new JdbcRDD<Row>(sqlContext.sparkContext(), connectionFactory, query, 1, 1, 1, rowMapper, rowTag);

    transformResult.setDataSet(sparkContextService.toDataSet(sqlContext, rowRdd.toJavaRDD(), structType));
    return transformResult;
}
Also used : TransformResult(com.thinkbiganalytics.spark.model.TransformResult) StructType(org.apache.spark.sql.types.StructType) Connection(java.sql.Connection) ResultSetMetaData(java.sql.ResultSetMetaData) JdbcRDD(org.apache.spark.rdd.JdbcRDD) RowTransform(com.thinkbiganalytics.spark.jdbc.RowTransform) ResultSet(java.sql.ResultSet) Row(org.apache.spark.sql.Row) ClassTag(scala.reflect.ClassTag)

Aggregations

TransformResult (com.thinkbiganalytics.spark.model.TransformResult)4 ProfileStage (com.thinkbiganalytics.spark.metadata.ProfileStage)2 ValidationStage (com.thinkbiganalytics.spark.metadata.ValidationStage)2 Nonnull (javax.annotation.Nonnull)2 FieldPolicy (com.thinkbiganalytics.policy.rest.model.FieldPolicy)1 RowTransform (com.thinkbiganalytics.spark.jdbc.RowTransform)1 ResponseStage (com.thinkbiganalytics.spark.metadata.ResponseStage)1 TransformJob (com.thinkbiganalytics.spark.metadata.TransformJob)1 PageSpec (com.thinkbiganalytics.spark.rest.model.PageSpec)1 TransformResponse (com.thinkbiganalytics.spark.rest.model.TransformResponse)1 Connection (java.sql.Connection)1 ResultSet (java.sql.ResultSet)1 ResultSetMetaData (java.sql.ResultSetMetaData)1 ExecutionException (java.util.concurrent.ExecutionException)1 TimeoutException (java.util.concurrent.TimeoutException)1 ScriptException (javax.script.ScriptException)1 JdbcRDD (org.apache.spark.rdd.JdbcRDD)1 Row (org.apache.spark.sql.Row)1 StructType (org.apache.spark.sql.types.StructType)1 ClassTag (scala.reflect.ClassTag)1