Search in sources :

Example 1 with SQLDatasetConsumer

Use of io.cdap.cdap.etl.api.engine.sql.dataset.SQLDatasetConsumer in the project cdap by caskdata.

The pushInternal method of the class BatchSQLEngineAdapter.

/**
 * Pushes the records of a Spark collection into the SQL engine under the given dataset name.
 * <p>
 * Two paths are tried in order:
 * <ol>
 *   <li>Each push capability reported by the SQL engine is offered the push request. The first
 *       capability for which the engine returns a non-null consumer wins: the records are converted
 *       into a Spark {@code Dataset<Row>} and handed to that consumer.</li>
 *   <li>If no capability yields a consumer, the engine's push provider is used instead and the
 *       records are written out as key/value pairs through its output format.</li>
 * </ol>
 * This method has blocking calls and should be executed in a separate thread.
 *
 * @param datasetName name of the dataset to push.
 * @param schema      schema of the records being pushed.
 * @param collection  Spark collection holding the records to push.
 * @return the {@link SQLDataset} representing the pushed records.
 * @throws SQLEngineException if the push operation fails.
 */
@SuppressWarnings("unchecked")
public SQLDataset pushInternal(String datasetName, Schema schema, SparkCollection<?> collection) throws SQLEngineException {
    SQLPushRequest request = new SQLPushRequest(datasetName, schema);

    // First choice: let a capability-specific consumer take the records directly.
    for (PushCapability pushCapability : sqlEngine.getPushCapabilities()) {
        SQLDatasetConsumer datasetConsumer = sqlEngine.getConsumer(request, pushCapability);
        if (datasetConsumer == null) {
            // The engine has no consumer for this capability; try the next one.
            continue;
        }

        // Convert the structured records into a Spark DataFrame that the consumer can read.
        StructType structType = DataFrames.toDataType(schema);
        JavaRDD<StructuredRecord> structuredRecords = (JavaRDD<StructuredRecord>) collection.getUnderlying();
        JavaRDD<Row> rows = structuredRecords.map(rec -> DataFrames.toRow(rec, structType));
        Dataset<Row> dataFrame = sqlContext.createDataFrame(rows, structType);
        RecordCollection records = new SparkRecordCollectionImpl(dataFrame);
        return datasetConsumer.consume(records);
    }

    // Fallback: no consumer was available, so write the records through the push provider.
    SQLPushDataset<StructuredRecord, ?, ?> pushProviderDataset = sqlEngine.getPushProvider(request);
    JavaRDD underlyingRdd = (JavaRDD) collection.getUnderlying();
    JavaPairRDD<?, ?> keyValuePairs =
        underlyingRdd.flatMapToPair(new TransformToPairFunction<>(pushProviderDataset.toKeyValue()));
    RDDUtils.saveUsingOutputFormat(pushProviderDataset, keyValuePairs);
    return pushProviderDataset;
}
Also used : StructType(org.apache.spark.sql.types.StructType) SQLPushRequest(io.cdap.cdap.etl.api.engine.sql.request.SQLPushRequest) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) JavaRDD(org.apache.spark.api.java.JavaRDD) PushCapability(io.cdap.cdap.etl.api.engine.sql.capability.PushCapability) SQLDatasetConsumer(io.cdap.cdap.etl.api.engine.sql.dataset.SQLDatasetConsumer) RecordCollection(io.cdap.cdap.etl.api.engine.sql.dataset.RecordCollection) SparkRecordCollection(io.cdap.cdap.etl.api.sql.engine.dataset.SparkRecordCollection) SparkRecordCollectionImpl(io.cdap.cdap.etl.api.sql.engine.dataset.SparkRecordCollectionImpl) Row(org.apache.spark.sql.Row)

Aggregations

StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)1 PushCapability (io.cdap.cdap.etl.api.engine.sql.capability.PushCapability)1 RecordCollection (io.cdap.cdap.etl.api.engine.sql.dataset.RecordCollection)1 SQLDatasetConsumer (io.cdap.cdap.etl.api.engine.sql.dataset.SQLDatasetConsumer)1 SQLPushRequest (io.cdap.cdap.etl.api.engine.sql.request.SQLPushRequest)1 SparkRecordCollection (io.cdap.cdap.etl.api.sql.engine.dataset.SparkRecordCollection)1 SparkRecordCollectionImpl (io.cdap.cdap.etl.api.sql.engine.dataset.SparkRecordCollectionImpl)1 JavaRDD (org.apache.spark.api.java.JavaRDD)1 Row (org.apache.spark.sql.Row)1 StructType (org.apache.spark.sql.types.StructType)1