Use of io.cdap.cdap.etl.api.engine.sql.capability.PushCapability in project cdap by caskdata.
From the class BatchSQLEngineAdapter, method pushInternal.
/**
 * Pushes the records of a collection into the SQL engine under the given dataset name.
 *
 * <p>Every push capability reported by the SQL engine is tried in iteration order. The first
 * capability for which the engine returns a non-null consumer handles the request: the records
 * are converted into a Spark {@code Dataset<Row>} and handed to that consumer. When no consumer
 * is returned for any capability, the records are written using the engine's push provider.
 *
 * <p>This method has blocking calls and should be executed in a separate thread.
 *
 * @param datasetName name of the dataset to push.
 * @param schema the record schema.
 * @param collection the collection containing the records to push.
 * @return {@link SQLDataset} instance representing the pushed records.
 * @throws SQLEngineException if the push operation fails.
 */
@SuppressWarnings("unchecked")
public SQLDataset pushInternal(String datasetName, Schema schema, SparkCollection<?> collection) throws SQLEngineException {
  SQLPushRequest request = new SQLPushRequest(datasetName, schema);

  // First choice: look for a consumer, asking the engine once per declared push capability.
  for (PushCapability pushCapability : sqlEngine.getPushCapabilities()) {
    SQLDatasetConsumer datasetConsumer = sqlEngine.getConsumer(request, pushCapability);
    if (datasetConsumer == null) {
      // The engine offers no consumer for this capability; try the next one.
      continue;
    }

    // A consumer accepted the request: expose the records as a Dataset<Row> and delegate to it.
    StructType structType = DataFrames.toDataType(schema);
    JavaRDD<StructuredRecord> structuredRecords = (JavaRDD<StructuredRecord>) collection.getUnderlying();
    JavaRDD<Row> rows = structuredRecords.map(structuredRecord -> DataFrames.toRow(structuredRecord, structType));
    Dataset<Row> dataFrame = sqlContext.createDataFrame(rows, structType);
    RecordCollection records = new SparkRecordCollectionImpl(dataFrame);
    return datasetConsumer.consume(records);
  }

  // Fallback: no capability yielded a consumer, so write the records using the push provider.
  SQLPushDataset<StructuredRecord, ?, ?> target = sqlEngine.getPushProvider(request);
  JavaRDD rawRecords = (JavaRDD) collection.getUnderlying();
  JavaPairRDD<?, ?> keyValues = rawRecords.flatMapToPair(new TransformToPairFunction<>(target.toKeyValue()));
  RDDUtils.saveUsingOutputFormat(target, keyValues);
  return target;
}
Aggregations