use of io.cdap.cdap.etl.engine.SQLEngineJobKey in project cdap by caskdata.
the class BatchSQLEngineAdapter method pull.
/**
 * Creates a new job to pull a Spark collection from the SQL engine.
 * <p>
 * If a job is already registered under the key (dataset name, {@code PULL}), that instance is returned and no
 * new task is submitted. Otherwise a task is submitted to {@code executorService} which calls
 * {@code waitForJobAndThrowException} on the supplied job, passes the job's result to {@code pullInternal} and
 * completes the returned job's future with the outcome.
 *
 * @param job the job representing the compute stage for the dataset we need to pull.
 * @param <T> element type of the pulled RDD.
 * @return Job representing this pull operation.
 */
@SuppressWarnings("unchecked")
public <T> SQLEngineJob<JavaRDD<T>> pull(SQLEngineJob<SQLDataset> job) {
  // If this job already exists, return the existing instance. A single lookup is used so that the presence check
  // and the retrieval cannot observe two different states of the map.
  SQLEngineJobKey jobKey = new SQLEngineJobKey(job.getDatasetName(), SQLEngineJobType.PULL);
  SQLEngineJob<JavaRDD<T>> existingJob = (SQLEngineJob<JavaRDD<T>>) jobs.get(jobKey);
  if (existingJob != null) {
    return existingJob;
  }

  CompletableFuture<JavaRDD<T>> future = new CompletableFuture<>();
  Runnable pullTask = () -> {
    try {
      LOG.debug("Starting pull for dataset '{}'", job.getDatasetName());
      waitForJobAndThrowException(job);
      JavaRDD<T> result = pullInternal(job.waitFor());
      LOG.debug("Completed pull for dataset '{}'", job.getDatasetName());
      future.complete(result);
    } catch (Throwable t) {
      // Every failure is routed into the future so that whoever waits on the job observes it.
      future.completeExceptionally(t);
    }
  };

  // Submit before registering: if the submission is rejected, no job with a never-completing future is left behind.
  executorService.submit(pullTask);
  SQLEngineJob<JavaRDD<T>> pullJob = new SQLEngineJob<>(jobKey, future);
  jobs.put(jobKey, pullJob);
  return pullJob;
}
use of io.cdap.cdap.etl.engine.SQLEngineJobKey in project cdap by caskdata.
the class BatchSQLEngineAdapter method push.
/**
 * Creates a new job to push a SparkCollection into the SQL engine.
 * <p>
 * If a job is already registered under the key (dataset name, {@code PUSH}), that instance is returned and no
 * new task is submitted. Otherwise a task is submitted to {@code executorService} which calls
 * {@code pushInternal} and completes the returned job's future with the outcome.
 *
 * @param datasetName the name of the dataset to push
 * @param schema the schema for this dataset
 * @param collection the Spark collection containing the dataset to push
 * @return Job representing this Push operation.
 */
@SuppressWarnings("unchecked")
protected SQLEngineJob<SQLDataset> push(String datasetName, Schema schema, SparkCollection<?> collection) {
  // If this job already exists, return the existing instance. A single lookup is used so that the presence check
  // and the retrieval cannot observe two different states of the map.
  SQLEngineJobKey jobKey = new SQLEngineJobKey(datasetName, SQLEngineJobType.PUSH);
  SQLEngineJob<SQLDataset> existingJob = (SQLEngineJob<SQLDataset>) jobs.get(jobKey);
  if (existingJob != null) {
    return existingJob;
  }

  CompletableFuture<SQLDataset> future = new CompletableFuture<>();
  Runnable pushTask = () -> {
    try {
      LOG.debug("Starting push for dataset '{}'", datasetName);
      SQLDataset result = pushInternal(datasetName, schema, collection);
      LOG.debug("Completed push for dataset '{}'", datasetName);
      future.complete(result);
    } catch (Throwable t) {
      // Every failure is routed into the future so that whoever waits on the job observes it.
      future.completeExceptionally(t);
    }
  };

  // Submit before registering: if the submission is rejected, no job with a never-completing future is left behind.
  executorService.submit(pushTask);
  SQLEngineJob<SQLDataset> job = new SQLEngineJob<>(jobKey, future);
  jobs.put(jobKey, job);
  return job;
}
use of io.cdap.cdap.etl.engine.SQLEngineJobKey in project cdap by caskdata.
the class BatchSQLEngineAdapter method getDatasetForStage.
/**
 * Fetches the dataset for an input stage from the job that was registered for it.
 * <p>
 * The push job for the stage is preferred; the execute job is only consulted when no push job is registered.
 * The selected job is handed to {@code waitForJobAndThrowException} before its result is returned.
 *
 * @param stageName name of the stage whose dataset is requested
 * @return the result of the push or execute job registered for the stage
 * @throws IllegalArgumentException if neither a push nor an execute job is registered for the stage
 */
private SQLDataset getDatasetForStage(String stageName) {
  // Candidate keys in lookup priority order: push first, then execute.
  SQLEngineJobKey[] candidateKeys = {
    new SQLEngineJobKey(stageName, SQLEngineJobType.PUSH),
    new SQLEngineJobKey(stageName, SQLEngineJobType.EXECUTE)
  };

  for (SQLEngineJobKey candidate : candidateKeys) {
    if (!jobs.containsKey(candidate)) {
      continue;
    }
    // Wait for the previous push or execute job to complete before handing out its dataset.
    SQLEngineJob<SQLDataset> producer = (SQLEngineJob<SQLDataset>) jobs.get(candidate);
    waitForJobAndThrowException(producer);
    return producer.waitFor();
  }

  throw new IllegalArgumentException("No SQL Engine job exists for stage " + stageName);
}
use of io.cdap.cdap.etl.engine.SQLEngineJobKey in project cdap by caskdata.
the class BatchSQLEngineAdapter method exists.
/**
 * Checks whether a collection is known to this adapter.
 * <p>
 * A dataset is reported as existing when a job to either push or compute (execute) it has already been
 * registered under its name. When neither job is registered this method returns {@code false}; the code in this
 * method does not query the SQL engine itself.
 *
 * @param datasetName the name of the dataset to verify
 * @return {@code true} if a push or execute job is registered for the dataset, {@code false} otherwise.
 */
public boolean exists(String datasetName) {
  // Short-circuit: the execute key is only built and looked up when no push job is registered.
  return jobs.containsKey(new SQLEngineJobKey(datasetName, SQLEngineJobType.PUSH))
    || jobs.containsKey(new SQLEngineJobKey(datasetName, SQLEngineJobType.EXECUTE));
}
Aggregations