use of alluxio.table.common.transform.TransformPlan in project alluxio by Alluxio.
the class Table method getTransformPlans.
/**
 * Collects the transformation plans for the partitions of this table that have not yet been
 * transformed with the given definition.
 *
 * @param definition the transformation definition
 * @return a list of {@link TransformPlan}, one for each partition that still needs transforming
 * @throws IOException if the plans cannot be generated
 */
public List<TransformPlan> getTransformPlans(TransformDefinition definition) throws IOException {
  // Presized for the case where no partition has been transformed yet.
  List<TransformPlan> pendingPlans = new ArrayList<>(getPartitions().size());
  for (Partition candidate : getPartitions()) {
    // Partitions already transformed with this exact definition need no plan.
    if (candidate.isTransformed(definition.getDefinition())) {
      continue;
    }
    TransformContext context =
        new TransformContext(mDatabase.getName(), mName, candidate.getSpec());
    TransformPlan plan = candidate.getTransformPlan(context, definition);
    pendingPlans.add(plan);
  }
  return pendingPlans;
}
use of alluxio.table.common.transform.TransformPlan in project alluxio by Alluxio.
the class HiveLayout method getTransformPlan.
/**
 * Creates the plan that transforms this layout into a new layout located at the transformed
 * path generated by the given context.
 *
 * The output location is the scheme and authority derived from the global server configuration,
 * followed by the path component of the generated transformed path.
 */
@Override
public TransformPlan getTransformPlan(TransformContext transformContext,
    TransformDefinition definition) throws IOException {
  AlluxioURI generatedPath = transformContext.generateTransformedPath();
  // Only the path component of the generated URI is kept; scheme and authority come from the
  // global server configuration.
  String outputLocation =
      ConfigurationUtils.getSchemeAuthority(ServerConfiguration.global())
          + generatedPath.getPath();
  AlluxioURI targetUri = new AlluxioURI(outputLocation);
  return new TransformPlan(this, transformLayout(targetUri, definition), definition);
}
use of alluxio.table.common.transform.TransformPlan in project alluxio by Alluxio.
the class TransformManager method execute.
/**
 * Starts the job that transforms a table and journals the information about the started job.
 *
 * A single job of type {@link CompositeConfig} is submitted: it executes the plans
 * concurrently, and the jobs inside each plan are executed sequentially.
 *
 * The job is only triggered here, this method does not wait for it to finish. The returned job
 * ID can be used to poll the job service for the status of the transformation.
 *
 * @param dbName the database name
 * @param tableName the table name
 * @param definition the parsed transformation definition
 * @return the job ID for the transformation job
 * @throws IOException when all partitions of the table have already been transformed with the
 *         same definition, when there is an ongoing transformation on the table, or when the
 *         transformation job fails to be started
 */
public long execute(String dbName, String tableName, TransformDefinition definition)
    throws IOException {
  List<TransformPlan> plans = mCatalog.getTransformPlan(dbName, tableName, definition);
  if (plans.isEmpty()) {
    // Nothing left to transform for this definition.
    throw new IOException(ExceptionMessage.TABLE_ALREADY_TRANSFORMED.getMessage(
        dbName, tableName, definition.getDefinition()));
  }

  Pair<String, String> permitKey = new Pair<>(dbName, tableName);
  // Atomically try to acquire the permit to execute the transformation job.
  // Acquiring the permit is not journaled: if the master crashes after it is acquired, the
  // temporary placeholder entry is gone after the restart, which is the correct behavior.
  Long ongoingJobId = mState.acquireJobPermit(permitKey);
  if (ongoingJobId != null) {
    // A non-null value means the permit is already held for this table.
    if (ongoingJobId == INVALID_JOB_ID) {
      throw new IOException("A concurrent transformation request is going to be executed");
    }
    throw new IOException(ExceptionMessage.TABLE_BEING_TRANSFORMED.getMessage(
        ongoingJobId.toString(), tableName, dbName));
  }

  // Each plan becomes one sequential composite job; the outer composite job runs them
  // concurrently.
  ArrayList<JobConfig> perPlanJobs = new ArrayList<>(plans.size());
  for (TransformPlan plan : plans) {
    CompositeConfig sequentialJob = new CompositeConfig(plan.getJobConfigs(), true);
    perPlanJobs.add(sequentialJob);
  }
  CompositeConfig transformJob = new CompositeConfig(perPlanJobs, false);

  final long jobId;
  try {
    jobId = mJobMasterClient.run(transformJob);
  } catch (IOException e) {
    // The job could not be started, so give the execution permit back.
    // Releasing the permit is not journaled either: after a master restart the placeholder
    // entry no longer exists, which is the correct behavior.
    mState.releaseJobPermit(permitKey);
    String error = String.format("Fails to start job to transform table %s in database %s",
        tableName, dbName);
    LOG.error(error, e);
    throw new IOException(error, e);
  }

  // Map the spec of each base layout to the layout it will be transformed into.
  Map<String, Layout> layoutsBySpec = new HashMap<>(plans.size());
  for (TransformPlan plan : plans) {
    String baseSpec = plan.getBaseLayout().getSpec();
    layoutsBySpec.put(baseSpec, plan.getTransformedLayout());
  }

  AddTransformJobInfoEntry jobInfoEntry = AddTransformJobInfoEntry.newBuilder()
      .setDbName(dbName)
      .setTableName(tableName)
      .setDefinition(definition.getDefinition())
      .setJobId(jobId)
      .putAllTransformedLayouts(Maps.transformValues(layoutsBySpec, Layout::toProto))
      .build();
  try (JournalContext journalContext = mCreateJournalContext.apply()) {
    applyAndJournal(journalContext,
        Journal.JournalEntry.newBuilder().setAddTransformJobInfo(jobInfoEntry).build());
  }
  return jobId;
}
use of alluxio.table.common.transform.TransformPlan in project alluxio by Alluxio.
the class AlluxioCatalogTest method completeTransformTable.
/**
 * Checks that completing a table transformation marks every partition as transformed and
 * installs the transformed layout that was registered for the partition's spec.
 */
@Test
public void completeTransformTable() throws IOException {
  String dbName = "testdb";
  TestDatabase.genTable(1, 10, false);
  mCatalog.attachDatabase(NoopJournalContext.INSTANCE, TestUdbFactory.TYPE, "connect_URI",
      TestDatabase.TEST_UDB_NAME, dbName, Collections.emptyMap(), false);
  String tableName = TestDatabase.getTableName(0);
  Table table = mCatalog.getTable(dbName, tableName);
  String definition = TRANSFORM_DEFINITION.getDefinition();

  // Precondition: no partition has been transformed with this definition yet.
  table.getPartitions().forEach(untouched -> {
    assertFalse(untouched.isTransformed(definition));
  });

  // When generating transform plan, the authority of the output path
  // will be determined based on this hostname configuration.
  ServerConfiguration.set(PropertyKey.MASTER_HOSTNAME, "localhost");
  List<TransformPlan> plans = mCatalog.getTransformPlan(dbName, tableName, TRANSFORM_DEFINITION);

  // Key each transformed layout by the spec of the layout it replaces.
  Map<String, Layout> layoutsBySpec = Maps.newHashMapWithExpectedSize(plans.size());
  for (TransformPlan plan : plans) {
    layoutsBySpec.put(plan.getBaseLayout().getSpec(), plan.getTransformedLayout());
  }

  mCatalog.completeTransformTable(NoopJournalContext.INSTANCE, dbName, tableName, definition,
      layoutsBySpec);

  table.getPartitions().forEach(transformed -> {
    assertTrue(transformed.isTransformed(definition));
    assertEquals(layoutsBySpec.get(transformed.getSpec()), transformed.getLayout());
  });
}
use of alluxio.table.common.transform.TransformPlan in project alluxio by Alluxio.
the class AlluxioCatalogTest method getTransformPlan.
/**
 * Checks that a table with a single partition yields exactly one transform plan, and that the
 * plan's base layout is the layout of that partition.
 */
@Test
public void getTransformPlan() throws Exception {
  String dbName = "testdb";
  TestDatabase.genTable(1, 1, false);
  mCatalog.attachDatabase(NoopJournalContext.INSTANCE, TestUdbFactory.TYPE, "connect_URI",
      TestDatabase.TEST_UDB_NAME, dbName, Collections.emptyMap(), false);
  assertEquals(1, mCatalog.getAllDatabases().size());
  assertEquals(1, mCatalog.getAllTables(dbName).size());
  String tableName = TestDatabase.getTableName(0);

  // When generating transform plan, the authority of the output path
  // will be determined based on this hostname configuration.
  ServerConfiguration.set(PropertyKey.MASTER_HOSTNAME, "localhost");
  List<TransformPlan> generatedPlans =
      mCatalog.getTransformPlan(dbName, tableName, TRANSFORM_DEFINITION);
  assertEquals(1, generatedPlans.size());

  Table table = mCatalog.getTable(dbName, tableName);
  assertEquals(1, table.getPartitions().size());

  // The single plan must start from the layout of the single partition.
  Layout expectedBaseLayout = table.getPartitions().get(0).getLayout();
  Layout actualBaseLayout = generatedPlans.get(0).getBaseLayout();
  assertEquals(expectedBaseLayout, actualBaseLayout);
}
Aggregations