Search in sources:

Example 1 with TransformPlan

use of alluxio.table.common.transform.TransformPlan in project alluxio by Alluxio.

The getTransformPlans method of the Table class.

/**
 * Builds the plans needed to transform this table with the given definition.
 *
 * A plan is produced for each partition that does not report itself as already transformed
 * with the definition; partitions that do are skipped.
 *
 * @param definition the transformation definition
 * @return a list of {@link TransformPlan} to transform this table, one per partition that still
 *    needs the transformation
 * @throws IOException propagated from the calls made while building the plans
 */
public List<TransformPlan> getTransformPlans(TransformDefinition definition) throws IOException {
    List<TransformPlan> pending = new ArrayList<>(getPartitions().size());
    for (Partition candidate : getPartitions()) {
        if (candidate.isTransformed(definition.getDefinition())) {
            // Already transformed with this definition, so there is nothing to plan.
            continue;
        }
        TransformContext context =
            new TransformContext(mDatabase.getName(), mName, candidate.getSpec());
        TransformPlan plan = candidate.getTransformPlan(context, definition);
        pending.add(plan);
    }
    return pending;
}
Also used : UdbPartition(alluxio.table.common.UdbPartition) TransformContext(alluxio.table.common.transform.TransformContext) ArrayList(java.util.ArrayList) TransformPlan(alluxio.table.common.transform.TransformPlan)

Example 2 with TransformPlan

use of alluxio.table.common.transform.TransformPlan in project alluxio by Alluxio.

The getTransformPlan method of the HiveLayout class.

/**
 * Creates the plan that transforms this layout according to the given definition.
 *
 * The output location is the path generated by the transform context, prefixed with the
 * scheme and authority derived from the global server configuration.
 *
 * @param transformContext the context used to generate the output path
 * @param definition the transformation definition
 * @return a plan going from this layout to the transformed layout
 * @throws IOException declared by the overridden method
 */
@Override
public TransformPlan getTransformPlan(TransformContext transformContext, TransformDefinition definition) throws IOException {
    AlluxioURI relativeOutput = transformContext.generateTransformedPath();
    // Qualify the generated path with the scheme and authority of this deployment.
    String qualifiedOutput =
        ConfigurationUtils.getSchemeAuthority(ServerConfiguration.global())
            + relativeOutput.getPath();
    AlluxioURI qualifiedUri = new AlluxioURI(qualifiedOutput);
    HiveLayout target = transformLayout(qualifiedUri, definition);
    return new TransformPlan(this, target, definition);
}
Also used : TransformPlan(alluxio.table.common.transform.TransformPlan) AlluxioURI(alluxio.AlluxioURI)

Example 3 with TransformPlan

use of alluxio.table.common.transform.TransformPlan in project alluxio by Alluxio.

The execute method of the TransformManager class.

/**
 * Starts the transformation of a table and returns the ID of the job that performs it.
 *
 * The transformation job has type {@link CompositeConfig}. It runs the plans concurrently,
 * while the jobs belonging to a single plan run sequentially.
 *
 * The job is only triggered here; this method does not wait for it to finish. Use the returned
 * job ID to poll the job service for the status of the transformation.
 *
 * @param dbName the database name
 * @param tableName the table name
 * @param definition the parsed transformation definition
 * @return the job ID for the transformation job
 * @throws IOException when there is an ongoing transformation on the table, or the transformation
 *    job fails to be started, or all partitions of the table have been transformed with the same
 *    definition
 */
public long execute(String dbName, String tableName, TransformDefinition definition) throws IOException {
    List<TransformPlan> plans = mCatalog.getTransformPlan(dbName, tableName, definition);
    if (plans.isEmpty()) {
        // No plan means there is no partition left to transform with this definition.
        throw new IOException(ExceptionMessage.TABLE_ALREADY_TRANSFORMED.getMessage(dbName, tableName, definition.getDefinition()));
    }

    Pair<String, String> tableKey = new Pair<>(dbName, tableName);
    // Atomically try to acquire the permit to execute the transformation job.
    // This PUT does not need to be journaled: if it succeeds and the master then crashes,
    // the temporary placeholder entry will not exist after restart, which is the correct
    // behavior.
    Long runningJobId = mState.acquireJobPermit(tableKey);
    if (runningJobId != null && runningJobId == INVALID_JOB_ID) {
        // Another request holds the permit but has not started its job yet.
        throw new IOException("A concurrent transformation request is going to be executed");
    }
    if (runningJobId != null) {
        // A job is already transforming this table.
        throw new IOException(ExceptionMessage.TABLE_BEING_TRANSFORMED.getMessage(runningJobId.toString(), tableName, dbName));
    }

    // Inner composites: the jobs of one plan, executed sequentially.
    ArrayList<JobConfig> perPlanJobs = new ArrayList<>(plans.size());
    for (TransformPlan plan : plans) {
        CompositeConfig sequentialJobs = new CompositeConfig(plan.getJobConfigs(), true);
        perPlanJobs.add(sequentialJobs);
    }
    // Outer composite: all plans, executed concurrently.
    CompositeConfig transformJob = new CompositeConfig(perPlanJobs, false);

    long jobId;
    try {
        jobId = mJobMasterClient.run(transformJob);
    } catch (IOException e) {
        // The job failed to start, so give back the permit acquired above.
        // This REMOVE does not need to be journaled: if the master crashes, the permit
        // placeholder entry will not exist after restart, which is the correct behavior.
        mState.releaseJobPermit(tableKey);
        String error = String.format("Fails to start job to transform table %s in database %s", tableName, dbName);
        LOG.error(error, e);
        throw new IOException(error, e);
    }

    // Record, per base layout spec, the layout each plan transforms to.
    Map<String, Layout> layoutsBySpec = new HashMap<>(plans.size());
    plans.forEach(plan ->
        layoutsBySpec.put(plan.getBaseLayout().getSpec(), plan.getTransformedLayout()));

    AddTransformJobInfoEntry jobInfo = AddTransformJobInfoEntry.newBuilder()
        .setDbName(dbName)
        .setTableName(tableName)
        .setDefinition(definition.getDefinition())
        .setJobId(jobId)
        .putAllTransformedLayouts(Maps.transformValues(layoutsBySpec, Layout::toProto))
        .build();
    try (JournalContext journalContext = mCreateJournalContext.apply()) {
        Journal.JournalEntry entry =
            Journal.JournalEntry.newBuilder().setAddTransformJobInfo(jobInfo).build();
        applyAndJournal(journalContext, entry);
    }
    return jobId;
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) JournalContext(alluxio.master.journal.JournalContext) ArrayList(java.util.ArrayList) IOException(java.io.IOException) JobConfig(alluxio.job.JobConfig) Layout(alluxio.table.common.Layout) AddTransformJobInfoEntry(alluxio.proto.journal.Table.AddTransformJobInfoEntry) TransformPlan(alluxio.table.common.transform.TransformPlan) CompositeConfig(alluxio.job.workflow.composite.CompositeConfig) Pair(alluxio.collections.Pair)

Example 4 with TransformPlan

use of alluxio.table.common.transform.TransformPlan in project alluxio by Alluxio.

The completeTransformTable method of the AlluxioCatalogTest class.

/**
 * Checks that completing a transformation marks every partition of the table as transformed
 * and gives each partition the transformed layout registered under its spec.
 */
@Test
public void completeTransformTable() throws IOException {
    String dbName = "testdb";
    TestDatabase.genTable(1, 10, false);
    mCatalog.attachDatabase(NoopJournalContext.INSTANCE, TestUdbFactory.TYPE, "connect_URI", TestDatabase.TEST_UDB_NAME, dbName, Collections.emptyMap(), false);
    String tableName = TestDatabase.getTableName(0);
    Table table = mCatalog.getTable(dbName, tableName);

    // Precondition: no partition is transformed yet.
    table.getPartitions().forEach(untouched -> {
        assertFalse(untouched.isTransformed(TRANSFORM_DEFINITION.getDefinition()));
    });

    // When generating transform plan, the authority of the output path
    // will be determined based on this hostname configuration.
    ServerConfiguration.set(PropertyKey.MASTER_HOSTNAME, "localhost");
    List<TransformPlan> plans = mCatalog.getTransformPlan(dbName, tableName, TRANSFORM_DEFINITION);

    // Map each base layout spec to the layout its plan produces.
    Map<String, Layout> expectedLayouts = Maps.newHashMapWithExpectedSize(plans.size());
    for (TransformPlan plan : plans) {
        expectedLayouts.put(plan.getBaseLayout().getSpec(), plan.getTransformedLayout());
    }

    mCatalog.completeTransformTable(NoopJournalContext.INSTANCE, dbName, tableName, TRANSFORM_DEFINITION.getDefinition(), expectedLayouts);

    // Every partition must now be transformed and carry the expected layout.
    table.getPartitions().forEach(transformed -> {
        assertTrue(transformed.isTransformed(TRANSFORM_DEFINITION.getDefinition()));
        assertEquals(expectedLayouts.get(transformed.getSpec()), transformed.getLayout());
    });
}
Also used : UdbTable(alluxio.table.common.udb.UdbTable) Layout(alluxio.table.common.Layout) HiveLayout(alluxio.table.common.layout.HiveLayout) TransformPlan(alluxio.table.common.transform.TransformPlan) Test(org.junit.Test)

Example 5 with TransformPlan

use of alluxio.table.common.transform.TransformPlan in project alluxio by Alluxio.

The getTransformPlan method of the AlluxioCatalogTest class.

/**
 * Checks that a table with a single partition yields exactly one transform plan, and that the
 * plan's base layout equals the layout of that partition.
 */
@Test
public void getTransformPlan() throws Exception {
    String dbName = "testdb";
    TestDatabase.genTable(1, 1, false);
    mCatalog.attachDatabase(NoopJournalContext.INSTANCE, TestUdbFactory.TYPE, "connect_URI", TestDatabase.TEST_UDB_NAME, dbName, Collections.emptyMap(), false);

    // Sanity-check the attached database before asking for plans.
    assertEquals(1, mCatalog.getAllDatabases().size());
    assertEquals(1, mCatalog.getAllTables(dbName).size());
    String tableName = TestDatabase.getTableName(0);

    // When generating transform plan, the authority of the output path
    // will be determined based on this hostname configuration.
    ServerConfiguration.set(PropertyKey.MASTER_HOSTNAME, "localhost");
    List<TransformPlan> plans = mCatalog.getTransformPlan(dbName, tableName, TRANSFORM_DEFINITION);
    assertEquals(1, plans.size());

    Table table = mCatalog.getTable(dbName, tableName);
    assertEquals(1, table.getPartitions().size());

    // The single plan must start from the single partition's current layout.
    Object partitionLayout = table.getPartitions().get(0).getLayout();
    Object planBaseLayout = plans.get(0).getBaseLayout();
    assertEquals(partitionLayout, planBaseLayout);
}
Also used : UdbTable(alluxio.table.common.udb.UdbTable) TransformPlan(alluxio.table.common.transform.TransformPlan) Test(org.junit.Test)

Aggregations

TransformPlan (alluxio.table.common.transform.TransformPlan): 7, Test (org.junit.Test): 4, UdbTable (alluxio.table.common.udb.UdbTable): 3, Layout (alluxio.table.common.Layout): 2, ArrayList (java.util.ArrayList): 2, AlluxioURI (alluxio.AlluxioURI): 1, Pair (alluxio.collections.Pair): 1, JobConfig (alluxio.job.JobConfig): 1, CompositeConfig (alluxio.job.workflow.composite.CompositeConfig): 1, JournalContext (alluxio.master.journal.JournalContext): 1, AddTransformJobInfoEntry (alluxio.proto.journal.Table.AddTransformJobInfoEntry): 1, UdbPartition (alluxio.table.common.UdbPartition): 1, HiveLayout (alluxio.table.common.layout.HiveLayout): 1, TransformContext (alluxio.table.common.transform.TransformContext): 1, TransformDefinition (alluxio.table.common.transform.TransformDefinition): 1, IOException (java.io.IOException): 1, HashMap (java.util.HashMap): 1, ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 1