Search in sources :

Example 21 with CarbonLoadModel

Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in the Apache CarbonData project.

The setupJob method of the MapredCarbonOutputCommitter class.

/**
 * Prepares the job for a Carbon table write: builds (or deserializes) the
 * {@link CarbonLoadModel}, creates the underlying {@link CarbonOutputCommitter}
 * for the table path, and runs its job setup.
 *
 * @param jobContext the Hadoop job context carrying the job configuration
 * @throws IOException if committer setup or load-model resolution fails
 */
@Override
public void setupJob(JobContext jobContext) throws IOException {
    Random random = new Random();
    // Synthetic job/task/attempt IDs: CarbonOutputCommitter needs a
    // TaskAttemptContext, but setupJob runs before any real attempt exists.
    JobID jobId = new JobID(UUID.randomUUID().toString(), 0);
    TaskID task = new TaskID(jobId, TaskType.MAP, random.nextInt());
    TaskAttemptID attemptID = new TaskAttemptID(task, random.nextInt());
    org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl context = new TaskAttemptContextImpl(jobContext.getJobConf(), attemptID);
    CarbonLoadModel carbonLoadModel = null;
    String encodedString = jobContext.getJobConf().get(CarbonTableOutputFormat.LOAD_MODEL);
    // A pre-serialized load model may already be present (e.g. set by the
    // Presto integration); prefer it over rebuilding from the configuration.
    if (encodedString != null) {
        carbonLoadModel = (CarbonLoadModel) ObjectSerializationUtil.convertStringToObject(encodedString);
    }
    boolean setLoadModelToEnv = false;
    if (null == carbonLoadModel) {
        ThreadLocalSessionInfo.setConfigurationToCurrentThread(jobContext.getConfiguration());
        carbonLoadModel = HiveCarbonUtil.getCarbonLoadModel(jobContext.getConfiguration());
        CarbonTableOutputFormat.setLoadModel(jobContext.getConfiguration(), carbonLoadModel);
        setLoadModelToEnv = true;
    }
    carbonOutputCommitter = new CarbonOutputCommitter(new Path(carbonLoadModel.getTablePath()), context);
    carbonOutputCommitter.setupJob(jobContext);
    if (setLoadModelToEnv) {
        String loadModelStr = jobContext.getConfiguration().get(CarbonTableOutputFormat.LOAD_MODEL);
        // Publish the load model string via the task environment so it reaches
        // all containers during job execution.
        appendToTaskEnv(jobContext, JobConf.MAPRED_MAP_TASK_ENV, loadModelStr);
        // FIX: previously the *map* task env value was reused as the base for
        // the reduce env, clobbering any pre-existing reduce-side settings;
        // each property now preserves its own prior value.
        appendToTaskEnv(jobContext, JobConf.MAPRED_REDUCE_TASK_ENV, loadModelStr);
    }
}

/**
 * Appends {@code carbon=<loadModelStr>} to the given comma-separated
 * task-environment property, preserving any existing entries.
 *
 * <p>FIX: guards against an unset property — the old code concatenated the raw
 * {@code get()} result, yielding a literal {@code "null,carbon=..."} prefix.
 *
 * @param jobContext   job context whose JobConf is updated in place
 * @param envKey       task env property key (map or reduce variant)
 * @param loadModelStr serialized CarbonLoadModel to publish
 */
private static void appendToTaskEnv(JobContext jobContext, String envKey, String loadModelStr) {
    String existing = jobContext.getJobConf().get(envKey);
    String prefix = (existing == null || existing.isEmpty()) ? "" : existing + ",";
    jobContext.getJobConf().set(envKey, prefix + "carbon=" + loadModelStr);
}
Also used : Path(org.apache.hadoop.fs.Path) TaskID(org.apache.hadoop.mapreduce.TaskID) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) CarbonOutputCommitter(org.apache.carbondata.hadoop.api.CarbonOutputCommitter) Random(java.util.Random) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) JobID(org.apache.hadoop.mapreduce.JobID)

Aggregations

CarbonLoadModel (org.apache.carbondata.processing.loading.model.CarbonLoadModel)21 IOException (java.io.IOException)9 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)6 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)4 SegmentFileStore (org.apache.carbondata.core.metadata.SegmentFileStore)3 DataLoadMetrics (org.apache.carbondata.core.util.DataLoadMetrics)3 CarbonDataLoadSchema (org.apache.carbondata.processing.loading.model.CarbonDataLoadSchema)3 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Map (java.util.Map)2 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)2 PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec)2 CarbonProperties (org.apache.carbondata.core.util.CarbonProperties)2 OperationContext (org.apache.carbondata.events.OperationContext)2 Configuration (org.apache.hadoop.conf.Configuration)2 Path (org.apache.hadoop.fs.Path)2 HiveInsertTableHandle (io.prestosql.plugin.hive.HiveInsertTableHandle)1 HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity)1 Table (io.prestosql.plugin.hive.metastore.Table)1 SchemaTableName (io.prestosql.spi.connector.SchemaTableName)1