Search in sources :

Example 16 with CarbonLoadModel

use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

the class CarbonDataMetaData method beginInsert.

/**
 * Begins an insert into a carbon table.
 *
 * <p>Delegates to the parent implementation to obtain the Hive insert handle, builds a
 * {@link CarbonLoadModel} from the table's Hive schema and stores it in a {@link JobConf},
 * sets up the carbon output committer for a new job, and finally wraps the Hive handle's
 * properties (plus the serialized load model) into a {@link CarbonDataInsertTableHandle}.
 *
 * @param session     the connector session issuing the insert
 * @param tableHandle the handle of the table being inserted into
 * @return the insert handle carrying the serialized load model under
 *         {@code CarbonTableConfig.CARBON_PRESTO_LOAD_MODEL}
 * @throws IllegalStateException if the metastore does not return the table
 * @throws RuntimeException      if the load model cannot be created or the output committer
 *                               cannot be set up; the originating {@link IOException} is the cause
 */
@Override
public CarbonDataInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle) {
    HiveInsertTableHandle hiveInsertTableHandle = super.beginInsert(session, tableHandle);
    SchemaTableName tableName = hiveInsertTableHandle.getSchemaTableName();
    Optional<Table> table = this.metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName());
    Path outputPath = new Path(hiveInsertTableHandle.getLocationHandle().getJsonSerializableTargetPath());
    JobConf jobConf = ConfigurationUtils.toJobConf(this.hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, hiveInsertTableHandle.getSchemaName(), hiveInsertTableHandle.getTableName()), new Path(hiveInsertTableHandle.getLocationHandle().getJsonSerializableWritePath())));
    jobConf.set("location", outputPath.toString());
    // Fail with a descriptive message instead of a bare NoSuchElementException when the
    // metastore lookup comes back empty.
    Properties hiveSchema = MetastoreUtil.getHiveSchema(table.orElseThrow(
        () -> new IllegalStateException("Table not found in metastore: " + tableName)));
    try {
        CarbonLoadModel carbonLoadModel = HiveCarbonUtil.getCarbonLoadModel(hiveSchema, jobConf);
        CarbonTableOutputFormat.setLoadModel(jobConf, carbonLoadModel);
    } catch (IOException ex) {
        LOG.error("Error while creating carbon load model", ex);
        throw new RuntimeException(ex);
    }
    try {
        carbonOutputCommitter = new MapredCarbonOutputCommitter();
        jobContext = new JobContextImpl(jobConf, new JobID());
        carbonOutputCommitter.setupJob(jobContext);
        ThreadLocalSessionInfo.setConfigurationToCurrentThread(jobConf);
    } catch (IOException e) {
        LOG.error("error setting the output committer", e);
        // Keep the original exception as the cause so callers can see why setup failed.
        throw new RuntimeException("error setting the output committer", e);
    }
    return new CarbonDataInsertTableHandle(hiveInsertTableHandle.getSchemaTableName().getSchemaName(), hiveInsertTableHandle.getTableName(), hiveInsertTableHandle.getInputColumns(), hiveInsertTableHandle.getPageSinkMetadata(), hiveInsertTableHandle.getLocationHandle(), hiveInsertTableHandle.getBucketProperty(), hiveInsertTableHandle.getTableStorageFormat(), hiveInsertTableHandle.getPartitionStorageFormat(), ImmutableMap.of(CarbonTableConfig.CARBON_PRESTO_LOAD_MODEL, jobContext.getConfiguration().get(CarbonTableOutputFormat.LOAD_MODEL)));
}
Also used : Path(org.apache.hadoop.fs.Path) JobContextImpl(org.apache.hadoop.mapred.JobContextImpl) Table(io.prestosql.plugin.hive.metastore.Table) IOException(java.io.IOException) Properties(java.util.Properties) MapredCarbonOutputCommitter(org.apache.carbondata.hive.MapredCarbonOutputCommitter) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) HiveInsertTableHandle(io.prestosql.plugin.hive.HiveInsertTableHandle) JobConf(org.apache.hadoop.mapred.JobConf) JobID(org.apache.hadoop.mapred.JobID)

Example 17 with CarbonLoadModel

use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

the class CarbonWriterBuilder method build.

/**
 * Creates the {@link CarbonWriter} that matches the configured input type.
 *
 * <p>The returned writer is not thread safe; use the withThreadSafe() configuration when it
 * is going to be shared between threads.
 *
 * <p>Before the writer is created the builder state is validated: the path, the writer type,
 * the "written by" application name and the schema must all have been set. When no task
 * number was supplied, a random one (a UUID without dashes) is generated.
 *
 * @return an Avro, JSON, Parquet, ORC or CSV writer, depending on the configured writer type
 * @throws IOException                propagated from building the load model or the writer
 * @throws InvalidLoadOptionException propagated from building the load model or the writer
 * @throws NullPointerException       if the path is null
 * @throws RuntimeException           if the writer type, the application name or the schema
 *                                    has not been set
 */
public CarbonWriter build() throws IOException, InvalidLoadOptionException {
    // --- validation (order matters: it decides which error the caller sees first) ---
    Objects.requireNonNull(path, "path should not be null");
    if (null == this.writerType) {
        throw new RuntimeException("'writerType' must be set, use withCsvInput() or withAvroInput() or withJsonInput()  " + "API based on input");
    }
    if (this.writtenByApp == null || this.writtenByApp.isEmpty()) {
        throw new RuntimeException("'writtenBy' must be set when writing carbon files, use writtenBy() API to " + "set it, it can be the name of the application which is using the SDK");
    }
    if (null == this.schema) {
        throw new RuntimeException("schema should be set");
    }

    // --- defaults and shared configuration ---
    if (null == taskNo) {
        taskNo = UUID.randomUUID().toString().replace("-", "");
    }
    final CarbonLoadModel model = buildLoadModel(schema);
    model.setSdkWriterCores(numOfThreads);
    CarbonProperties.getInstance()
        .addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, writtenByApp);
    if (null == hadoopConf) {
        hadoopConf = new Configuration(FileFactory.getConfiguration());
    }

    // --- writer selection ---
    final CarbonWriter writer;
    if (WRITER_TYPE.AVRO == this.writerType) {
        // Avro records reach Carbon as Objects rather than Strings (needed for multi level
        // complex types), so there is nothing to convert: the flag below tells the loader
        // builder to leave the conversion step out.
        model.setLoadWithoutConverterStep(true);
        writer = new AvroCarbonWriter(model, hadoopConf, this.avroSchema);
    } else if (WRITER_TYPE.JSON == this.writerType) {
        model.setJsonFileLoad(true);
        writer = new JsonCarbonWriter(model, hadoopConf);
    } else if (WRITER_TYPE.PARQUET == this.writerType) {
        model.setLoadWithoutConverterStep(true);
        writer = new ParquetCarbonWriter(model, hadoopConf, this.avroSchema);
    } else if (WRITER_TYPE.ORC == this.writerType) {
        writer = new ORCCarbonWriter(model, hadoopConf);
    } else {
        // Anything else is treated as CSV.
        CSVCarbonWriter csvWriter = new CSVCarbonWriter(model, hadoopConf);
        boolean headerOptionGiven = this.options.containsKey(CarbonCommonConstants.FILE_HEADER);
        if (!StringUtils.isEmpty(filePath) && !headerOptionGiven) {
            csvWriter.setSkipHeader(true);
        }
        writer = csvWriter;
    }

    // Hand over the data files only when a file path was configured.
    if (!StringUtils.isEmpty(this.filePath)) {
        writer.setDataFiles(this.dataFiles);
    }
    return writer;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel)

Example 18 with CarbonLoadModel

use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

the class CarbonWriterBuilder method buildLoadModel.

/**
 * Builds a {@link CarbonLoadModel} for the given table.
 *
 * @param table     table the load model is built for
 * @param timestamp timestamp handed to the load model builder
 * @param taskNo    task number handed to the load model builder
 * @param options   load options; {@code null} is treated as an empty map
 * @return the load model, after the CSV header has been applied to it
 * @throws InvalidLoadOptionException propagated from the load model builder
 * @throws IOException                propagated from the load model builder
 */
private CarbonLoadModel buildLoadModel(CarbonTable table, long timestamp, String taskNo, Map<String, String> options) throws InvalidLoadOptionException, IOException {
    final Map<String, String> effectiveOptions =
        (options != null) ? options : new HashMap<String, String>();
    final CarbonLoadModel loadModel =
        new CarbonLoadModelBuilder(table).build(effectiveOptions, timestamp, taskNo);
    setCsvHeader(loadModel);
    return loadModel;
}
Also used : CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) CarbonLoadModelBuilder(org.apache.carbondata.processing.loading.model.CarbonLoadModelBuilder)

Example 19 with CarbonLoadModel

use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

the class CarbonStreamOutputFormatTest method testGetCarbonLoadModel.

/**
 * Stores the load model in the Hadoop configuration, reads it back and checks that the
 * restored model is present and carries the same fact timestamp as the one stored.
 */
@Test
public void testGetCarbonLoadModel() {
    try {
        // Round trip: write the model into the configuration, then read it back.
        CarbonStreamOutputFormat.setCarbonLoadModel(hadoopConf, carbonLoadModel);
        CarbonLoadModel restored = CarbonStreamOutputFormat.getCarbonLoadModel(hadoopConf);

        Assert.assertNotNull("Failed to get CarbonLoadModel", restored);
        Assert.assertEquals(
            "CarbonLoadModel should be same with previous",
            carbonLoadModel.getFactTimeStamp(),
            restored.getFactTimeStamp());
    } catch (IOException e) {
        Assert.fail("Failed to get CarbonLoadModel for CarbonStreamOutputFormat");
    }
}
Also used : CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) IOException(java.io.IOException) Test(org.junit.Test)

Example 20 with CarbonLoadModel

use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

the class MapredCarbonOutputCommitter method commitJob.

/**
 * Commits the job: merges the carbon index files of the loaded segment, writes the segment
 * file, stores the load model back into the job configuration and then delegates to the
 * wrapped carbon output committer.
 *
 * @param jobContext context of the job being committed; its configuration holds the load model
 * @throws IOException declared for failures while committing; any exception raised inside is
 *                     logged and rethrown unchanged
 */
@Override
public void commitJob(JobContext jobContext) throws IOException {
    try {
        Configuration conf = jobContext.getConfiguration();
        CarbonLoadModel loadModel = MapredCarbonOutputFormat.getLoadModel(conf);
        // Clear the thread-local session info before touching the table files.
        ThreadLocalSessionInfo.unsetAll();
        CarbonTable table = loadModel.getCarbonDataLoadSchema().getCarbonTable();

        // Step 1: merge the index files written for this segment.
        CarbonIndexFileMergeWriter indexMerger = new CarbonIndexFileMergeWriter(table);
        indexMerger.mergeCarbonIndexFilesOfSegment(
            loadModel.getSegmentId(),
            table.getTablePath(),
            false,
            String.valueOf(loadModel.getFactTimeStamp()));

        // Step 2: write the segment file for the same segment and fact timestamp.
        SegmentFileStore.writeSegmentFile(
            loadModel.getCarbonDataLoadSchema().getCarbonTable(),
            loadModel.getSegmentId(),
            String.valueOf(loadModel.getFactTimeStamp()));

        // Step 3: put the load model back into the configuration and let the wrapped
        // committer finish the job.
        CarbonTableOutputFormat.setLoadModel(conf, loadModel);
        carbonOutputCommitter.commitJob(jobContext);
    } catch (Exception failure) {
        LOGGER.error(failure);
        throw failure;
    }
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonIndexFileMergeWriter(org.apache.carbondata.core.writer.CarbonIndexFileMergeWriter) Configuration(org.apache.hadoop.conf.Configuration) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) IOException(java.io.IOException)

Aggregations

CarbonLoadModel (org.apache.carbondata.processing.loading.model.CarbonLoadModel): 21, IOException (java.io.IOException): 9, CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 6, LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails): 4, SegmentFileStore (org.apache.carbondata.core.metadata.SegmentFileStore): 3, DataLoadMetrics (org.apache.carbondata.core.util.DataLoadMetrics): 3, CarbonDataLoadSchema (org.apache.carbondata.processing.loading.model.CarbonDataLoadSchema): 3, ArrayList (java.util.ArrayList): 2, List (java.util.List): 2, Map (java.util.Map): 2, CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 2, PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec): 2, CarbonProperties (org.apache.carbondata.core.util.CarbonProperties): 2, OperationContext (org.apache.carbondata.events.OperationContext): 2, Configuration (org.apache.hadoop.conf.Configuration): 2, Path (org.apache.hadoop.fs.Path): 2, HiveInsertTableHandle (io.prestosql.plugin.hive.HiveInsertTableHandle): 1, HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity): 1, Table (io.prestosql.plugin.hive.metastore.Table): 1, SchemaTableName (io.prestosql.spi.connector.SchemaTableName): 1