Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
The class CarbonDataMetaData, method beginInsert:
@Override
public CarbonDataInsertTableHandle beginInsert(ConnectorSession session,
    ConnectorTableHandle tableHandle) {
  HiveInsertTableHandle hiveInsertTableHandle = super.beginInsert(session, tableHandle);
  SchemaTableName tableName = hiveInsertTableHandle.getSchemaTableName();
  Optional<Table> table = this.metastore.getTable(new HiveIdentity(session),
      tableName.getSchemaName(), tableName.getTableName());
  Path outputPath =
      new Path(hiveInsertTableHandle.getLocationHandle().getJsonSerializableTargetPath());
  JobConf jobConf = ConfigurationUtils.toJobConf(this.hdfsEnvironment.getConfiguration(
      new HdfsEnvironment.HdfsContext(session, hiveInsertTableHandle.getSchemaName(),
          hiveInsertTableHandle.getTableName()),
      new Path(hiveInsertTableHandle.getLocationHandle().getJsonSerializableWritePath())));
  jobConf.set("location", outputPath.toString());
  Properties hiveSchema = MetastoreUtil.getHiveSchema(table.get());
  try {
    // Build the load model from the Hive table schema and serialize it into the JobConf.
    CarbonLoadModel carbonLoadModel = HiveCarbonUtil.getCarbonLoadModel(hiveSchema, jobConf);
    CarbonTableOutputFormat.setLoadModel(jobConf, carbonLoadModel);
  } catch (IOException ex) {
    LOG.error("Error while creating carbon load model", ex);
    throw new RuntimeException(ex);
  }
  try {
    // Open the segment for this insert and bind the configuration to the current thread.
    carbonOutputCommitter = new MapredCarbonOutputCommitter();
    jobContext = new JobContextImpl(jobConf, new JobID());
    carbonOutputCommitter.setupJob(jobContext);
    ThreadLocalSessionInfo.setConfigurationToCurrentThread(jobConf);
  } catch (IOException e) {
    LOG.error("Error while setting the output committer", e);
    throw new RuntimeException("Error while setting the output committer", e);
  }
  // Hand the serialized load model to the page sink through the insert handle.
  return new CarbonDataInsertTableHandle(
      hiveInsertTableHandle.getSchemaTableName().getSchemaName(),
      hiveInsertTableHandle.getTableName(),
      hiveInsertTableHandle.getInputColumns(),
      hiveInsertTableHandle.getPageSinkMetadata(),
      hiveInsertTableHandle.getLocationHandle(),
      hiveInsertTableHandle.getBucketProperty(),
      hiveInsertTableHandle.getTableStorageFormat(),
      hiveInsertTableHandle.getPartitionStorageFormat(),
      ImmutableMap.of(CarbonTableConfig.CARBON_PRESTO_LOAD_MODEL,
          jobContext.getConfiguration().get(CarbonTableOutputFormat.LOAD_MODEL)));
}
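The handle's extra map carries the load model as the serialized string that setLoadModel() placed in the JobConf. A minimal sketch of that round trip, assuming an existing CarbonLoadModel instance; the class and variable names here are illustrative, not part of the source above:

import org.apache.carbondata.hadoop.api.CarbonTableOutputFormat;
import org.apache.carbondata.processing.loading.model.CarbonLoadModel;
import org.apache.hadoop.mapred.JobConf;

import java.io.IOException;

class LoadModelRoundTrip {
  // Serialize a load model into a JobConf and read it back, mirroring what
  // beginInsert() stores in the handle under CARBON_PRESTO_LOAD_MODEL and
  // what the write path later recovers with getLoadModel().
  static CarbonLoadModel roundTrip(CarbonLoadModel carbonLoadModel) throws IOException {
    JobConf conf = new JobConf();
    CarbonTableOutputFormat.setLoadModel(conf, carbonLoadModel);
    // The raw serialized string is what the insert handle carries downstream.
    String serialized = conf.get(CarbonTableOutputFormat.LOAD_MODEL);
    assert serialized != null;
    return CarbonTableOutputFormat.getLoadModel(conf);
  }
}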
Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
The class CarbonWriterBuilder, method build:
/**
 * Build a {@link CarbonWriter}.
 * This writer is not thread safe; use the withThreadSafe() configuration
 * in a multi-threaded environment.
 *
 * @return CarbonWriter (AvroCarbonWriter, CSVCarbonWriter, JsonCarbonWriter,
 *         ParquetCarbonWriter, or ORCCarbonWriter, depending on the input type)
 * @throws IOException
 * @throws InvalidLoadOptionException
 */
public CarbonWriter build() throws IOException, InvalidLoadOptionException {
  Objects.requireNonNull(path, "path should not be null");
  if (this.writerType == null) {
    throw new RuntimeException("'writerType' must be set; use the withCsvInput(), "
        + "withAvroInput() or withJsonInput() API based on the input type");
  }
  if (this.writtenByApp == null || this.writtenByApp.isEmpty()) {
    throw new RuntimeException("'writtenBy' must be set when writing carbon files; use the "
        + "writtenBy() API to set it, e.g. to the name of the application using the SDK");
  }
  if (this.schema == null) {
    throw new RuntimeException("schema should be set");
  }
  if (taskNo == null) {
    taskNo = UUID.randomUUID().toString().replace("-", "");
  }
  CarbonLoadModel loadModel = buildLoadModel(schema);
  loadModel.setSdkWriterCores(numOfThreads);
  CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, writtenByApp);
  if (hadoopConf == null) {
    hadoopConf = new Configuration(FileFactory.getConfiguration());
  }
  CarbonWriter carbonWriter;
  if (this.writerType == WRITER_TYPE.AVRO) {
    // Avro records are pushed to Carbon as objects, not as strings, in order to
    // support multi-level complex types. Since no conversion is needed, the
    // converter step is removed from the load: setLoadWithoutConverterStep(true)
    // tells the loader builder to skip the conversion step.
    loadModel.setLoadWithoutConverterStep(true);
    carbonWriter = new AvroCarbonWriter(loadModel, hadoopConf, this.avroSchema);
  } else if (this.writerType == WRITER_TYPE.JSON) {
    loadModel.setJsonFileLoad(true);
    carbonWriter = new JsonCarbonWriter(loadModel, hadoopConf);
  } else if (this.writerType == WRITER_TYPE.PARQUET) {
    loadModel.setLoadWithoutConverterStep(true);
    carbonWriter = new ParquetCarbonWriter(loadModel, hadoopConf, this.avroSchema);
  } else if (this.writerType == WRITER_TYPE.ORC) {
    carbonWriter = new ORCCarbonWriter(loadModel, hadoopConf);
  } else {
    // CSV: when loading existing CSV files and no file header option is given,
    // treat the first line of each file as the header and skip it.
    CSVCarbonWriter csvCarbonWriter = new CSVCarbonWriter(loadModel, hadoopConf);
    if (!StringUtils.isEmpty(filePath)
        && !this.options.containsKey(CarbonCommonConstants.FILE_HEADER)) {
      csvCarbonWriter.setSkipHeader(true);
    }
    carbonWriter = csvCarbonWriter;
  }
  if (!StringUtils.isEmpty(this.filePath)) {
    carbonWriter.setDataFiles(this.dataFiles);
  }
  return carbonWriter;
}
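For context, this is the builder that the public SDK entry point drives. A minimal end-to-end sketch, assuming a writable local output path; the path, class, and field names here are illustrative:

import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.sdk.file.CarbonWriter;
import org.apache.carbondata.sdk.file.Field;
import org.apache.carbondata.sdk.file.Schema;

public class CsvWriteExample {
  public static void main(String[] args) throws Exception {
    Field[] fields = new Field[]{
        new Field("name", DataTypes.STRING),
        new Field("age", DataTypes.INT)
    };
    // withCsvInput() selects WRITER_TYPE.CSV, writtenBy() satisfies the
    // writtenByApp check above, and build() returns a CSVCarbonWriter.
    CarbonWriter writer = CarbonWriter.builder()
        .outputPath("/tmp/carbon_sdk_out")   // illustrative output location
        .withCsvInput(new Schema(fields))
        .writtenBy("CsvWriteExample")
        .build();
    for (int i = 0; i < 10; i++) {
      writer.write(new String[]{"name" + i, String.valueOf(i)});
    }
    writer.close();
  }
}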
Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
The class CarbonWriterBuilder, method buildLoadModel:
/**
 * Build a {@link CarbonLoadModel}.
 */
private CarbonLoadModel buildLoadModel(CarbonTable table, long timestamp, String taskNo,
    Map<String, String> options) throws InvalidLoadOptionException, IOException {
  if (options == null) {
    options = new HashMap<>();
  }
  CarbonLoadModelBuilder builder = new CarbonLoadModelBuilder(table);
  CarbonLoadModel model = builder.build(options, timestamp, taskNo);
  setCsvHeader(model);
  return model;
}
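A hedged sketch of driving CarbonLoadModelBuilder directly under the same contract, assuming an already-resolved CarbonTable instance named carbonTable; the option key shown is one of the standard load options:

// carbonTable is assumed to be an already-resolved CarbonTable instance.
Map<String, String> options = new HashMap<>();
options.put("bad_records_action", "FORCE");  // a standard load option
CarbonLoadModelBuilder builder = new CarbonLoadModelBuilder(carbonTable);
CarbonLoadModel model = builder.build(options, System.currentTimeMillis(), "0");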
Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
The class CarbonStreamOutputFormatTest, method testGetCarbonLoadModel:
@Test
public void testGetCarbonLoadModel() {
  try {
    CarbonStreamOutputFormat.setCarbonLoadModel(hadoopConf, carbonLoadModel);
    CarbonLoadModel model = CarbonStreamOutputFormat.getCarbonLoadModel(hadoopConf);
    Assert.assertNotNull("Failed to get CarbonLoadModel", model);
    Assert.assertEquals("CarbonLoadModel should be the same as the one set previously",
        carbonLoadModel.getFactTimeStamp(), model.getFactTimeStamp());
  } catch (IOException e) {
    Assert.fail("Failed to get CarbonLoadModel for CarbonStreamOutputFormat");
  }
}
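The hadoopConf and carbonLoadModel fields are fixtures of the test class. A hedged sketch of a minimal setUp that would satisfy this test; the actual fixture in the project likely builds a fuller model from a test store:

@Before
public void setUp() {
  hadoopConf = new Configuration();
  carbonLoadModel = new CarbonLoadModel();
  // Only the fact timestamp is exercised by the assertions above.
  carbonLoadModel.setFactTimeStamp(System.currentTimeMillis());
}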
Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
The class MapredCarbonOutputCommitter, method commitJob:
@Override
public void commitJob(JobContext jobContext) throws IOException {
  try {
    Configuration configuration = jobContext.getConfiguration();
    CarbonLoadModel carbonLoadModel = MapredCarbonOutputFormat.getLoadModel(configuration);
    ThreadLocalSessionInfo.unsetAll();
    CarbonTable carbonTable = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable();
    // Merge the per-task carbon index files of the segment, then persist the
    // segment file, before delegating the final commit.
    new CarbonIndexFileMergeWriter(carbonTable).mergeCarbonIndexFilesOfSegment(
        carbonLoadModel.getSegmentId(), carbonTable.getTablePath(), false,
        String.valueOf(carbonLoadModel.getFactTimeStamp()));
    SegmentFileStore.writeSegmentFile(carbonTable, carbonLoadModel.getSegmentId(),
        String.valueOf(carbonLoadModel.getFactTimeStamp()));
    CarbonTableOutputFormat.setLoadModel(configuration, carbonLoadModel);
    carbonOutputCommitter.commitJob(jobContext);
  } catch (Exception e) {
    LOGGER.error(e);
    throw e;
  }
}
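Read together with beginInsert above, this gives the committer's full lifecycle: setupJob() opens the segment before the tasks run, and commitJob() merges the index files and writes the segment file afterwards. A minimal sketch, assuming a jobConf that already carries a serialized load model; variable names are illustrative:

JobContext jobContext = new JobContextImpl(jobConf, new JobID());
MapredCarbonOutputCommitter committer = new MapredCarbonOutputCommitter();
committer.setupJob(jobContext);   // opens the segment, as in beginInsert
// ... tasks write their data files into the segment ...
committer.commitJob(jobContext);  // merges index files, writes the segment file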