use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
the class CarbonOutputCommitter method abortJob.
/**
 * Marks the load as failed in the table status and removes the temporary segment
 * folder/files left behind by this job, when they exist. The segment lock, if one
 * is held, is released regardless of how the cleanup ends.
 *
 * @param context job context whose configuration carries the load model
 * @param state   final job state, only used for logging
 * @throws IOException if the parent abort or any of the cleanup steps fail
 */
@Override
public void abortJob(JobContext context, JobStatus.State state) throws IOException {
  try {
    super.abortJob(context, state);
    CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(context.getConfiguration());
    CarbonLoaderUtil.updateTableStatusForFailure(loadModel);

    // Names shared by every path that is touched below.
    String segmentFileName = loadModel.getSegmentId() + "_" + loadModel.getFactTimeStamp();
    String segmentFileWithExt = segmentFileName + CarbonTablePath.SEGMENT_EXT;
    String tempFolderName = segmentFileName + ".tmp";

    LoadMetadataDetails failedLoadDetail = loadModel.getCurrentLoadMetadataDetail();
    String segmentFilesDir = CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath())
        + CarbonCommonConstants.FILE_SEPARATOR;

    if (failedLoadDetail != null
        && !failedLoadDetail.getSegmentStatus().equals(SegmentStatus.SUCCESS)) {
      // will be used while cleaning.
      if (FileFactory.getCarbonFile(segmentFilesDir + segmentFileWithExt).exists()) {
        failedLoadDetail.setSegmentFile(segmentFileWithExt);
      }
    }

    // Clean the temp files: delete temp segment folder
    CarbonFile tempSegmentFolder = FileFactory.getCarbonFile(segmentFilesDir + tempFolderName);
    if (tempSegmentFolder.exists()) {
      FileFactory.deleteAllCarbonFilesOfDir(tempSegmentFolder);
    }

    // Delete the temp data folders of this job if exists
    CarbonFile segmentFile = FileFactory.getCarbonFile(segmentFilesDir + segmentFileWithExt);
    if (segmentFile.exists()) {
      SegmentFileStore store = new SegmentFileStore(loadModel.getTablePath(), segmentFileWithExt);
      SegmentFileStore.removeTempFolder(
          store.getLocationMap(), tempFolderName, loadModel.getTablePath());
    }
    LOGGER.error("Loading failed with job status : " + state);
  } finally {
    if (segmentLock != null) {
      segmentLock.unlock();
    }
  }
}
use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
the class CarbonOutputCommitter method commitJob.
/**
 * Update the tablestatus as success after job is success.
 *
 * <p>For hive partition tables the work is delegated to {@code commitJobForPartition}; a failure
 * there marks the load as failed in the table status and is rethrown as an {@link IOException}
 * that keeps the original exception as its cause. For all other tables the temporary segment
 * files are merged into a single segment file, the data is moved out of the temp folder, and the
 * new load entry is recorded (or, for insert overwrite, the existing data is replaced). When the
 * segment is empty and overwrite is not requested the load is marked as failed instead.
 *
 * <p>The segment lock, when one is held, is released on every exit path of this method,
 * including exceptional ones.
 *
 * @param context job context whose configuration carries the load model and partition info
 * @throws IOException if the partition commit fails, if a pre-status-update listener fails, or if
 *                     any of the underlying table status / file operations fail
 */
@Override
public void commitJob(JobContext context) throws IOException {
  try {
    // comma separated partitions
    String partitionPath = context.getConfiguration().get("carbon.output.partitions.name");
    long t1 = System.currentTimeMillis();
    try {
      super.commitJob(context);
    } catch (IOException e) {
      // ignore, in case of concurrent load it try to remove temporary folders by other load may
      // cause file not found exception. This will not impact carbon load,
      LOGGER.warn(e.getMessage());
    }
    LOGGER.info("$$$ Time taken for the super.commitJob in ms: " + (System.currentTimeMillis() - t1));
    boolean overwriteSet = CarbonTableOutputFormat.isOverwriteSet(context.getConfiguration());
    CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(context.getConfiguration());
    List<PartitionSpec> currentPartitionsOfTable = (List<PartitionSpec>) ObjectSerializationUtil.convertStringToObject(context.getConfiguration().get("carbon.currentpartition"));
    if (loadModel.getCarbonDataLoadSchema().getCarbonTable().isHivePartitionTable()) {
      try {
        commitJobForPartition(context, overwriteSet, loadModel, partitionPath, currentPartitionsOfTable);
      } catch (Exception e) {
        CarbonLoaderUtil.updateTableStatusForFailure(loadModel);
        LOGGER.error("commit job failed", e);
        // keep the original exception as the cause so the stack trace is not lost
        throw new IOException(e.getMessage(), e);
      }
      return;
    }
    LoadMetadataDetails newMetaEntry = loadModel.getCurrentLoadMetadataDetail();
    // The entry is dereferenced unconditionally below, so validate it up front instead of only
    // when a merged segment file exists.
    if (null == newMetaEntry) {
      throw new RuntimeException("Internal Error");
    }
    String readPath = CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath()) + CarbonCommonConstants.FILE_SEPARATOR + loadModel.getSegmentId() + "_" + loadModel.getFactTimeStamp() + ".tmp";
    // Merge all partition files into a single file.
    String segmentFileName = SegmentFileStore.genSegmentFileName(loadModel.getSegmentId(), String.valueOf(loadModel.getFactTimeStamp()));
    SegmentFileStore.SegmentFile segmentFile = SegmentFileStore.mergeSegmentFiles(readPath, segmentFileName, CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath()));
    if (segmentFile != null) {
      // Move all files from temp directory of each segment to partition directory
      SegmentFileStore.moveFromTempFolder(segmentFile, loadModel.getSegmentId() + "_" + loadModel.getFactTimeStamp() + ".tmp", loadModel.getTablePath());
    }
    OperationContext operationContext = (OperationContext) getOperationContext();
    CarbonTable carbonTable = loadModel.getCarbonDataLoadSchema().getCarbonTable();
    String uuid = "";
    // The segment file name is recorded whether or not a merged segment file was produced.
    newMetaEntry.setSegmentFile(segmentFileName + CarbonTablePath.SEGMENT_EXT);
    CarbonLoaderUtil.populateNewLoadMetaEntry(newMetaEntry, SegmentStatus.SUCCESS, loadModel.getFactTimeStamp(), true);
    long segmentSize = CarbonLoaderUtil.addDataIndexSizeIntoMetaEntry(newMetaEntry, loadModel.getSegmentId(), carbonTable);
    if (segmentSize > 0 || overwriteSet) {
      if (operationContext != null) {
        operationContext.setProperty(CarbonCommonConstants.CURRENT_SEGMENTFILE, newMetaEntry.getSegmentFile());
        LoadEvents.LoadTablePreStatusUpdateEvent event = new LoadEvents.LoadTablePreStatusUpdateEvent(carbonTable.getCarbonTableIdentifier(), loadModel);
        try {
          OperationListenerBus.getInstance().fireEvent(event, operationContext);
        } catch (Exception e) {
          throw new IOException(e);
        }
      }
      // After merging index, update newMetaEntry with updated merge index size
      boolean isMergeIndexEnabled = Boolean.parseBoolean(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT_DEFAULT));
      if (isMergeIndexEnabled) {
        CarbonLoaderUtil.addIndexSizeIntoMetaEntry(newMetaEntry, loadModel.getSegmentId(), carbonTable);
      }
      String uniqueId = null;
      if (overwriteSet) {
        if (!loadModel.isCarbonTransactionalTable()) {
          CarbonLoaderUtil.deleteNonTransactionalTableForInsertOverwrite(loadModel);
        } else {
          // an overwrite that produced no data leaves an empty segment; mark it for deletion
          if (segmentSize == 0) {
            newMetaEntry.setSegmentStatus(SegmentStatus.MARKED_FOR_DELETE);
          }
          List<String> partitionList = (List<String>) ObjectSerializationUtil.convertStringToObject(partitionPath);
          uniqueId = overwritePartitions(loadModel, newMetaEntry, uuid, partitionList, currentPartitionsOfTable);
        }
      } else {
        CarbonLoaderUtil.recordNewLoadMetadata(newMetaEntry, loadModel, false, false, uuid, false);
      }
      commitJobFinal(context, loadModel, operationContext, carbonTable, uniqueId);
    } else {
      CarbonLoaderUtil.updateTableStatusForFailure(loadModel);
    }
  } finally {
    // Release the segment lock on success and on failure alike, so a failed commit cannot leave
    // the segment locked.
    if (segmentLock != null) {
      segmentLock.unlock();
    }
  }
}
use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
the class CarbonOutputCommitter method setupJob.
/**
 * Prepares the job for loading: when the load model has no segment id yet, the load
 * progress is read and updated in the table metadata; afterwards the segment lock is
 * taken and the load model is written back into the job configuration.
 *
 * @param context job context whose configuration carries the load model
 * @throws IOException if the parent setup or reading/updating the load model fails
 * @throws RuntimeException if the segment lock cannot be acquired
 */
@Override
public void setupJob(JobContext context) throws IOException {
  super.setupJob(context);
  boolean overwriteSet = CarbonTableOutputFormat.isOverwriteSet(context.getConfiguration());
  CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(context.getConfiguration());
  boolean segmentNotAssigned = loadModel.getSegmentId() == null;
  if (segmentNotAssigned) {
    CarbonLoaderUtil.readAndUpdateLoadProgressInTableMeta(loadModel, overwriteSet);
  }

  // Take segment lock
  CarbonTable table = loadModel.getCarbonDataLoadSchema().getCarbonTable();
  String segmentLockName =
      CarbonTablePath.addSegmentPrefix(loadModel.getSegmentId()) + LockUsage.LOCK;
  segmentLock =
      CarbonLockFactory.getCarbonLockObj(table.getAbsoluteTableIdentifier(), segmentLockName);
  boolean lockAcquired = segmentLock.lockWithRetries();
  if (!lockAcquired) {
    throw new RuntimeException("Already segment is locked for loading, not supposed happen");
  }

  CarbonTableOutputFormat.setLoadModel(context.getConfiguration(), loadModel);
}
use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
the class CarbonTableOutputFormat method getLoadModel.
/**
 * Returns the load model for the given configuration.
 *
 * <p>If a serialized model is stored under {@code LOAD_MODEL} it is deserialized and returned
 * as is. Otherwise a new model is assembled from the table referenced by the configuration,
 * with each load option taken from the configuration first and from the carbon properties
 * (or their defaults) second.
 *
 * @param conf job configuration
 * @return the deserialized or newly built load model
 * @throws IOException if the model or the table cannot be read from the configuration
 */
public static CarbonLoadModel getLoadModel(Configuration conf) throws IOException {
  String serializedModel = conf.get(LOAD_MODEL);
  if (serializedModel != null) {
    return (CarbonLoadModel) ObjectSerializationUtil.convertStringToObject(serializedModel);
  }

  CarbonLoadModel loadModel = new CarbonLoadModel();
  CarbonProperties properties = CarbonProperties.getInstance();
  loadModel.setDatabaseName(CarbonTableOutputFormat.getDatabaseName(conf));
  loadModel.setTableName(CarbonTableOutputFormat.getTableName(conf));
  loadModel.setCarbonTransactionalTable(true);
  loadModel.setMetrics(new DataLoadMetrics());

  CarbonTable table = getCarbonTable(conf);
  // global dictionary is not supported since 2.0
  boolean hasDictionaryInclude = table.getTableInfo().getFactTable().getTableProperties()
      .containsKey(CarbonCommonConstants.DICTIONARY_INCLUDE);
  if (hasDictionaryInclude) {
    DeprecatedFeatureException.globalDictNotSupported();
  }

  // Table-level compressor wins; otherwise use the factory's current compressor.
  String compressorName = table.getTableInfo().getFactTable().getTableProperties()
      .get(CarbonCommonConstants.COMPRESSOR);
  if (compressorName == null) {
    compressorName = CompressorFactory.getInstance().getCompressor().getName();
  }
  loadModel.setColumnCompressor(compressorName);
  loadModel.setCarbonDataLoadSchema(new CarbonDataLoadSchema(table));
  loadModel.setTablePath(getTablePath(conf));
  setFileHeader(conf, loadModel);
  loadModel.setSerializationNullFormat(conf.get(SERIALIZATION_NULL_FORMAT, "\\N"));

  String loggerEnableDefault = properties.getProperty(
      CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE,
      CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE_DEFAULT);
  loadModel.setBadRecordsLoggerEnable(conf.get(BAD_RECORDS_LOGGER_ENABLE, loggerEnableDefault));

  String badRecordsActionDefault = properties.getProperty(
      CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
      CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT);
  loadModel.setBadRecordsAction(conf.get(BAD_RECORDS_LOGGER_ACTION, badRecordsActionDefault));

  String emptyDataBadRecordDefault = properties.getProperty(
      CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD,
      CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD_DEFAULT);
  loadModel.setIsEmptyDataBadRecord(conf.get(IS_EMPTY_DATA_BAD_RECORD, emptyDataBadRecordDefault));

  String skipEmptyLineDefault =
      properties.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_SKIP_EMPTY_LINE);
  loadModel.setSkipEmptyLine(conf.get(SKIP_EMPTY_LINE, skipEmptyLineDefault));

  String delimiterSpec = conf.get(COMPLEX_DELIMITERS);
  if (delimiterSpec == null) {
    delimiterSpec = ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_1.value()
        + "," + ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_2.value()
        + "," + ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_3.value()
        + "," + ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_4.value();
  }
  String[] delimiters = delimiterSpec.split(",");
  // The first delimiter is always required; up to three further levels are taken when present.
  loadModel.setComplexDelimiter(delimiters[0]);
  int levels = Math.min(delimiters.length, 4);
  for (int level = 1; level < levels; level++) {
    loadModel.setComplexDelimiter(delimiters[level]);
  }

  String dateFormatDefault = properties.getProperty(
      CarbonLoadOptionConstants.CARBON_OPTIONS_DATEFORMAT,
      CarbonLoadOptionConstants.CARBON_OPTIONS_DATEFORMAT_DEFAULT);
  loadModel.setDateFormat(conf.get(DATE_FORMAT, dateFormatDefault));

  String timestampFormatDefault = properties.getProperty(
      CarbonLoadOptionConstants.CARBON_OPTIONS_TIMESTAMPFORMAT,
      CarbonLoadOptionConstants.CARBON_OPTIONS_TIMESTAMPFORMAT_DEFAULT);
  loadModel.setTimestampFormat(conf.get(TIMESTAMP_FORMAT, timestampFormatDefault));

  String globalSortPartitionsDefault = properties.getProperty(
      CarbonLoadOptionConstants.CARBON_OPTIONS_GLOBAL_SORT_PARTITIONS, null);
  loadModel.setGlobalSortPartitions(conf.get(GLOBAL_SORT_PARTITIONS, globalSortPartitionsDefault));

  // Bad record location: configuration, then table property, then carbon properties.
  String badRecordsLocation = conf.get(BAD_RECORD_PATH);
  if (StringUtils.isEmpty(badRecordsLocation)) {
    badRecordsLocation =
        table.getTableInfo().getFactTable().getTableProperties().get("bad_record_path");
  }
  if (StringUtils.isEmpty(badRecordsLocation)) {
    String systemLocation = properties.getProperty(
        CarbonCommonConstants.CARBON_BADRECORDS_LOC,
        CarbonCommonConstants.CARBON_BADRECORDS_LOC_DEFAULT_VAL);
    badRecordsLocation = properties.getProperty(
        CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORD_PATH, systemLocation);
  }
  loadModel.setBadRecordsLocation(badRecordsLocation);
  return loadModel;
}
use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
the class CarbonTableOutputFormat method getRecordWriter.
/**
 * Creates the record writer for a task attempt and starts the data load on a background thread.
 *
 * <p>One {@code CarbonOutputIteratorWrapper} is created per SDK writer core (at least one). The
 * load itself is submitted to a single-thread executor; if it fails, the executor is shut down,
 * all iterators are closed, the load executor is closed and the original failure is rethrown
 * wrapped in a {@link RuntimeException}. A failure while closing the load executor is attached
 * to the original failure as a suppressed exception instead of being dropped.
 *
 * @param taskAttemptContext task attempt context whose configuration carries the load options
 * @return a {@code CarbonMultiRecordWriter} when the load model reports SDK writer cores,
 *         otherwise a {@code CarbonRecordWriter} over the single iterator
 * @throws IOException if the load model or the temp store locations cannot be obtained
 */
@Override
public RecordWriter<NullWritable, ObjectArrayWritable> getRecordWriter(final TaskAttemptContext taskAttemptContext) throws IOException {
  final CarbonLoadModel loadModel = getLoadModel(taskAttemptContext.getConfiguration());
  loadModel.setMetrics(new DataLoadMetrics());
  String appName = taskAttemptContext.getConfiguration().get(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME);
  if (null != appName) {
    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, appName);
  }
  short sdkWriterCores = loadModel.getSdkWriterCores();
  int itrSize = (sdkWriterCores > 0) ? sdkWriterCores : 1;
  final CarbonOutputIteratorWrapper[] iterators = new CarbonOutputIteratorWrapper[itrSize];
  for (int i = 0; i < itrSize; i++) {
    iterators[i] = new CarbonOutputIteratorWrapper();
  }
  // if loadModel having taskNo already(like in SDK) then no need to overwrite
  if (null == loadModel.getTaskNo() || loadModel.getTaskNo().isEmpty()) {
    loadModel.setTaskNo(taskAttemptContext.getConfiguration().get("carbon.outputformat.taskno", String.valueOf(DEFAULT_TASK_NO.getAndIncrement())));
  }
  loadModel.setDataWritePath(taskAttemptContext.getConfiguration().get("carbon.outputformat.writepath"));
  final String[] tempStoreLocations = getTempStoreLocations(taskAttemptContext);
  DataTypeUtil.clearFormatter();
  final DataLoadExecutor dataLoadExecutor = new DataLoadExecutor();
  final ExecutorService executorService = Executors.newFixedThreadPool(1, new CarbonThreadFactory("CarbonRecordWriter:" + loadModel.getTableName(), true));
  // It should be started in new thread as the underlying iterator uses blocking queue.
  Future<?> future = executorService.submit(() -> {
    ThreadLocalSessionInfo.getOrCreateCarbonSessionInfo().getNonSerializableExtraInfo().put("carbonConf", taskAttemptContext.getConfiguration());
    try {
      dataLoadExecutor.execute(loadModel, tempStoreLocations, iterators);
    } catch (Exception e) {
      executorService.shutdownNow();
      for (CarbonOutputIteratorWrapper iterator : iterators) {
        iterator.closeWriter(true);
      }
      try {
        dataLoadExecutor.close();
      } catch (Exception closeFailure) {
        // The load failure is the one to report; keep the close failure attached to it rather
        // than losing it. Self-suppression is not permitted, hence the identity check.
        if (closeFailure != e) {
          e.addSuppressed(closeFailure);
        }
      }
      throw new RuntimeException(e);
    } finally {
      ThreadLocalSessionInfo.unsetAll();
    }
  });
  if (sdkWriterCores > 0) {
    // CarbonMultiRecordWriter handles the load balancing of the write rows in round robin.
    return new CarbonMultiRecordWriter(iterators, dataLoadExecutor, loadModel, future, executorService);
  } else {
    return new CarbonRecordWriter(iterators[0], dataLoadExecutor, loadModel, future, executorService);
  }
}
Aggregations