Search in sources :

Example 11 with CarbonLoadModel

use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

the class CarbonOutputCommitter method abortJob.

/**
 * Handles an aborted job: marks the load as failed in the table status and removes the
 * temporary segment folders this job may have left behind.
 *
 * <p>The segment lock, when one is held, is released on every exit path.
 *
 * @param context job context whose configuration carries the load model
 * @param state   job status reported for the aborted job; only used for logging here
 * @throws IOException propagated from the parent committer or from the file operations
 */
@Override
public void abortJob(JobContext context, JobStatus.State state) throws IOException {
    try {
        super.abortJob(context, state);
        CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(context.getConfiguration());
        CarbonLoaderUtil.updateTableStatusForFailure(loadModel);

        // All paths below share the "<segmentId>_<factTimestamp>" stem.
        String segmentFileName = loadModel.getSegmentId() + "_" + loadModel.getFactTimeStamp();
        String segmentFile = segmentFileName + CarbonTablePath.SEGMENT_EXT;
        String tmpFolderName = segmentFileName + ".tmp";
        String segmentPathStem = CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath()) + CarbonCommonConstants.FILE_SEPARATOR + segmentFileName;
        String segmentFileLocation = segmentPathStem + CarbonTablePath.SEGMENT_EXT;

        // Record the segment file on a non-successful entry so it will be used while cleaning.
        LoadMetadataDetails metadataDetail = loadModel.getCurrentLoadMetadataDetail();
        if (metadataDetail != null
                && !metadataDetail.getSegmentStatus().equals(SegmentStatus.SUCCESS)
                && FileFactory.getCarbonFile(segmentFileLocation).exists()) {
            metadataDetail.setSegmentFile(segmentFile);
        }

        // Clean the temp files: delete the temp segment folder first.
        CarbonFile segTmpFolder = FileFactory.getCarbonFile(segmentPathStem + ".tmp");
        if (segTmpFolder.exists()) {
            FileFactory.deleteAllCarbonFilesOfDir(segTmpFolder);
        }

        // Then delete the temp data folders of this job, if the segment file exists.
        CarbonFile segmentFilePath = FileFactory.getCarbonFile(segmentFileLocation);
        if (segmentFilePath.exists()) {
            SegmentFileStore fileStore = new SegmentFileStore(loadModel.getTablePath(), segmentFile);
            SegmentFileStore.removeTempFolder(fileStore.getLocationMap(), tmpFolderName, loadModel.getTablePath());
        }
        LOGGER.error("Loading failed with job status : " + state);
    } finally {
        if (segmentLock != null) {
            segmentLock.unlock();
        }
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) SegmentFileStore(org.apache.carbondata.core.metadata.SegmentFileStore)

Example 12 with CarbonLoadModel

use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

the class CarbonOutputCommitter method commitJob.

/**
 * Updates the table status as success after the job has succeeded.
 *
 * <p>For Hive-partitioned tables the commit is delegated to {@code commitJobForPartition}; a
 * failure there marks the load as failed and is rethrown as an {@link IOException} that keeps
 * the original exception as its cause. For other tables the per-task segment files are merged,
 * the temp folder contents are moved into place, and the new load entry is recorded. If the
 * segment turned out empty and overwrite is not set, the load is marked as failed instead.
 *
 * <p>The segment lock, when one is held, is released on every exit path, including when an
 * exception escapes this method.
 *
 * @param context job context whose configuration carries the load model and partition info
 * @throws IOException if the partition commit fails or a pre-status-update listener fails
 */
@Override
public void commitJob(JobContext context) throws IOException {
    try {
        // comma separated partitions
        String partitionPath = context.getConfiguration().get("carbon.output.partitions.name");
        long t1 = System.currentTimeMillis();
        try {
            super.commitJob(context);
        } catch (IOException e) {
            // ignore, in case of concurrent load it try to remove temporary folders by other load may
            // cause file not found exception. This will not impact carbon load,
            LOGGER.warn(e.getMessage());
        }
        LOGGER.info("$$$ Time taken for the super.commitJob in ms: " + (System.currentTimeMillis() - t1));
        boolean overwriteSet = CarbonTableOutputFormat.isOverwriteSet(context.getConfiguration());
        CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(context.getConfiguration());
        List<PartitionSpec> currentPartitionsOfTable = (List<PartitionSpec>) ObjectSerializationUtil.convertStringToObject(context.getConfiguration().get("carbon.currentpartition"));
        if (loadModel.getCarbonDataLoadSchema().getCarbonTable().isHivePartitionTable()) {
            try {
                commitJobForPartition(context, overwriteSet, loadModel, partitionPath, currentPartitionsOfTable);
            } catch (Exception e) {
                CarbonLoaderUtil.updateTableStatusForFailure(loadModel);
                LOGGER.error("commit job failed", e);
                // Keep the original exception as the cause so the stack trace is not lost.
                throw new IOException(e.getMessage(), e);
            }
            return;
        }
        LoadMetadataDetails newMetaEntry = loadModel.getCurrentLoadMetadataDetail();
        String readPath = CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath()) + CarbonCommonConstants.FILE_SEPARATOR + loadModel.getSegmentId() + "_" + loadModel.getFactTimeStamp() + ".tmp";
        // Merge all partition files into a single file.
        String segmentFileName = SegmentFileStore.genSegmentFileName(loadModel.getSegmentId(), String.valueOf(loadModel.getFactTimeStamp()));
        SegmentFileStore.SegmentFile segmentFile = SegmentFileStore.mergeSegmentFiles(readPath, segmentFileName, CarbonTablePath.getSegmentFilesLocation(loadModel.getTablePath()));
        // The entry is dereferenced unconditionally below, so reject a missing one up front
        // rather than failing later with a bare NullPointerException.
        if (null == newMetaEntry) {
            throw new RuntimeException("Internal Error");
        }
        if (segmentFile != null) {
            // Move all files from temp directory of each segment to partition directory
            SegmentFileStore.moveFromTempFolder(segmentFile, loadModel.getSegmentId() + "_" + loadModel.getFactTimeStamp() + ".tmp", loadModel.getTablePath());
        }
        OperationContext operationContext = (OperationContext) getOperationContext();
        CarbonTable carbonTable = loadModel.getCarbonDataLoadSchema().getCarbonTable();
        String uuid = "";
        // The segment file name is recorded whether or not a merged segment file was produced.
        newMetaEntry.setSegmentFile(segmentFileName + CarbonTablePath.SEGMENT_EXT);
        CarbonLoaderUtil.populateNewLoadMetaEntry(newMetaEntry, SegmentStatus.SUCCESS, loadModel.getFactTimeStamp(), true);
        long segmentSize = CarbonLoaderUtil.addDataIndexSizeIntoMetaEntry(newMetaEntry, loadModel.getSegmentId(), carbonTable);
        if (segmentSize > 0 || overwriteSet) {
            if (operationContext != null) {
                operationContext.setProperty(CarbonCommonConstants.CURRENT_SEGMENTFILE, newMetaEntry.getSegmentFile());
                LoadEvents.LoadTablePreStatusUpdateEvent event = new LoadEvents.LoadTablePreStatusUpdateEvent(carbonTable.getCarbonTableIdentifier(), loadModel);
                try {
                    OperationListenerBus.getInstance().fireEvent(event, operationContext);
                } catch (Exception e) {
                    throw new IOException(e);
                }
            }
            // After merging index, update newMetaEntry with updated merge index size
            boolean isMergeIndexEnabled = Boolean.parseBoolean(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT_DEFAULT));
            if (isMergeIndexEnabled) {
                CarbonLoaderUtil.addIndexSizeIntoMetaEntry(newMetaEntry, loadModel.getSegmentId(), carbonTable);
            }
            String uniqueId = null;
            if (overwriteSet) {
                if (!loadModel.isCarbonTransactionalTable()) {
                    CarbonLoaderUtil.deleteNonTransactionalTableForInsertOverwrite(loadModel);
                } else {
                    if (segmentSize == 0) {
                        newMetaEntry.setSegmentStatus(SegmentStatus.MARKED_FOR_DELETE);
                    }
                    List<String> partitionList = (List<String>) ObjectSerializationUtil.convertStringToObject(partitionPath);
                    uniqueId = overwritePartitions(loadModel, newMetaEntry, uuid, partitionList, currentPartitionsOfTable);
                }
            } else {
                CarbonLoaderUtil.recordNewLoadMetadata(newMetaEntry, loadModel, false, false, uuid, false);
            }
            commitJobFinal(context, loadModel, operationContext, carbonTable, uniqueId);
        } else {
            CarbonLoaderUtil.updateTableStatusForFailure(loadModel);
        }
    } finally {
        // Single release point: covers the partition path, the normal path and every failure.
        if (segmentLock != null) {
            segmentLock.unlock();
        }
    }
}
Also used : OperationContext(org.apache.carbondata.events.OperationContext) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) SegmentFileStore(org.apache.carbondata.core.metadata.SegmentFileStore) IOException(java.io.IOException) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) IOException(java.io.IOException) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) LoadEvents(org.apache.carbondata.processing.loading.events.LoadEvents) ArrayList(java.util.ArrayList) List(java.util.List)

Example 13 with CarbonLoadModel

use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

the class CarbonOutputCommitter method setupJob.

/**
 * Prepares the job: updates the table status with in-progress when the load model has no
 * segment id yet, takes the segment lock, and stores the load model back into the
 * job configuration.
 *
 * @param context job context whose configuration carries the load model
 * @throws IOException propagated from the parent committer or from the load-model handling
 * @throws RuntimeException if the segment lock cannot be acquired
 */
@Override
public void setupJob(JobContext context) throws IOException {
    super.setupJob(context);
    boolean overwriteSet = CarbonTableOutputFormat.isOverwriteSet(context.getConfiguration());
    CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(context.getConfiguration());
    if (null == loadModel.getSegmentId()) {
        CarbonLoaderUtil.readAndUpdateLoadProgressInTableMeta(loadModel, overwriteSet);
    }

    // Take segment lock
    CarbonTable carbonTable = loadModel.getCarbonDataLoadSchema().getCarbonTable();
    String segmentLockName = CarbonTablePath.addSegmentPrefix(loadModel.getSegmentId()) + LockUsage.LOCK;
    segmentLock = CarbonLockFactory.getCarbonLockObj(carbonTable.getAbsoluteTableIdentifier(), segmentLockName);
    boolean lockAcquired = segmentLock.lockWithRetries();
    if (!lockAcquired) {
        throw new RuntimeException("Already segment is locked for loading, not supposed happen");
    }

    CarbonTableOutputFormat.setLoadModel(context.getConfiguration(), loadModel);
}
Also used : CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel)

Example 14 with CarbonLoadModel

use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

the class CarbonTableOutputFormat method getLoadModel.

/**
 * Returns the load model for the given configuration.
 *
 * <p>If the configuration already holds a serialized model under {@code LOAD_MODEL}, that model
 * is deserialized and returned unchanged. Otherwise a new model is assembled from the table
 * referenced by the configuration, with each load option taken from the configuration first and
 * from {@link CarbonProperties} as the fallback.
 *
 * @param conf configuration to read the model or its individual options from
 * @return the deserialized or newly built load model
 * @throws IOException declared for the deserialization and table lookup helpers
 */
public static CarbonLoadModel getLoadModel(Configuration conf) throws IOException {
    String serializedModel = conf.get(LOAD_MODEL);
    if (serializedModel != null) {
        // A model was stored earlier; use it as is.
        return (CarbonLoadModel) ObjectSerializationUtil.convertStringToObject(serializedModel);
    }

    CarbonLoadModel model = new CarbonLoadModel();
    CarbonProperties carbonProperty = CarbonProperties.getInstance();
    model.setDatabaseName(CarbonTableOutputFormat.getDatabaseName(conf));
    model.setTableName(CarbonTableOutputFormat.getTableName(conf));
    model.setCarbonTransactionalTable(true);
    model.setMetrics(new DataLoadMetrics());

    CarbonTable carbonTable = getCarbonTable(conf);
    // global dictionary is not supported since 2.0
    boolean hasDictionaryInclude = carbonTable.getTableInfo().getFactTable().getTableProperties().containsKey(CarbonCommonConstants.DICTIONARY_INCLUDE);
    if (hasDictionaryInclude) {
        DeprecatedFeatureException.globalDictNotSupported();
    }

    // Table-level compressor wins; otherwise fall back to the factory's compressor.
    String columnCompressor = carbonTable.getTableInfo().getFactTable().getTableProperties().get(CarbonCommonConstants.COMPRESSOR);
    if (columnCompressor == null) {
        columnCompressor = CompressorFactory.getInstance().getCompressor().getName();
    }
    model.setColumnCompressor(columnCompressor);
    model.setCarbonDataLoadSchema(new CarbonDataLoadSchema(carbonTable));
    model.setTablePath(getTablePath(conf));
    setFileHeader(conf, model);
    model.setSerializationNullFormat(conf.get(SERIALIZATION_NULL_FORMAT, "\\N"));

    String defaultLoggerEnable = carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE, CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE_DEFAULT);
    model.setBadRecordsLoggerEnable(conf.get(BAD_RECORDS_LOGGER_ENABLE, defaultLoggerEnable));

    String defaultBadRecordsAction = carbonProperty.getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT);
    model.setBadRecordsAction(conf.get(BAD_RECORDS_LOGGER_ACTION, defaultBadRecordsAction));

    String defaultEmptyDataBadRecord = carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD, CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD_DEFAULT);
    model.setIsEmptyDataBadRecord(conf.get(IS_EMPTY_DATA_BAD_RECORD, defaultEmptyDataBadRecord));

    String defaultSkipEmptyLine = carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_SKIP_EMPTY_LINE);
    model.setSkipEmptyLine(conf.get(SKIP_EMPTY_LINE, defaultSkipEmptyLine));

    String complexDelim = conf.get(COMPLEX_DELIMITERS);
    if (complexDelim == null) {
        complexDelim = ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_1.value() + "," + ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_2.value() + "," + ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_3.value() + "," + ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_4.value();
    }
    String[] delimiters = complexDelim.split(",");
    // The first delimiter is always set; up to three more are set when present.
    model.setComplexDelimiter(delimiters[0]);
    int delimiterCount = Math.min(delimiters.length, 4);
    for (int level = 1; level < delimiterCount; level++) {
        model.setComplexDelimiter(delimiters[level]);
    }

    String defaultDateFormat = carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_DATEFORMAT, CarbonLoadOptionConstants.CARBON_OPTIONS_DATEFORMAT_DEFAULT);
    model.setDateFormat(conf.get(DATE_FORMAT, defaultDateFormat));

    String defaultTimestampFormat = carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_TIMESTAMPFORMAT, CarbonLoadOptionConstants.CARBON_OPTIONS_TIMESTAMPFORMAT_DEFAULT);
    model.setTimestampFormat(conf.get(TIMESTAMP_FORMAT, defaultTimestampFormat));

    String defaultGlobalSortPartitions = carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_GLOBAL_SORT_PARTITIONS, null);
    model.setGlobalSortPartitions(conf.get(GLOBAL_SORT_PARTITIONS, defaultGlobalSortPartitions));

    // Bad record location: configuration, then table property, then carbon properties.
    String badRecordsPath = conf.get(BAD_RECORD_PATH);
    if (StringUtils.isEmpty(badRecordsPath)) {
        badRecordsPath = carbonTable.getTableInfo().getFactTable().getTableProperties().get("bad_record_path");
    }
    if (StringUtils.isEmpty(badRecordsPath)) {
        String defaultBadRecordsLocation = carbonProperty.getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC, CarbonCommonConstants.CARBON_BADRECORDS_LOC_DEFAULT_VAL);
        badRecordsPath = carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORD_PATH, defaultBadRecordsLocation);
    }
    model.setBadRecordsLocation(badRecordsPath);
    return model;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) DataLoadMetrics(org.apache.carbondata.core.util.DataLoadMetrics) CarbonProperties(org.apache.carbondata.core.util.CarbonProperties) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) CarbonDataLoadSchema(org.apache.carbondata.processing.loading.model.CarbonDataLoadSchema)

Example 15 with CarbonLoadModel

use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.

the class CarbonTableOutputFormat method getRecordWriter.

/**
 * Creates the record writer for a task attempt.
 *
 * <p>The data load itself runs on a single background thread because the underlying iterator
 * uses a blocking queue; the returned writer is given the iterators that background load reads.
 * When the load model reports a positive SDK writer core count, one iterator per core is
 * created and a {@code CarbonMultiRecordWriter} is returned, otherwise a single-iterator
 * {@code CarbonRecordWriter}.
 *
 * @param taskAttemptContext context of the task attempt; its configuration supplies the options
 * @return the writer bound to the background load
 * @throws IOException declared for building the load model and temp store locations
 */
@Override
public RecordWriter<NullWritable, ObjectArrayWritable> getRecordWriter(final TaskAttemptContext taskAttemptContext) throws IOException {
    final Configuration conf = taskAttemptContext.getConfiguration();
    final CarbonLoadModel loadModel = getLoadModel(conf);
    loadModel.setMetrics(new DataLoadMetrics());

    String appName = conf.get(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME);
    if (appName != null) {
        CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, appName);
    }

    // One iterator per SDK writer core, and at least one.
    short sdkWriterCores = loadModel.getSdkWriterCores();
    final CarbonOutputIteratorWrapper[] iterators = new CarbonOutputIteratorWrapper[Math.max(sdkWriterCores, 1)];
    for (int idx = 0; idx < iterators.length; idx++) {
        iterators[idx] = new CarbonOutputIteratorWrapper();
    }

    // if loadModel having taskNo already(like in SDK) then no need to overwrite
    if (loadModel.getTaskNo() == null || loadModel.getTaskNo().isEmpty()) {
        String fallbackTaskNo = String.valueOf(DEFAULT_TASK_NO.getAndIncrement());
        loadModel.setTaskNo(conf.get("carbon.outputformat.taskno", fallbackTaskNo));
    }
    loadModel.setDataWritePath(conf.get("carbon.outputformat.writepath"));

    final String[] tempStoreLocations = getTempStoreLocations(taskAttemptContext);
    DataTypeUtil.clearFormatter();
    final DataLoadExecutor dataLoadExecutor = new DataLoadExecutor();
    final ExecutorService executorService = Executors.newFixedThreadPool(1, new CarbonThreadFactory("CarbonRecordWriter:" + loadModel.getTableName(), true));

    // It should be started in new thread as the underlying iterator uses blocking queue.
    Future future = executorService.submit(() -> {
        ThreadLocalSessionInfo.getOrCreateCarbonSessionInfo().getNonSerializableExtraInfo().put("carbonConf", conf);
        try {
            dataLoadExecutor.execute(loadModel, tempStoreLocations, iterators);
        } catch (Exception e) {
            executorService.shutdownNow();
            for (CarbonOutputIteratorWrapper wrapper : iterators) {
                wrapper.closeWriter(true);
            }
            try {
                dataLoadExecutor.close();
            } catch (Exception ignored) {
                // As already exception happened before close() send that exception.
            }
            throw new RuntimeException(e);
        } finally {
            ThreadLocalSessionInfo.unsetAll();
        }
    });

    if (sdkWriterCores <= 0) {
        return new CarbonRecordWriter(iterators[0], dataLoadExecutor, loadModel, future, executorService);
    }
    // CarbonMultiRecordWriter handles the load balancing of the write rows in round robin.
    return new CarbonMultiRecordWriter(iterators, dataLoadExecutor, loadModel, future, executorService);
}
Also used : DataLoadMetrics(org.apache.carbondata.core.util.DataLoadMetrics) IOException(java.io.IOException) DeprecatedFeatureException(org.apache.carbondata.common.exceptions.DeprecatedFeatureException) ExecutionException(java.util.concurrent.ExecutionException) CarbonOutputIteratorWrapper(org.apache.carbondata.processing.loading.iterator.CarbonOutputIteratorWrapper) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) ExecutorService(java.util.concurrent.ExecutorService) CarbonThreadFactory(org.apache.carbondata.core.util.CarbonThreadFactory) Future(java.util.concurrent.Future) DataLoadExecutor(org.apache.carbondata.processing.loading.DataLoadExecutor)

Aggregations

CarbonLoadModel (org.apache.carbondata.processing.loading.model.CarbonLoadModel)21 IOException (java.io.IOException)9 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)6 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)4 SegmentFileStore (org.apache.carbondata.core.metadata.SegmentFileStore)3 DataLoadMetrics (org.apache.carbondata.core.util.DataLoadMetrics)3 CarbonDataLoadSchema (org.apache.carbondata.processing.loading.model.CarbonDataLoadSchema)3 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Map (java.util.Map)2 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)2 PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec)2 CarbonProperties (org.apache.carbondata.core.util.CarbonProperties)2 OperationContext (org.apache.carbondata.events.OperationContext)2 Configuration (org.apache.hadoop.conf.Configuration)2 Path (org.apache.hadoop.fs.Path)2 HiveInsertTableHandle (io.prestosql.plugin.hive.HiveInsertTableHandle)1 HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity)1 Table (io.prestosql.plugin.hive.metastore.Table)1 SchemaTableName (io.prestosql.spi.connector.SchemaTableName)1