Search in sources :

Example 31 with CarbonTableIdentifier

use of org.apache.carbondata.core.metadata.CarbonTableIdentifier in project carbondata by apache.

the class DataConverterProcessorStepImpl method createBadRecordLogger.

private BadRecordsLogger createBadRecordLogger() {
    boolean badRecordsLogRedirect = false;
    boolean badRecordConvertNullDisable = false;
    boolean isDataLoadFail = false;
    boolean badRecordsLoggerEnable = Boolean.parseBoolean(configuration.getDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ENABLE).toString());
    Object bad_records_action = configuration.getDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ACTION).toString();
    if (null != bad_records_action) {
        LoggerAction loggerAction = null;
        try {
            loggerAction = LoggerAction.valueOf(bad_records_action.toString().toUpperCase());
        } catch (IllegalArgumentException e) {
            loggerAction = LoggerAction.FORCE;
        }
        switch(loggerAction) {
            case FORCE:
                badRecordConvertNullDisable = false;
                break;
            case REDIRECT:
                badRecordsLogRedirect = true;
                badRecordConvertNullDisable = true;
                break;
            case IGNORE:
                badRecordsLogRedirect = false;
                badRecordConvertNullDisable = true;
                break;
            case FAIL:
                isDataLoadFail = true;
                break;
        }
    }
    CarbonTableIdentifier identifier = configuration.getTableIdentifier().getCarbonTableIdentifier();
    return new BadRecordsLogger(identifier.getBadRecordLoggerKey(), identifier.getTableName() + '_' + System.currentTimeMillis(), getBadLogStoreLocation(identifier.getDatabaseName() + CarbonCommonConstants.FILE_SEPARATOR + identifier.getTableName() + CarbonCommonConstants.FILE_SEPARATOR + configuration.getSegmentId() + CarbonCommonConstants.FILE_SEPARATOR + configuration.getTaskNo()), badRecordsLogRedirect, badRecordsLoggerEnable, badRecordConvertNullDisable, isDataLoadFail);
}
Also used : CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) LoggerAction(org.apache.carbondata.processing.constants.LoggerAction) BadRecordsLogger(org.apache.carbondata.processing.surrogatekeysgenerator.csvbased.BadRecordsLogger)

Example 32 with CarbonTableIdentifier

use of org.apache.carbondata.core.metadata.CarbonTableIdentifier in project carbondata by apache.

the class DataConverterProcessorWithBucketingStepImpl method createBadRecordLogger.

private BadRecordsLogger createBadRecordLogger() {
    boolean badRecordsLogRedirect = false;
    boolean badRecordConvertNullDisable = false;
    boolean isDataLoadFail = false;
    boolean badRecordsLoggerEnable = Boolean.parseBoolean(configuration.getDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ENABLE).toString());
    Object bad_records_action = configuration.getDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ACTION).toString();
    if (null != bad_records_action) {
        LoggerAction loggerAction = null;
        try {
            loggerAction = LoggerAction.valueOf(bad_records_action.toString().toUpperCase());
        } catch (IllegalArgumentException e) {
            loggerAction = LoggerAction.FORCE;
        }
        switch(loggerAction) {
            case FORCE:
                badRecordConvertNullDisable = false;
                break;
            case REDIRECT:
                badRecordsLogRedirect = true;
                badRecordConvertNullDisable = true;
                break;
            case IGNORE:
                badRecordsLogRedirect = false;
                badRecordConvertNullDisable = true;
                break;
            case FAIL:
                isDataLoadFail = true;
                break;
        }
    }
    CarbonTableIdentifier identifier = configuration.getTableIdentifier().getCarbonTableIdentifier();
    return new BadRecordsLogger(identifier.getBadRecordLoggerKey(), identifier.getTableName() + '_' + System.currentTimeMillis(), getBadLogStoreLocation(identifier.getDatabaseName() + CarbonCommonConstants.FILE_SEPARATOR + identifier.getTableName() + CarbonCommonConstants.FILE_SEPARATOR + configuration.getSegmentId() + CarbonCommonConstants.FILE_SEPARATOR + configuration.getTaskNo()), badRecordsLogRedirect, badRecordsLoggerEnable, badRecordConvertNullDisable, isDataLoadFail);
}
Also used : CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) LoggerAction(org.apache.carbondata.processing.constants.LoggerAction) BadRecordsLogger(org.apache.carbondata.processing.surrogatekeysgenerator.csvbased.BadRecordsLogger)

Example 33 with CarbonTableIdentifier

use of org.apache.carbondata.core.metadata.CarbonTableIdentifier in project carbondata by apache.

the class CarbonFactDataHandlerModel method createCarbonFactDataHandlerModel.

/**
   * Create the model using @{@link CarbonDataLoadConfiguration}
   */
public static CarbonFactDataHandlerModel createCarbonFactDataHandlerModel(CarbonDataLoadConfiguration configuration, String storeLocation, int bucketId, int taskExtension) {
    CarbonTableIdentifier identifier = configuration.getTableIdentifier().getCarbonTableIdentifier();
    boolean[] isUseInvertedIndex = CarbonDataProcessorUtil.getIsUseInvertedIndex(configuration.getDataFields());
    int[] dimLensWithComplex = configuration.getCardinalityFinder().getCardinality();
    if (!configuration.isSortTable()) {
        for (int i = 0; i < dimLensWithComplex.length; i++) {
            if (dimLensWithComplex[i] != 0) {
                dimLensWithComplex[i] = Integer.MAX_VALUE;
            }
        }
    }
    CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(identifier.getDatabaseName() + CarbonCommonConstants.UNDERSCORE + identifier.getTableName());
    List<ColumnSchema> wrapperColumnSchema = CarbonUtil.getColumnSchemaList(carbonTable.getDimensionByTableName(identifier.getTableName()), carbonTable.getMeasureByTableName(identifier.getTableName()));
    int[] colCardinality = CarbonUtil.getFormattedCardinality(dimLensWithComplex, wrapperColumnSchema);
    SegmentProperties segmentProperties = new SegmentProperties(wrapperColumnSchema, colCardinality);
    int[] dimLens = configuration.calcDimensionLengths();
    int dimensionCount = configuration.getDimensionCount();
    int noDictionaryCount = configuration.getNoDictionaryCount();
    int complexDimensionCount = configuration.getComplexColumnCount();
    int measureCount = configuration.getMeasureCount();
    int simpleDimsCount = dimensionCount - noDictionaryCount - complexDimensionCount;
    int[] simpleDimsLen = new int[simpleDimsCount];
    for (int i = 0; i < simpleDimsCount; i++) {
        simpleDimsLen[i] = dimLens[i];
    }
    //To Set MDKey Index of each primitive type in complex type
    int surrIndex = simpleDimsCount;
    Iterator<Map.Entry<String, GenericDataType>> complexMap = CarbonDataProcessorUtil.getComplexTypesMap(configuration.getDataFields()).entrySet().iterator();
    Map<Integer, GenericDataType> complexIndexMap = new HashMap<>(complexDimensionCount);
    while (complexMap.hasNext()) {
        Map.Entry<String, GenericDataType> complexDataType = complexMap.next();
        complexDataType.getValue().setOutputArrayIndex(0);
        complexIndexMap.put(simpleDimsCount, complexDataType.getValue());
        simpleDimsCount++;
        List<GenericDataType> primitiveTypes = new ArrayList<GenericDataType>();
        complexDataType.getValue().getAllPrimitiveChildren(primitiveTypes);
        for (GenericDataType eachPrimitive : primitiveTypes) {
            eachPrimitive.setSurrogateIndex(surrIndex++);
        }
    }
    CarbonDataFileAttributes carbonDataFileAttributes = new CarbonDataFileAttributes(Integer.parseInt(configuration.getTaskNo()), (Long) configuration.getDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP));
    String carbonDataDirectoryPath = getCarbonDataFolderLocation(configuration);
    CarbonFactDataHandlerModel carbonFactDataHandlerModel = new CarbonFactDataHandlerModel();
    carbonFactDataHandlerModel.setSchemaUpdatedTimeStamp(configuration.getSchemaUpdatedTimeStamp());
    carbonFactDataHandlerModel.setDatabaseName(identifier.getDatabaseName());
    carbonFactDataHandlerModel.setTableName(identifier.getTableName());
    carbonFactDataHandlerModel.setMeasureCount(measureCount);
    carbonFactDataHandlerModel.setStoreLocation(storeLocation);
    carbonFactDataHandlerModel.setDimLens(dimLens);
    carbonFactDataHandlerModel.setNoDictionaryCount(noDictionaryCount);
    carbonFactDataHandlerModel.setDimensionCount(configuration.getDimensionCount() - noDictionaryCount);
    carbonFactDataHandlerModel.setComplexIndexMap(complexIndexMap);
    carbonFactDataHandlerModel.setSegmentProperties(segmentProperties);
    carbonFactDataHandlerModel.setColCardinality(colCardinality);
    carbonFactDataHandlerModel.setMeasureDataType(configuration.getMeasureDataType());
    carbonFactDataHandlerModel.setWrapperColumnSchema(wrapperColumnSchema);
    carbonFactDataHandlerModel.setPrimitiveDimLens(simpleDimsLen);
    carbonFactDataHandlerModel.setCarbonDataFileAttributes(carbonDataFileAttributes);
    carbonFactDataHandlerModel.setCarbonDataDirectoryPath(carbonDataDirectoryPath);
    carbonFactDataHandlerModel.setIsUseInvertedIndex(isUseInvertedIndex);
    carbonFactDataHandlerModel.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
    carbonFactDataHandlerModel.setComplexDimensionKeyGenerator(configuration.createKeyGeneratorForComplexDimension());
    carbonFactDataHandlerModel.bucketId = bucketId;
    carbonFactDataHandlerModel.segmentId = configuration.getSegmentId();
    carbonFactDataHandlerModel.taskExtension = taskExtension;
    return carbonFactDataHandlerModel;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) HashMap(java.util.HashMap) Map(java.util.Map)

Example 34 with CarbonTableIdentifier

use of org.apache.carbondata.core.metadata.CarbonTableIdentifier in project carbondata by apache.

the class SortParameters method createSortParameters.

public static SortParameters createSortParameters(CarbonDataLoadConfiguration configuration) {
    SortParameters parameters = new SortParameters();
    CarbonTableIdentifier tableIdentifier = configuration.getTableIdentifier().getCarbonTableIdentifier();
    CarbonProperties carbonProperties = CarbonProperties.getInstance();
    parameters.setDatabaseName(tableIdentifier.getDatabaseName());
    parameters.setTableName(tableIdentifier.getTableName());
    parameters.setPartitionID(configuration.getPartitionId());
    parameters.setSegmentId(configuration.getSegmentId());
    parameters.setTaskNo(configuration.getTaskNo());
    parameters.setMeasureColCount(configuration.getMeasureCount());
    parameters.setDimColCount(configuration.getDimensionCount() - configuration.getComplexColumnCount());
    parameters.setNoDictionaryCount(configuration.getNoDictionaryCount());
    parameters.setComplexDimColCount(configuration.getComplexColumnCount());
    parameters.setNoDictionaryDimnesionColumn(CarbonDataProcessorUtil.getNoDictionaryMapping(configuration.getDataFields()));
    parameters.setBatchSortSizeinMb(CarbonDataProcessorUtil.getBatchSortSizeinMb(configuration));
    parameters.setNumberOfSortColumns(configuration.getNumberOfSortColumns());
    parameters.setNumberOfNoDictSortColumns(configuration.getNumberOfNoDictSortColumns());
    setNoDictionarySortColumnMapping(parameters);
    parameters.setObserver(new SortObserver());
    // get sort buffer size
    parameters.setSortBufferSize(Integer.parseInt(carbonProperties.getProperty(CarbonCommonConstants.SORT_SIZE, CarbonCommonConstants.SORT_SIZE_DEFAULT_VAL)));
    LOGGER.info("Sort size for table: " + parameters.getSortBufferSize());
    // set number of intermedaite file to merge
    parameters.setNumberOfIntermediateFileToBeMerged(Integer.parseInt(carbonProperties.getProperty(CarbonCommonConstants.SORT_INTERMEDIATE_FILES_LIMIT, CarbonCommonConstants.SORT_INTERMEDIATE_FILES_LIMIT_DEFAULT_VALUE)));
    LOGGER.info("Number of intermediate file to be merged: " + parameters.getNumberOfIntermediateFileToBeMerged());
    // get file buffer size
    parameters.setFileBufferSize(CarbonDataProcessorUtil.getFileBufferSize(parameters.getNumberOfIntermediateFileToBeMerged(), carbonProperties, CarbonCommonConstants.CONSTANT_SIZE_TEN));
    LOGGER.info("File Buffer Size: " + parameters.getFileBufferSize());
    String carbonDataDirectoryPath = CarbonDataProcessorUtil.getLocalDataFolderLocation(tableIdentifier.getDatabaseName(), tableIdentifier.getTableName(), configuration.getTaskNo(), configuration.getPartitionId(), configuration.getSegmentId(), false);
    parameters.setTempFileLocation(carbonDataDirectoryPath + File.separator + CarbonCommonConstants.SORT_TEMP_FILE_LOCATION);
    LOGGER.info("temp file location" + parameters.getTempFileLocation());
    int numberOfCores;
    try {
        numberOfCores = Integer.parseInt(carbonProperties.getProperty(CarbonCommonConstants.NUM_CORES_LOADING, CarbonCommonConstants.NUM_CORES_DEFAULT_VAL));
        numberOfCores = numberOfCores / 2;
    } catch (NumberFormatException exc) {
        numberOfCores = Integer.parseInt(CarbonCommonConstants.NUM_CORES_DEFAULT_VAL);
    }
    parameters.setNumberOfCores(numberOfCores > 0 ? numberOfCores : 1);
    parameters.setFileWriteBufferSize(Integer.parseInt(carbonProperties.getProperty(CarbonCommonConstants.CARBON_SORT_FILE_WRITE_BUFFER_SIZE, CarbonCommonConstants.CARBON_SORT_FILE_WRITE_BUFFER_SIZE_DEFAULT_VALUE)));
    parameters.setSortFileCompressionEnabled(Boolean.parseBoolean(carbonProperties.getProperty(CarbonCommonConstants.IS_SORT_TEMP_FILE_COMPRESSION_ENABLED, CarbonCommonConstants.IS_SORT_TEMP_FILE_COMPRESSION_ENABLED_DEFAULTVALUE)));
    int sortTempFileNoOFRecordsInCompression;
    try {
        sortTempFileNoOFRecordsInCompression = Integer.parseInt(carbonProperties.getProperty(CarbonCommonConstants.SORT_TEMP_FILE_NO_OF_RECORDS_FOR_COMPRESSION, CarbonCommonConstants.SORT_TEMP_FILE_NO_OF_RECORD_FOR_COMPRESSION_DEFAULTVALUE));
        if (sortTempFileNoOFRecordsInCompression < 1) {
            LOGGER.error("Invalid value for: " + CarbonCommonConstants.SORT_TEMP_FILE_NO_OF_RECORDS_FOR_COMPRESSION + ":Only Positive Integer value(greater than zero) is allowed.Default value will " + "be used");
            sortTempFileNoOFRecordsInCompression = Integer.parseInt(CarbonCommonConstants.SORT_TEMP_FILE_NO_OF_RECORD_FOR_COMPRESSION_DEFAULTVALUE);
        }
    } catch (NumberFormatException e) {
        LOGGER.error("Invalid value for: " + CarbonCommonConstants.SORT_TEMP_FILE_NO_OF_RECORDS_FOR_COMPRESSION + ", only Positive Integer value is allowed. Default value will be used");
        sortTempFileNoOFRecordsInCompression = Integer.parseInt(CarbonCommonConstants.SORT_TEMP_FILE_NO_OF_RECORD_FOR_COMPRESSION_DEFAULTVALUE);
    }
    parameters.setSortTempFileNoOFRecordsInCompression(sortTempFileNoOFRecordsInCompression);
    if (parameters.isSortFileCompressionEnabled()) {
        LOGGER.info("Compression will be used for writing the sort temp File");
    }
    parameters.setPrefetch(CarbonCommonConstants.CARBON_PREFETCH_IN_MERGE_VALUE);
    parameters.setBufferSize(Integer.parseInt(carbonProperties.getProperty(CarbonCommonConstants.CARBON_PREFETCH_BUFFERSIZE, CarbonCommonConstants.CARBON_PREFETCH_BUFFERSIZE_DEFAULT)));
    DataType[] measureDataType = configuration.getMeasureDataType();
    parameters.setMeasureDataType(measureDataType);
    return parameters;
}
Also used : CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) CarbonProperties(org.apache.carbondata.core.util.CarbonProperties) DataType(org.apache.carbondata.core.metadata.datatype.DataType) SortObserver(org.apache.carbondata.processing.schema.metadata.SortObserver)

Example 35 with CarbonTableIdentifier

use of org.apache.carbondata.core.metadata.CarbonTableIdentifier in project carbondata by apache.

the class DataWriterBatchProcessorStepImpl method execute.

@Override
public Iterator<CarbonRowBatch>[] execute() throws CarbonDataLoadingException {
    Iterator<CarbonRowBatch>[] iterators = child.execute();
    CarbonTableIdentifier tableIdentifier = configuration.getTableIdentifier().getCarbonTableIdentifier();
    String tableName = tableIdentifier.getTableName();
    try {
        CarbonTimeStatisticsFactory.getLoadStatisticsInstance().recordDictionaryValue2MdkAdd2FileTime(configuration.getPartitionId(), System.currentTimeMillis());
        int i = 0;
        for (Iterator<CarbonRowBatch> iterator : iterators) {
            String storeLocation = getStoreLocation(tableIdentifier, String.valueOf(i));
            int k = 0;
            while (iterator.hasNext()) {
                CarbonRowBatch next = iterator.next();
                CarbonFactDataHandlerModel model = CarbonFactDataHandlerModel.createCarbonFactDataHandlerModel(configuration, storeLocation, i, k++);
                CarbonFactHandler dataHandler = CarbonFactHandlerFactory.createCarbonFactHandler(model, CarbonFactHandlerFactory.FactHandlerType.COLUMNAR);
                dataHandler.initialise();
                processBatch(next, dataHandler);
                finish(tableName, dataHandler);
            }
            i++;
        }
    } catch (Exception e) {
        LOGGER.error(e, "Failed for table: " + tableName + " in DataWriterBatchProcessorStepImpl");
        throw new CarbonDataLoadingException("There is an unexpected error: " + e.getMessage());
    }
    return null;
}
Also used : CarbonRowBatch(org.apache.carbondata.processing.newflow.row.CarbonRowBatch) CarbonFactDataHandlerModel(org.apache.carbondata.processing.store.CarbonFactDataHandlerModel) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) Iterator(java.util.Iterator) CarbonFactHandler(org.apache.carbondata.processing.store.CarbonFactHandler) IOException(java.io.IOException) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException)

Aggregations

CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier)42 Test (org.junit.Test)13 IOException (java.io.IOException)8 ArrayList (java.util.ArrayList)7 ColumnIdentifier (org.apache.carbondata.core.metadata.ColumnIdentifier)7 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)6 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)6 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)5 Before (org.junit.Before)5 BeforeClass (org.junit.BeforeClass)5 TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)4 File (java.io.File)3 HashMap (java.util.HashMap)3 AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex)3 TableBlockUniqueIdentifier (org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier)3 CarbonDataLoadingException (org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException)3 Iterator (java.util.Iterator)2 ExecutorService (java.util.concurrent.ExecutorService)2 ICarbonLock (org.apache.carbondata.core.locks.ICarbonLock)2 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)2