Search in sources :

Example 1 with IndexWriterListener

use of org.apache.carbondata.processing.index.IndexWriterListener in project carbondata by apache.

In the class CarbonRowDataWriterProcessorStepImpl, method doExecute.

/**
 * Drains one batch iterator, writing every batch through a fact handler that is created
 * lazily when the first batch is seen, and finishes that handler once the iterator is empty.
 *
 * @param iterator      source of row batches to process
 * @param iteratorIndex index of the iterator; forwarded to the handler model, to
 *                      {@code processBatch} and to {@code finish}
 */
private void doExecute(Iterator<CarbonRowBatch> iterator, int iteratorIndex) {
    String[] storeLocation = getStoreLocation();
    IndexWriterListener listener = getIndexWriterListener(0);
    CarbonFactDataHandlerModel handlerModel = CarbonFactDataHandlerModel.createCarbonFactDataHandlerModel(configuration, storeLocation, 0, iteratorIndex, listener);
    handlerModel.setColumnLocalDictGenMap(localDictionaryGeneratorMap);

    CarbonFactHandler handler = null;
    boolean handlerStarted = false;
    while (iterator.hasNext()) {
        if (!handlerStarted) {
            // The handler is only created when there is at least one batch to write.
            handlerStarted = true;
            handler = CarbonFactHandlerFactory.createCarbonFactHandler(handlerModel);
            this.carbonFactHandlers.add(handler);
            handler.initialise();
        }
        CarbonRowBatch batch = iterator.next();
        processBatch(batch, handler, iteratorIndex);
    }

    // NOTE(review): a failure while processing batches leaves the handler registered in
    // carbonFactHandlers; presumably another cleanup path closes it - confirm.
    try {
        if (handlerStarted) {
            finish(handler, iteratorIndex);
        }
    } finally {
        // Deregister the handler whether or not finish succeeded.
        carbonFactHandlers.remove(handler);
    }
}
Also used : CarbonFactDataHandlerModel(org.apache.carbondata.processing.store.CarbonFactDataHandlerModel) CarbonFactHandler(org.apache.carbondata.processing.store.CarbonFactHandler) IndexWriterListener(org.apache.carbondata.processing.index.IndexWriterListener)

Example 2 with IndexWriterListener

use of org.apache.carbondata.processing.index.IndexWriterListener in project carbondata by apache.

In the class CarbonFactDataHandlerModel, method createCarbonFactDataHandlerModel.

/**
 * Builds a {@link CarbonFactDataHandlerModel} from a {@link CarbonDataLoadConfiguration}.
 *
 * @param configuration load configuration supplying the table, segment, task and writer settings
 * @param storeLocation store locations set on the model
 * @param bucketId      bucket id stored on the model and used to build the shard name
 * @param taskExtension task extension stored on the model and used to build the shard name
 * @param listener      index writer listener to attach; when {@code null}, a new listener is
 *                      created and registered for the table, segment and shard
 * @return the populated model
 */
public static CarbonFactDataHandlerModel createCarbonFactDataHandlerModel(CarbonDataLoadConfiguration configuration, String[] storeLocation, int bucketId, int taskExtension, IndexWriterListener listener) {
    CarbonTableIdentifier tableIdentifier = configuration.getTableIdentifier().getCarbonTableIdentifier();
    CarbonTable carbonTable = configuration.getTableSpec().getCarbonTable();
    List<ColumnSchema> columnSchemas = CarbonUtil.getColumnSchemaList(carbonTable.getVisibleDimensions(), carbonTable.getVisibleMeasures());
    SegmentProperties segmentProperties = new SegmentProperties(columnSchemas);
    int complexDimensionCount = segmentProperties.getNumberOfComplexDimensions();

    // Complex columns are keyed starting right after the primitive dimensions; surrogate
    // indexes for their dictionary children start from the same offset.
    int nextComplexIndex = segmentProperties.getNumberOfPrimitiveDimensions();
    int nextSurrogateIndex = nextComplexIndex;

    Map<String, GenericDataType> complexTypes = CarbonDataProcessorUtil.getComplexTypesMap(configuration.getDataFields(), configuration.getDataLoadProperty(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT).toString());
    Map<Integer, GenericDataType> complexIndexMap = new HashMap<>(complexDimensionCount);
    for (GenericDataType complexType : complexTypes.values()) {
        complexType.setOutputArrayIndex(0);
        complexIndexMap.put(nextComplexIndex++, complexType);
        List<GenericDataType> primitiveChildren = new ArrayList<>();
        complexType.getAllPrimitiveChildren(primitiveChildren);
        for (GenericDataType child : primitiveChildren) {
            if (child.getIsColumnDictionary()) {
                child.setSurrogateIndex(nextSurrogateIndex++);
            }
        }
    }

    // Data types of every dimension field that is not a date type.
    List<DataType> noDictDataTypes = new ArrayList<>();
    for (DataField field : configuration.getDataFields()) {
        if (field.isDateDataType() || !field.getColumn().isDimension()) {
            continue;
        }
        noDictDataTypes.add(field.getColumn().getDataType());
    }

    CarbonDataFileAttributes fileAttributes = new CarbonDataFileAttributes(configuration.getTaskNo(), (Long) configuration.getDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP));
    String dataDirectoryPath = getCarbonDataFolderLocation(configuration);

    CarbonFactDataHandlerModel model = new CarbonFactDataHandlerModel();
    model.setSchemaUpdatedTimeStamp(configuration.getSchemaUpdatedTimeStamp());
    model.setDatabaseName(tableIdentifier.getDatabaseName());
    model.setTableName(tableIdentifier.getTableName());
    model.setStoreLocation(storeLocation);
    model.setNoDictDataTypesList(noDictDataTypes);
    model.setComplexIndexMap(complexIndexMap);
    model.setSegmentProperties(segmentProperties);
    model.setMeasureDataType(configuration.getMeasureDataType());
    model.setNoDictAndComplexColumns(configuration.getNoDictAndComplexDimensions());
    model.setWrapperColumnSchema(columnSchemas);
    model.setCarbonDataFileAttributes(fileAttributes);
    model.setCarbonDataDirectoryPath(dataDirectoryPath);
    model.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
    model.bucketId = bucketId;
    model.segmentId = configuration.getSegmentId();
    model.taskExtension = taskExtension;
    model.tableSpec = configuration.getTableSpec();
    model.sortScope = CarbonDataProcessorUtil.getSortScope(configuration);
    model.columnCompressor = configuration.getColumnCompressor();

    IndexWriterListener indexListener = listener;
    if (indexListener == null) {
        // No listener supplied by the caller: create one and register the writers for this shard.
        String shardName = CarbonTablePath.getShardName(fileAttributes.getTaskId(), bucketId, taskExtension, String.valueOf(fileAttributes.getFactTimeStamp()), configuration.getSegmentId());
        indexListener = new IndexWriterListener();
        indexListener.registerAllWriter(configuration.getTableSpec().getCarbonTable(), configuration.getSegmentId(), shardName, segmentProperties);
    }
    model.indexWriterlistener = indexListener;

    model.writingCoresCount = configuration.getWritingCoresCount();
    model.initNumberOfCores();
    model.setMetrics(configuration.getMetrics());
    return model;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) DataField(org.apache.carbondata.processing.loading.DataField) DataType(org.apache.carbondata.core.metadata.datatype.DataType) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) Map(java.util.Map) IndexWriterListener(org.apache.carbondata.processing.index.IndexWriterListener)

Example 3 with IndexWriterListener

use of org.apache.carbondata.processing.index.IndexWriterListener in project carbondata by apache.

In the class CarbonFactDataHandlerModel, method getCarbonFactDataHandlerModel.

/**
 * Creates a model object for the carbon fact data handler from a load model and table metadata.
 *
 * @param loadModel               load model supplying the database name, segment id, task number,
 *                                fact timestamp, null format, column compressor and metrics
 * @param carbonTable             table whose visible dimensions and measures describe the columns
 * @param segmentProperties       segment properties supplying measure and dimension information
 * @param tableName               table name set on the model
 * @param tempStoreLocation       store locations set on the model
 * @param carbonDataDirectoryPath data directory path; the folder is checked/created before it is
 *                                set on the model
 * @return the populated model
 */
public static CarbonFactDataHandlerModel getCarbonFactDataHandlerModel(CarbonLoadModel loadModel, CarbonTable carbonTable, SegmentProperties segmentProperties, String tableName, String[] tempStoreLocation, String carbonDataDirectoryPath) {
    // Collect every visible non-DATE dimension as a column, together with its data type.
    List<CarbonDimension> visibleDimensions = carbonTable.getVisibleDimensions();
    int columnSlots = segmentProperties.getNumberOfNoDictionaryDimension() + segmentProperties.getComplexDimensions().size();
    CarbonColumn[] noDictAndComplexColumns = new CarbonColumn[columnSlots];
    int filledSlots = 0;
    List<DataType> noDictDataTypes = new ArrayList<>();
    for (CarbonDimension dimension : visibleDimensions) {
        if (dimension.getDataType() == DataTypes.DATE) {
            continue;
        }
        noDictAndComplexColumns[filledSlots] = new CarbonColumn(dimension.getColumnSchema(), dimension.getOrdinal(), dimension.getSchemaOrdinal());
        filledSlots++;
        noDictDataTypes.add(dimension.getDataType());
    }

    CarbonFactDataHandlerModel model = new CarbonFactDataHandlerModel();
    model.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
    model.setDatabaseName(loadModel.getDatabaseName());
    model.setTableName(tableName);
    model.setStoreLocation(tempStoreLocation);
    model.setSegmentProperties(segmentProperties);
    model.setSegmentId(loadModel.getSegmentId());
    List<ColumnSchema> columnSchemas = CarbonUtil.getColumnSchemaList(carbonTable.getVisibleDimensions(), carbonTable.getVisibleMeasures());
    model.setWrapperColumnSchema(columnSchemas);
    model.setComplexIndexMap(convertComplexDimensionToComplexIndexMap(segmentProperties, loadModel.getSerializationNullFormat()));

    // One data type per measure, in the order the segment properties list them.
    DataType[] measureTypes = new DataType[segmentProperties.getMeasures().size()];
    int measureSlot = 0;
    for (CarbonMeasure measure : segmentProperties.getMeasures()) {
        measureTypes[measureSlot] = measure.getDataType();
        measureSlot++;
    }
    model.setMeasureDataType(measureTypes);
    model.setNoDictAndComplexColumns(noDictAndComplexColumns);
    model.setNoDictDataTypesList(noDictDataTypes);

    CarbonUtil.checkAndCreateFolderWithPermission(carbonDataDirectoryPath);
    model.setCarbonDataDirectoryPath(carbonDataDirectoryPath);
    model.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
    model.setColumnCompressor(loadModel.getColumnCompressor());
    model.tableSpec = new TableSpec(carbonTable, false);

    // NOTE(review): bucket id and task extension are read back from the freshly created model;
    // this method never sets them, so they presumably hold field defaults - confirm.
    String shardName = CarbonTablePath.getShardName(CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo(loadModel.getTaskNo()), model.getBucketId(), model.getTaskExtension(), String.valueOf(loadModel.getFactTimeStamp()), loadModel.getSegmentId());
    IndexWriterListener indexListener = new IndexWriterListener();
    indexListener.registerAllWriter(carbonTable, loadModel.getSegmentId(), shardName, segmentProperties);
    model.indexWriterlistener = indexListener;

    model.initNumberOfCores();
    model.setColumnLocalDictGenMap(CarbonUtil.getLocalDictionaryModel(carbonTable));
    model.sortScope = carbonTable.getSortScope();
    model.setMetrics(loadModel.getMetrics());
    return model;
}
Also used : TableSpec(org.apache.carbondata.core.datastore.TableSpec) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) DataType(org.apache.carbondata.core.metadata.datatype.DataType) IndexWriterListener(org.apache.carbondata.processing.index.IndexWriterListener)

Aggregations

IndexWriterListener (org.apache.carbondata.processing.index.IndexWriterListener)3 ArrayList (java.util.ArrayList)2 DataType (org.apache.carbondata.core.metadata.datatype.DataType)2 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)2 GenericDataType (org.apache.carbondata.processing.datatypes.GenericDataType)2 HashMap (java.util.HashMap)1 Map (java.util.Map)1 TableSpec (org.apache.carbondata.core.datastore.TableSpec)1 SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties)1 CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier)1 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)1 CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)1 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)1 CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure)1 DataField (org.apache.carbondata.processing.loading.DataField)1 CarbonFactDataHandlerModel (org.apache.carbondata.processing.store.CarbonFactDataHandlerModel)1 CarbonFactHandler (org.apache.carbondata.processing.store.CarbonFactHandler)1