Use of org.apache.carbondata.processing.index.IndexWriterListener in the Apache CarbonData project.
Class CarbonRowDataWriterProcessorStepImpl, method doExecute.
/**
 * Feeds every row batch from the given iterator to a fact handler and then finishes it.
 *
 * <p>The fact handler is created, registered in {@code carbonFactHandlers} and initialised
 * only when the iterator yields at least one batch. For an empty iterator no handler is
 * created and {@code finish} is not called.
 *
 * <p>The handler is removed from {@code carbonFactHandlers} after {@code finish} returns or
 * throws. If creating the handler or processing a batch throws, the exception propagates and
 * the handler (if already registered) stays in {@code carbonFactHandlers}.
 * NOTE(review): presumably a later cleanup path relies on that - confirm before changing.
 *
 * @param iterator      source of the row batches to process
 * @param iteratorIndex index of this iterator; passed to the handler model as the task
 *                      extension and forwarded to {@code processBatch} and {@code finish}
 */
private void doExecute(Iterator<CarbonRowBatch> iterator, int iteratorIndex) {
  // Bucket id 0 is used both for the listener lookup and for the handler model.
  CarbonFactDataHandlerModel handlerModel =
      CarbonFactDataHandlerModel.createCarbonFactDataHandlerModel(
          configuration, getStoreLocation(), 0, iteratorIndex, getIndexWriterListener(0));
  handlerModel.setColumnLocalDictGenMap(localDictionaryGeneratorMap);

  CarbonFactHandler factHandler = null;
  final boolean hasRows = iterator.hasNext();
  if (hasRows) {
    // Lazily set up the handler now that the first batch is known to exist.
    factHandler = CarbonFactHandlerFactory.createCarbonFactHandler(handlerModel);
    this.carbonFactHandlers.add(factHandler);
    factHandler.initialise();
    do {
      processBatch(iterator.next(), factHandler, iteratorIndex);
    } while (iterator.hasNext());
  }

  try {
    if (hasRows) {
      finish(factHandler, iteratorIndex);
    }
  } finally {
    // Runs even when no handler was created; factHandler is null in that case.
    carbonFactHandlers.remove(factHandler);
  }
}
Use of org.apache.carbondata.processing.index.IndexWriterListener in the Apache CarbonData project.
Class CarbonFactDataHandlerModel, method createCarbonFactDataHandlerModel.
/**
 * Builds a {@link CarbonFactDataHandlerModel} from a {@link CarbonDataLoadConfiguration}.
 *
 * <p>Segment properties are derived from the visible dimensions and measures of the
 * configured table. Each complex type is mapped to an index that starts right after the
 * primitive dimensions, and dictionary-backed primitive children of the complex types are
 * given consecutive surrogate indexes starting from the same offset.
 *
 * @param configuration load configuration that supplies table, segment and writer settings
 * @param storeLocation store locations to set on the model
 * @param bucketId      bucket id stored on the model and used when building the shard name
 * @param taskExtension task extension stored on the model and used when building the shard
 *                      name
 * @param listener      index writer listener to attach to the model; when {@code null}, a
 *                      new listener is created and all writers are registered on it
 * @return the populated model
 */
public static CarbonFactDataHandlerModel createCarbonFactDataHandlerModel(
    CarbonDataLoadConfiguration configuration, String[] storeLocation, int bucketId,
    int taskExtension, IndexWriterListener listener) {
  CarbonTableIdentifier tableIdentifier =
      configuration.getTableIdentifier().getCarbonTableIdentifier();
  CarbonTable table = configuration.getTableSpec().getCarbonTable();
  List<ColumnSchema> columnSchemas = CarbonUtil.getColumnSchemaList(
      table.getVisibleDimensions(), table.getVisibleMeasures());
  SegmentProperties segmentProperties = new SegmentProperties(columnSchemas);
  int complexDimensionCount = segmentProperties.getNumberOfComplexDimensions();

  // Complex-type indexes and surrogate indexes both start after the primitive dimensions,
  // but advance independently of each other.
  int nextComplexIndex = segmentProperties.getNumberOfPrimitiveDimensions();
  int nextSurrogateIndex = nextComplexIndex;

  Map<String, GenericDataType> complexTypes = CarbonDataProcessorUtil.getComplexTypesMap(
      configuration.getDataFields(),
      configuration.getDataLoadProperty(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT)
          .toString());
  Map<Integer, GenericDataType> complexIndexMap = new HashMap<>(complexDimensionCount);
  for (Map.Entry<String, GenericDataType> entry : complexTypes.entrySet()) {
    GenericDataType complexType = entry.getValue();
    complexType.setOutputArrayIndex(0);
    complexIndexMap.put(nextComplexIndex, complexType);
    nextComplexIndex++;

    List<GenericDataType> primitiveChildren = new ArrayList<>();
    complexType.getAllPrimitiveChildren(primitiveChildren);
    for (GenericDataType child : primitiveChildren) {
      if (child.getIsColumnDictionary()) {
        child.setSurrogateIndex(nextSurrogateIndex++);
      }
    }
  }

  // Data types of all dimension fields that are not date typed.
  List<DataType> noDictDataTypes = new ArrayList<>();
  for (DataField field : configuration.getDataFields()) {
    if (field.isDateDataType() || !field.getColumn().isDimension()) {
      continue;
    }
    noDictDataTypes.add(field.getColumn().getDataType());
  }

  CarbonDataFileAttributes fileAttributes = new CarbonDataFileAttributes(
      configuration.getTaskNo(),
      (Long) configuration.getDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP));
  String dataDirectoryPath = getCarbonDataFolderLocation(configuration);

  CarbonFactDataHandlerModel model = new CarbonFactDataHandlerModel();
  model.setSchemaUpdatedTimeStamp(configuration.getSchemaUpdatedTimeStamp());
  model.setDatabaseName(tableIdentifier.getDatabaseName());
  model.setTableName(tableIdentifier.getTableName());
  model.setStoreLocation(storeLocation);
  model.setNoDictDataTypesList(noDictDataTypes);
  model.setComplexIndexMap(complexIndexMap);
  model.setSegmentProperties(segmentProperties);
  model.setMeasureDataType(configuration.getMeasureDataType());
  model.setNoDictAndComplexColumns(configuration.getNoDictAndComplexDimensions());
  model.setWrapperColumnSchema(columnSchemas);
  model.setCarbonDataFileAttributes(fileAttributes);
  model.setCarbonDataDirectoryPath(dataDirectoryPath);
  model.setBlockSizeInMB(table.getBlockSizeInMB());
  model.bucketId = bucketId;
  model.segmentId = configuration.getSegmentId();
  model.taskExtension = taskExtension;
  model.tableSpec = configuration.getTableSpec();
  model.sortScope = CarbonDataProcessorUtil.getSortScope(configuration);
  model.columnCompressor = configuration.getColumnCompressor();

  // Use the caller's listener when given; otherwise create one and register all writers
  // for the shard identified by task id, bucket, task extension, timestamp and segment.
  IndexWriterListener indexListener = listener;
  if (indexListener == null) {
    indexListener = new IndexWriterListener();
    indexListener.registerAllWriter(
        configuration.getTableSpec().getCarbonTable(),
        configuration.getSegmentId(),
        CarbonTablePath.getShardName(
            fileAttributes.getTaskId(),
            bucketId,
            taskExtension,
            String.valueOf(fileAttributes.getFactTimeStamp()),
            configuration.getSegmentId()),
        segmentProperties);
  }
  model.indexWriterlistener = indexListener;

  model.writingCoresCount = configuration.getWritingCoresCount();
  model.initNumberOfCores();
  model.setMetrics(configuration.getMetrics());
  return model;
}
Use of org.apache.carbondata.processing.index.IndexWriterListener in the Apache CarbonData project.
Class CarbonFactDataHandlerModel, method getCarbonFactDataHandlerModel.
/**
 * Creates and populates a {@link CarbonFactDataHandlerModel} from a load model and table
 * metadata.
 *
 * <p>A new {@link IndexWriterListener} is always created here and all writers are registered
 * on it. The data directory is passed to
 * {@code CarbonUtil.checkAndCreateFolderWithPermission} before it is set on the model.
 *
 * @param loadModel               supplies the database name, segment id, task number, fact
 *                                timestamp, serialization null format, column compressor and
 *                                metrics
 * @param carbonTable             table providing the visible dimensions and measures, block
 *                                size, sort scope, last-updated time and local dictionary
 *                                model
 * @param segmentProperties       segment properties set on the model; also the source of the
 *                                measures and of the no-dictionary and complex dimension
 *                                counts
 * @param tableName               table name to set on the model
 * @param tempStoreLocation       store locations to set on the model
 * @param carbonDataDirectoryPath data directory path to set on the model
 * @return the populated model
 */
public static CarbonFactDataHandlerModel getCarbonFactDataHandlerModel(
    CarbonLoadModel loadModel, CarbonTable carbonTable, SegmentProperties segmentProperties,
    String tableName, String[] tempStoreLocation, String carbonDataDirectoryPath) {
  List<CarbonDimension> visibleDimensions = carbonTable.getVisibleDimensions();

  // The array is sized from the segment properties, while it is filled from the table's
  // visible non-DATE dimensions below.
  CarbonColumn[] noDictAndComplexColumns =
      new CarbonColumn[segmentProperties.getNumberOfNoDictionaryDimension()
          + segmentProperties.getComplexDimensions().size()];
  int columnCursor = 0;
  List<DataType> noDictDataTypes = new ArrayList<>();
  for (CarbonDimension dimension : visibleDimensions) {
    if (dimension.getDataType() == DataTypes.DATE) {
      continue;
    }
    noDictAndComplexColumns[columnCursor++] = new CarbonColumn(
        dimension.getColumnSchema(), dimension.getOrdinal(), dimension.getSchemaOrdinal());
    noDictDataTypes.add(dimension.getDataType());
  }

  CarbonFactDataHandlerModel model = new CarbonFactDataHandlerModel();
  model.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
  model.setDatabaseName(loadModel.getDatabaseName());
  model.setTableName(tableName);
  model.setStoreLocation(tempStoreLocation);
  model.setSegmentProperties(segmentProperties);
  model.setSegmentId(loadModel.getSegmentId());
  model.setWrapperColumnSchema(CarbonUtil.getColumnSchemaList(
      carbonTable.getVisibleDimensions(), carbonTable.getVisibleMeasures()));
  model.setComplexIndexMap(convertComplexDimensionToComplexIndexMap(
      segmentProperties, loadModel.getSerializationNullFormat()));

  // Measure data types, in the order the segment properties list the measures.
  DataType[] measureTypes = new DataType[segmentProperties.getMeasures().size()];
  int measureCursor = 0;
  for (CarbonMeasure measure : segmentProperties.getMeasures()) {
    measureTypes[measureCursor] = measure.getDataType();
    measureCursor++;
  }
  model.setMeasureDataType(measureTypes);
  model.setNoDictAndComplexColumns(noDictAndComplexColumns);
  model.setNoDictDataTypesList(noDictDataTypes);

  CarbonUtil.checkAndCreateFolderWithPermission(carbonDataDirectoryPath);
  model.setCarbonDataDirectoryPath(carbonDataDirectoryPath);
  model.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
  model.setColumnCompressor(loadModel.getColumnCompressor());
  model.tableSpec = new TableSpec(carbonTable, false);

  // Bucket id and task extension are read back from the new model; this method never
  // sets them, so the shard name uses whatever values a fresh model holds.
  IndexWriterListener indexListener = new IndexWriterListener();
  indexListener.registerAllWriter(
      carbonTable,
      loadModel.getSegmentId(),
      CarbonTablePath.getShardName(
          CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo(loadModel.getTaskNo()),
          model.getBucketId(),
          model.getTaskExtension(),
          String.valueOf(loadModel.getFactTimeStamp()),
          loadModel.getSegmentId()),
      segmentProperties);
  model.indexWriterlistener = indexListener;

  model.initNumberOfCores();
  model.setColumnLocalDictGenMap(CarbonUtil.getLocalDictionaryModel(carbonTable));
  model.sortScope = carbonTable.getSortScope();
  model.setMetrics(loadModel.getMetrics());
  return model;
}
Aggregations