use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonDataProcessorUtil method getLocalDataFolderLocation.
/**
 * Builds the local data folder path used by a single task.
 *
 * <p>The base store path is read from {@link CarbonProperties} under the temp store location key
 * of the task, with {@link CarbonCommonConstants#STORE_LOCATION_DEFAULT_VAL} passed as the
 * default. The table is looked up in {@link CarbonMetadata} under the key
 * {@code databaseName + UNDERSCORE + tableName}.
 *
 * @param databaseName database name, used for the temp location key and the table lookup
 * @param tableName table name, used for the temp location key and the table lookup
 * @param taskId task id, used for the temp location key and appended as the last path element
 * @param partitionId partition id passed to the carbon data directory lookup
 * @param segmentId segment id passed (as a string) to the carbon data directory lookup
 * @param isCompactionFlow forwarded to the temp store location key lookup
 * @return the carbon data directory path, followed by {@code File.separator} and {@code taskId}
 */
public static String getLocalDataFolderLocation(String databaseName, String tableName,
    String taskId, String partitionId, String segmentId, boolean isCompactionFlow) {
  String locationKey =
      getTempStoreLocationKey(databaseName, tableName, taskId, isCompactionFlow);
  String storeBasePath = CarbonProperties.getInstance()
      .getProperty(locationKey, CarbonCommonConstants.STORE_LOCATION_DEFAULT_VAL);

  String tableUniqueName = databaseName + CarbonCommonConstants.UNDERSCORE + tableName;
  CarbonTable table = CarbonMetadata.getInstance().getCarbonTable(tableUniqueName);

  CarbonTablePath tablePath =
      CarbonStorePath.getCarbonTablePath(storeBasePath, table.getCarbonTableIdentifier());
  String dataDirectory =
      tablePath.getCarbonDataDirectoryPath(partitionId, String.valueOf(segmentId));
  return dataDirectory + File.separator + taskId;
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonFactDataHandlerModel method createCarbonFactDataHandlerModel.
/**
 * Creates a {@link CarbonFactDataHandlerModel} from a {@link CarbonDataLoadConfiguration}.
 *
 * @param configuration load configuration supplying the table identifier, data fields,
 *                      cardinalities and the dimension/measure counts
 * @param storeLocation store location set on the model
 * @param bucketId bucket id assigned to the model
 * @param taskExtension task extension assigned to the model
 * @return the populated model
 */
public static CarbonFactDataHandlerModel createCarbonFactDataHandlerModel(
    CarbonDataLoadConfiguration configuration, String storeLocation, int bucketId,
    int taskExtension) {
  CarbonTableIdentifier tableIdentifier =
      configuration.getTableIdentifier().getCarbonTableIdentifier();
  boolean[] invertedIndexFlags =
      CarbonDataProcessorUtil.getIsUseInvertedIndex(configuration.getDataFields());

  // When the configuration does not report a sort table, every non-zero cardinality is
  // replaced by Integer.MAX_VALUE; zero entries are left untouched.
  int[] cardinalities = configuration.getCardinalityFinder().getCardinality();
  if (!configuration.isSortTable()) {
    for (int idx = 0; idx < cardinalities.length; idx++) {
      if (cardinalities[idx] == 0) {
        continue;
      }
      cardinalities[idx] = Integer.MAX_VALUE;
    }
  }

  String dbName = tableIdentifier.getDatabaseName();
  String tblName = tableIdentifier.getTableName();
  CarbonTable table = CarbonMetadata.getInstance()
      .getCarbonTable(dbName + CarbonCommonConstants.UNDERSCORE + tblName);
  List<ColumnSchema> columnSchemas = CarbonUtil.getColumnSchemaList(
      table.getDimensionByTableName(tblName), table.getMeasureByTableName(tblName));
  int[] formattedCardinality = CarbonUtil.getFormattedCardinality(cardinalities, columnSchemas);
  SegmentProperties segmentProps = new SegmentProperties(columnSchemas, formattedCardinality);

  int[] dimLens = configuration.calcDimensionLengths();
  int dimensionCount = configuration.getDimensionCount();
  int noDictionaryCount = configuration.getNoDictionaryCount();
  int complexDimensionCount = configuration.getComplexColumnCount();
  int measureCount = configuration.getMeasureCount();

  // Leading entries of dimLens that remain after subtracting the no-dictionary and complex
  // counts from the dimension count.
  int primitiveDimCount = dimensionCount - noDictionaryCount - complexDimensionCount;
  int[] primitiveDimLens = new int[primitiveDimCount];
  for (int idx = 0; idx < primitiveDimLens.length; idx++) {
    primitiveDimLens[idx] = dimLens[idx];
  }

  // To set the MDKey index of each primitive type in a complex type: complex columns are keyed
  // right after the primitive dimensions, and surrogate indexes continue from the same offset.
  int complexColumnIndex = primitiveDimCount;
  int surrogateIndex = primitiveDimCount;
  Map<Integer, GenericDataType> complexIndexMap = new HashMap<>(complexDimensionCount);
  for (Map.Entry<String, GenericDataType> entry
      : CarbonDataProcessorUtil.getComplexTypesMap(configuration.getDataFields()).entrySet()) {
    GenericDataType complexType = entry.getValue();
    complexType.setOutputArrayIndex(0);
    complexIndexMap.put(complexColumnIndex++, complexType);

    List<GenericDataType> primitiveChildren = new ArrayList<>();
    complexType.getAllPrimitiveChildren(primitiveChildren);
    for (GenericDataType child : primitiveChildren) {
      child.setSurrogateIndex(surrogateIndex++);
    }
  }

  CarbonDataFileAttributes fileAttributes = new CarbonDataFileAttributes(
      Integer.parseInt(configuration.getTaskNo()),
      (Long) configuration.getDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP));
  String dataDirectoryPath = getCarbonDataFolderLocation(configuration);

  CarbonFactDataHandlerModel model = new CarbonFactDataHandlerModel();
  model.setSchemaUpdatedTimeStamp(configuration.getSchemaUpdatedTimeStamp());
  model.setDatabaseName(dbName);
  model.setTableName(tblName);
  model.setMeasureCount(measureCount);
  model.setStoreLocation(storeLocation);
  model.setDimLens(dimLens);
  model.setNoDictionaryCount(noDictionaryCount);
  model.setDimensionCount(dimensionCount - noDictionaryCount);
  model.setComplexIndexMap(complexIndexMap);
  model.setSegmentProperties(segmentProps);
  model.setColCardinality(formattedCardinality);
  model.setMeasureDataType(configuration.getMeasureDataType());
  model.setWrapperColumnSchema(columnSchemas);
  model.setPrimitiveDimLens(primitiveDimLens);
  model.setCarbonDataFileAttributes(fileAttributes);
  model.setCarbonDataDirectoryPath(dataDirectoryPath);
  model.setIsUseInvertedIndex(invertedIndexFlags);
  model.setBlockSizeInMB(table.getBlockSizeInMB());
  model.setComplexDimensionKeyGenerator(configuration.createKeyGeneratorForComplexDimension());
  model.bucketId = bucketId;
  model.segmentId = configuration.getSegmentId();
  model.taskExtension = taskExtension;
  return model;
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class DataLoadProcessBuilder method createConfiguration.
/**
 * Builds the {@link CarbonDataLoadConfiguration} for a load described by the given model.
 *
 * <p>Besides populating the configuration, this method has side effects visible in its body:
 * it creates the temp store directory, records {@code storeLocation} and the model's store path
 * in {@link CarbonProperties}, and registers the model's table in {@link CarbonMetadata}.
 *
 * @param loadModel load model supplying the table, load options and task details
 * @param storeLocation local temp store path; created if it does not exist yet
 * @return the populated load configuration
 * @throws Exception propagated from the calls made while building the configuration
 */
private CarbonDataLoadConfiguration createConfiguration(CarbonLoadModel loadModel,
    String storeLocation) throws Exception {
  // File.mkdirs() returns false both on failure and when the directory already exists, so an
  // error is only reported when the directory is actually missing afterwards.
  File storeDir = new File(storeLocation);
  if (!storeDir.mkdirs() && !storeDir.isDirectory()) {
    LOGGER.error("Error while creating the temp store path: " + storeLocation);
  }
  CarbonDataLoadConfiguration configuration = new CarbonDataLoadConfiguration();
  String databaseName = loadModel.getDatabaseName();
  String tableName = loadModel.getTableName();
  String tempLocationKey = CarbonDataProcessorUtil
      .getTempStoreLocationKey(databaseName, tableName, loadModel.getTaskNo(), false);
  CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
  CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS, loadModel.getStorePath());

  CarbonTable carbonTable = loadModel.getCarbonDataLoadSchema().getCarbonTable();
  AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
  configuration.setTableIdentifier(identifier);
  configuration.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
  configuration.setHeader(loadModel.getCsvHeaderColumns());
  configuration.setPartitionId(loadModel.getPartitionId());
  configuration.setSegmentId(loadModel.getSegmentId());
  configuration.setTaskNo(loadModel.getTaskNo());
  configuration.setDataLoadProperty(DataLoadProcessorConstants.COMPLEX_DELIMITERS,
      new String[] { loadModel.getComplexDelimiterLevel1(),
          loadModel.getComplexDelimiterLevel2() });
  // NOTE(review): the values below are split on "," and the second token is used, so they are
  // presumably stored as "<name>,<value>" - confirm against the code that fills the load model.
  // A value without a comma would fail here with ArrayIndexOutOfBoundsException.
  configuration.setDataLoadProperty(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT,
      loadModel.getSerializationNullFormat().split(",")[1]);
  configuration.setDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP,
      loadModel.getFactTimeStamp());
  configuration.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ENABLE,
      loadModel.getBadRecordsLoggerEnable().split(",")[1]);
  configuration.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ACTION,
      loadModel.getBadRecordsAction().split(",")[1]);
  configuration.setDataLoadProperty(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD,
      loadModel.getIsEmptyDataBadRecord().split(",")[1]);
  configuration.setDataLoadProperty(DataLoadProcessorConstants.FACT_FILE_PATH,
      loadModel.getFactFilePath());
  configuration.setDataLoadProperty(CarbonCommonConstants.LOAD_SORT_SCOPE,
      loadModel.getSortScope());
  configuration.setDataLoadProperty(CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB,
      loadModel.getBatchSortSizeInMb());
  CarbonMetadata.getInstance().addCarbonTable(carbonTable);

  List<CarbonDimension> dimensions =
      carbonTable.getDimensionByTableName(carbonTable.getFactTableName());
  List<CarbonMeasure> measures =
      carbonTable.getMeasureByTableName(carbonTable.getFactTableName());
  Map<String, String> dateFormatMap =
      CarbonDataProcessorUtil.getDateFormatMap(loadModel.getDateFormat());
  List<DataField> dataFields = new ArrayList<>();
  List<DataField> complexDataFields = new ArrayList<>();
  // Field order: non-complex dimensions first, then the complex dimensions, then the measures.
  for (CarbonColumn column : dimensions) {
    DataField dataField = new DataField(column);
    dataField.setDateFormat(dateFormatMap.get(column.getColName()));
    if (column.isComplex()) {
      complexDataFields.add(dataField);
    } else {
      dataFields.add(dataField);
    }
  }
  dataFields.addAll(complexDataFields);
  for (CarbonColumn column : measures) {
    // This dummy measure is added when no measure was present. There is no need to load it.
    if (!(column.getColName().equals("default_dummy_measure"))) {
      dataFields.add(new DataField(column));
    }
  }
  configuration.setDataFields(dataFields.toArray(new DataField[dataFields.size()]));
  configuration.setBucketingInfo(carbonTable.getBucketingInfo(carbonTable.getFactTableName()));
  // configuration for one pass load: dictionary server info
  configuration.setUseOnePass(loadModel.getUseOnePass());
  configuration.setDictionaryServerHost(loadModel.getDictionaryServerHost());
  configuration.setDictionaryServerPort(loadModel.getDictionaryServerPort());
  configuration.setPreFetch(loadModel.isPreFetch());
  configuration.setNumberOfSortColumns(carbonTable.getNumberOfSortColumns());
  configuration.setNumberOfNoDictSortColumns(carbonTable.getNumberOfNoDictSortColumns());
  return configuration;
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonInputFormat method populateCarbonTable.
/**
 * Reads the table schema from the store and hands the resulting table to
 * {@code setCarbonTable} together with the given configuration.
 *
 * <p>The table path is the first entry obtained by splitting the {@code INPUT_DIR} property.
 *
 * @param configuration configuration holding the input paths; also passed to
 *                      {@code setCarbonTable}
 * @throws IOException declared by the signature; presumably raised while reading the schema
 */
private static void populateCarbonTable(Configuration configuration) throws IOException {
  String[] inputPaths = StringUtils.split(configuration.get(INPUT_DIR, ""));
  if (inputPaths.length < 1) {
    throw new InvalidPathException("No input paths specified in job");
  }
  AbsoluteTableIdentifier tableIdentifier =
      AbsoluteTableIdentifier.fromTablePath(inputPaths[0]);
  // read the schema file to get the absoluteTableIdentifier having the correct table id
  // persisted in the schema
  setCarbonTable(configuration, SchemaReader.readCarbonTableFromStore(tableIdentifier));
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonFactDataHandlerModel method createCarbonFactDataHandlerModel.
/**
 * Creates a {@link CarbonFactDataHandlerModel} from a {@link CarbonDataLoadConfiguration}.
 *
 * @param configuration load configuration supplying the table identifier, data fields,
 *                      cardinalities, table spec and the dimension/measure counts
 * @param storeLocation store locations set on the model; one entry, picked at random, is also
 *                      passed to the data map writer registration
 * @param bucketId bucket id assigned to the model
 * @param taskExtension task extension assigned to the model
 * @return the populated model
 */
public static CarbonFactDataHandlerModel createCarbonFactDataHandlerModel(
    CarbonDataLoadConfiguration configuration, String[] storeLocation, int bucketId,
    int taskExtension) {
  CarbonTableIdentifier tableIdentifier =
      configuration.getTableIdentifier().getCarbonTableIdentifier();
  boolean[] invertedIndexFlags =
      CarbonDataProcessorUtil.getIsUseInvertedIndex(configuration.getDataFields());

  // When the configuration does not report a sort table, every non-zero cardinality is
  // replaced by Integer.MAX_VALUE; zero entries are left untouched.
  int[] cardinalities = configuration.getCardinalityFinder().getCardinality();
  if (!configuration.isSortTable()) {
    for (int idx = 0; idx < cardinalities.length; idx++) {
      if (cardinalities[idx] == 0) {
        continue;
      }
      cardinalities[idx] = Integer.MAX_VALUE;
    }
  }

  String dbName = tableIdentifier.getDatabaseName();
  String tblName = tableIdentifier.getTableName();
  CarbonTable table = CarbonMetadata.getInstance().getCarbonTable(dbName, tblName);
  List<ColumnSchema> columnSchemas = CarbonUtil.getColumnSchemaList(
      table.getDimensionByTableName(tblName), table.getMeasureByTableName(tblName));
  int[] formattedCardinality = CarbonUtil.getFormattedCardinality(cardinalities, columnSchemas);
  SegmentProperties segmentProps = new SegmentProperties(columnSchemas, formattedCardinality);

  int[] dimLens = configuration.calcDimensionLengths();
  int dimensionCount = configuration.getDimensionCount();
  int noDictionaryCount = configuration.getNoDictionaryCount();
  int complexDimensionCount = configuration.getComplexColumnCount();
  int measureCount = configuration.getMeasureCount();

  // Leading entries of dimLens that remain after subtracting the no-dictionary and complex
  // counts from the dimension count.
  int primitiveDimCount = dimensionCount - noDictionaryCount - complexDimensionCount;
  int[] primitiveDimLens = new int[primitiveDimCount];
  for (int idx = 0; idx < primitiveDimLens.length; idx++) {
    primitiveDimLens[idx] = dimLens[idx];
  }

  // To set the MDKey index of each primitive type in a complex type: complex columns are keyed
  // right after the primitive dimensions, and surrogate indexes continue from the same offset.
  int complexColumnIndex = primitiveDimCount;
  int surrogateIndex = primitiveDimCount;
  Map<Integer, GenericDataType> complexIndexMap = new HashMap<>(complexDimensionCount);
  for (Map.Entry<String, GenericDataType> entry
      : CarbonDataProcessorUtil.getComplexTypesMap(configuration.getDataFields()).entrySet()) {
    GenericDataType complexType = entry.getValue();
    complexType.setOutputArrayIndex(0);
    complexIndexMap.put(complexColumnIndex++, complexType);

    List<GenericDataType> primitiveChildren = new ArrayList<>();
    complexType.getAllPrimitiveChildren(primitiveChildren);
    for (GenericDataType child : primitiveChildren) {
      child.setSurrogateIndex(surrogateIndex++);
    }
  }

  CarbonDataFileAttributes fileAttributes = new CarbonDataFileAttributes(
      Long.parseLong(configuration.getTaskNo()),
      (Long) configuration.getDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP));
  String dataDirectoryPath = getCarbonDataFolderLocation(configuration);

  CarbonFactDataHandlerModel model = new CarbonFactDataHandlerModel();
  model.setSchemaUpdatedTimeStamp(configuration.getSchemaUpdatedTimeStamp());
  model.setDatabaseName(dbName);
  model.setTableName(tblName);
  model.setMeasureCount(measureCount);
  model.setStoreLocation(storeLocation);
  model.setDimLens(dimLens);
  model.setNoDictionaryCount(noDictionaryCount);
  model.setDimensionCount(dimensionCount - noDictionaryCount);
  model.setComplexIndexMap(complexIndexMap);
  model.setSegmentProperties(segmentProps);
  model.setColCardinality(formattedCardinality);
  model.setMeasureDataType(configuration.getMeasureDataType());
  model.setWrapperColumnSchema(columnSchemas);
  model.setPrimitiveDimLens(primitiveDimLens);
  model.setCarbonDataFileAttributes(fileAttributes);
  model.setCarbonDataDirectoryPath(dataDirectoryPath);
  model.setIsUseInvertedIndex(invertedIndexFlags);
  model.setBlockSizeInMB(table.getBlockSizeInMB());
  model.setComplexDimensionKeyGenerator(configuration.createKeyGeneratorForComplexDimension());
  model.bucketId = bucketId;
  model.segmentId = configuration.getSegmentId();
  model.taskExtension = taskExtension;
  model.tableSpec = configuration.getTableSpec();
  model.sortScope = CarbonDataProcessorUtil.getSortScope(configuration);

  // Register the data map writers against one of the store locations, chosen at random.
  DataMapWriterListener writerListener = new DataMapWriterListener();
  writerListener.registerAllWriter(
      configuration.getTableSpec().getCarbonTable(),
      configuration.getSegmentId(),
      storeLocation[new Random().nextInt(storeLocation.length)]);
  model.dataMapWriterlistener = writerListener;
  model.writingCoresCount = configuration.getWritingCoresCount();
  return model;
}
Aggregations