use of org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure in project carbondata by apache.
the class StoreCreator method writeDictionary.
private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
BufferedReader reader = new BufferedReader(new FileReader(factFilePath));
String header = reader.readLine();
String[] split = header.split(",");
List<CarbonColumn> allCols = new ArrayList<CarbonColumn>();
List<CarbonDimension> dims = table.getDimensionByTableName(table.getFactTableName());
allCols.addAll(dims);
List<CarbonMeasure> msrs = table.getMeasureByTableName(table.getFactTableName());
allCols.addAll(msrs);
Set<String>[] set = new HashSet[dims.size()];
for (int i = 0; i < set.length; i++) {
set[i] = new HashSet<String>();
}
String line = reader.readLine();
while (line != null) {
String[] data = line.split(",");
for (int i = 0; i < set.length; i++) {
set[i].add(data[i]);
}
line = reader.readLine();
}
Cache dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY, absoluteTableIdentifier.getStorePath());
for (int i = 0; i < set.length; i++) {
ColumnIdentifier columnIdentifier = new ColumnIdentifier(dims.get(i).getColumnId(), null, null);
CarbonDictionaryWriter writer = new CarbonDictionaryWriterImpl(absoluteTableIdentifier.getStorePath(), absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier);
for (String value : set[i]) {
writer.write(value);
}
writer.close();
writer.commit();
Dictionary dict = (Dictionary) dictCache.get(new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier, dims.get(i).getDataType()));
CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
List<String> newDistinctValues = new ArrayList<String>();
CarbonDictionarySortInfo dictionarySortInfo = preparator.getDictionarySortInfo(newDistinctValues, dict, dims.get(i).getDataType());
CarbonDictionarySortIndexWriter carbonDictionaryWriter = new CarbonDictionarySortIndexWriterImpl(absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier, absoluteTableIdentifier.getStorePath());
try {
carbonDictionaryWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
carbonDictionaryWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
} finally {
carbonDictionaryWriter.close();
}
}
reader.close();
}
use of org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure in project carbondata by apache.
the class CarbonFactDataHandlerModel method getCarbonFactDataHandlerModel.
/**
* This method will create a model object for carbon fact data handler
*
* @param loadModel
* @return
*/
public static CarbonFactDataHandlerModel getCarbonFactDataHandlerModel(CarbonLoadModel loadModel, CarbonTable carbonTable, SegmentProperties segmentProperties, String tableName, String tempStoreLocation) {
CarbonFactDataHandlerModel carbonFactDataHandlerModel = new CarbonFactDataHandlerModel();
carbonFactDataHandlerModel.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
carbonFactDataHandlerModel.setDatabaseName(loadModel.getDatabaseName());
carbonFactDataHandlerModel.setTableName(tableName);
carbonFactDataHandlerModel.setMeasureCount(segmentProperties.getMeasures().size());
carbonFactDataHandlerModel.setStoreLocation(tempStoreLocation);
carbonFactDataHandlerModel.setDimLens(segmentProperties.getDimColumnsCardinality());
carbonFactDataHandlerModel.setSegmentProperties(segmentProperties);
carbonFactDataHandlerModel.setNoDictionaryCount(segmentProperties.getNumberOfNoDictionaryDimension());
carbonFactDataHandlerModel.setDimensionCount(segmentProperties.getDimensions().size() - carbonFactDataHandlerModel.getNoDictionaryCount());
List<ColumnSchema> wrapperColumnSchema = CarbonUtil.getColumnSchemaList(carbonTable.getDimensionByTableName(tableName), carbonTable.getMeasureByTableName(tableName));
carbonFactDataHandlerModel.setWrapperColumnSchema(wrapperColumnSchema);
// get the cardinality for all all the columns including no dictionary columns
int[] formattedCardinality = CarbonUtil.getFormattedCardinality(segmentProperties.getDimColumnsCardinality(), wrapperColumnSchema);
carbonFactDataHandlerModel.setColCardinality(formattedCardinality);
//TO-DO Need to handle complex types here .
Map<Integer, GenericDataType> complexIndexMap = new HashMap<Integer, GenericDataType>(segmentProperties.getComplexDimensions().size());
carbonFactDataHandlerModel.setComplexIndexMap(complexIndexMap);
DataType[] aggType = new DataType[segmentProperties.getMeasures().size()];
int i = 0;
for (CarbonMeasure msr : segmentProperties.getMeasures()) {
aggType[i++] = msr.getDataType();
}
carbonFactDataHandlerModel.setMeasureDataType(aggType);
String carbonDataDirectoryPath = CarbonDataProcessorUtil.checkAndCreateCarbonStoreLocation(loadModel.getStorePath(), loadModel.getDatabaseName(), tableName, loadModel.getPartitionId(), loadModel.getSegmentId());
carbonFactDataHandlerModel.setCarbonDataDirectoryPath(carbonDataDirectoryPath);
List<CarbonDimension> dimensionByTableName = loadModel.getCarbonDataLoadSchema().getCarbonTable().getDimensionByTableName(tableName);
boolean[] isUseInvertedIndexes = new boolean[dimensionByTableName.size()];
int index = 0;
for (CarbonDimension dimension : dimensionByTableName) {
isUseInvertedIndexes[index++] = dimension.isUseInvertedIndex();
}
carbonFactDataHandlerModel.setIsUseInvertedIndex(isUseInvertedIndexes);
carbonFactDataHandlerModel.setPrimitiveDimLens(segmentProperties.getDimColumnsCardinality());
carbonFactDataHandlerModel.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
return carbonFactDataHandlerModel;
}
use of org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure in project carbondata by apache.
the class QueryModel method setDimAndMsrColumnNode.
private static void setDimAndMsrColumnNode(List<CarbonDimension> dimensions, List<CarbonMeasure> measures, ColumnExpression col) {
CarbonDimension dim;
CarbonMeasure msr;
String columnName;
columnName = col.getColumnName();
dim = CarbonUtil.findDimension(dimensions, columnName);
col.setCarbonColumn(dim);
col.setDimension(dim);
col.setDimension(true);
if (null == dim) {
msr = getCarbonMetadataMeasure(columnName, measures);
col.setCarbonColumn(msr);
col.setDimension(false);
}
}
use of org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure in project carbondata by apache.
the class CarbonCompactionUtil method updateColumnSchemaAndGetCardinality.
/**
* This method will return the updated cardinality according to the master schema
*
* @param columnCardinalityMap
* @param carbonTable
* @param updatedColumnSchemaList
* @return
*/
public static int[] updateColumnSchemaAndGetCardinality(Map<String, Integer> columnCardinalityMap, CarbonTable carbonTable, List<ColumnSchema> updatedColumnSchemaList) {
List<CarbonDimension> masterDimensions = carbonTable.getDimensionByTableName(carbonTable.getFactTableName());
List<Integer> updatedCardinalityList = new ArrayList<>(columnCardinalityMap.size());
for (CarbonDimension dimension : masterDimensions) {
Integer value = columnCardinalityMap.get(dimension.getColumnId());
if (null == value) {
updatedCardinalityList.add(getDimensionDefaultCardinality(dimension));
} else {
updatedCardinalityList.add(value);
}
updatedColumnSchemaList.add(dimension.getColumnSchema());
}
// add measures to the column schema list
List<CarbonMeasure> masterSchemaMeasures = carbonTable.getMeasureByTableName(carbonTable.getFactTableName());
for (CarbonMeasure measure : masterSchemaMeasures) {
updatedColumnSchemaList.add(measure.getColumnSchema());
}
return ArrayUtils.toPrimitive(updatedCardinalityList.toArray(new Integer[updatedCardinalityList.size()]));
}
use of org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure in project carbondata by apache.
the class DataLoadProcessBuilder method createConfiguration.
private CarbonDataLoadConfiguration createConfiguration(CarbonLoadModel loadModel, String storeLocation) throws Exception {
if (!new File(storeLocation).mkdirs()) {
LOGGER.error("Error while creating the temp store path: " + storeLocation);
}
CarbonDataLoadConfiguration configuration = new CarbonDataLoadConfiguration();
String databaseName = loadModel.getDatabaseName();
String tableName = loadModel.getTableName();
String tempLocationKey = CarbonDataProcessorUtil.getTempStoreLocationKey(databaseName, tableName, loadModel.getTaskNo(), false);
CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS, loadModel.getStorePath());
CarbonTable carbonTable = loadModel.getCarbonDataLoadSchema().getCarbonTable();
AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
configuration.setTableIdentifier(identifier);
configuration.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
configuration.setHeader(loadModel.getCsvHeaderColumns());
configuration.setPartitionId(loadModel.getPartitionId());
configuration.setSegmentId(loadModel.getSegmentId());
configuration.setTaskNo(loadModel.getTaskNo());
configuration.setDataLoadProperty(DataLoadProcessorConstants.COMPLEX_DELIMITERS, new String[] { loadModel.getComplexDelimiterLevel1(), loadModel.getComplexDelimiterLevel2() });
configuration.setDataLoadProperty(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT, loadModel.getSerializationNullFormat().split(",")[1]);
configuration.setDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP, loadModel.getFactTimeStamp());
configuration.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ENABLE, loadModel.getBadRecordsLoggerEnable().split(",")[1]);
configuration.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ACTION, loadModel.getBadRecordsAction().split(",")[1]);
configuration.setDataLoadProperty(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD, loadModel.getIsEmptyDataBadRecord().split(",")[1]);
configuration.setDataLoadProperty(DataLoadProcessorConstants.FACT_FILE_PATH, loadModel.getFactFilePath());
configuration.setDataLoadProperty(CarbonCommonConstants.LOAD_SORT_SCOPE, loadModel.getSortScope());
configuration.setDataLoadProperty(CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB, loadModel.getBatchSortSizeInMb());
CarbonMetadata.getInstance().addCarbonTable(carbonTable);
List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(carbonTable.getFactTableName());
List<CarbonMeasure> measures = carbonTable.getMeasureByTableName(carbonTable.getFactTableName());
Map<String, String> dateFormatMap = CarbonDataProcessorUtil.getDateFormatMap(loadModel.getDateFormat());
List<DataField> dataFields = new ArrayList<>();
List<DataField> complexDataFields = new ArrayList<>();
// And then add complex data types and measures.
for (CarbonColumn column : dimensions) {
DataField dataField = new DataField(column);
dataField.setDateFormat(dateFormatMap.get(column.getColName()));
if (column.isComplex()) {
complexDataFields.add(dataField);
} else {
dataFields.add(dataField);
}
}
dataFields.addAll(complexDataFields);
for (CarbonColumn column : measures) {
// This dummy measure is added when no measure was present. We no need to load it.
if (!(column.getColName().equals("default_dummy_measure"))) {
dataFields.add(new DataField(column));
}
}
configuration.setDataFields(dataFields.toArray(new DataField[dataFields.size()]));
configuration.setBucketingInfo(carbonTable.getBucketingInfo(carbonTable.getFactTableName()));
// configuration for one pass load: dictionary server info
configuration.setUseOnePass(loadModel.getUseOnePass());
configuration.setDictionaryServerHost(loadModel.getDictionaryServerHost());
configuration.setDictionaryServerPort(loadModel.getDictionaryServerPort());
configuration.setPreFetch(loadModel.isPreFetch());
configuration.setNumberOfSortColumns(carbonTable.getNumberOfSortColumns());
configuration.setNumberOfNoDictSortColumns(carbonTable.getNumberOfNoDictSortColumns());
return configuration;
}
Aggregations