Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in the Apache CarbonData project.
From the class IndexWriterListener, method onPageAdded.
/**
 * Hands a newly added table page to every registered index writer.
 *
 * <p>For each registry entry, the column pages of the indexed columns are looked up by
 * column name in {@code tablePage} and passed, together with the page size of the first
 * of those pages, to all writers registered for that column list.
 *
 * @param blockletId blocklet id, passed through unchanged to each writer
 * @param pageId sequence number of page, start from 0
 * @param tablePage page data
 * @throws IOException propagated from the calls made while dispatching the page
 */
public void onPageAdded(int blockletId, int pageId, TablePage tablePage) throws IOException {
  for (Map.Entry<List<CarbonColumn>, List<IndexWriter>> registration : registry.entrySet()) {
    List<CarbonColumn> columns = registration.getKey();

    // Collect the page of each indexed column, in the same order as the column list.
    ColumnPage[] columnPages = new ColumnPage[columns.size()];
    int slot = 0;
    for (CarbonColumn column : columns) {
      columnPages[slot++] = tablePage.getColumnPage(column.getColName());
    }

    List<IndexWriter> registeredWriters = registration.getValue();
    // The page size is read from the first indexed column's page.
    int rowsInPage = columnPages[0].getPageSize();
    for (IndexWriter registeredWriter : registeredWriters) {
      registeredWriter.onPageAdded(blockletId, pageId, rowsInPage, columnPages);
    }
  }
}
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in the Apache CarbonData project.
From the class IndexWriterListener, method register.
/**
 * Creates an IndexWriter from the given factory and registers it under the factory's
 * indexed columns.
 *
 * <p>A factory whose {@code getMeta()} returns {@code null} is ignored: nothing is created
 * or registered for it.
 *
 * @param factory index factory used to create the writer; asserted non-null
 * @param segmentId id of the segment the writer is created for; asserted non-null
 * @param taskNo task number passed to the factory
 * @param segmentProperties segment properties passed to the factory
 * @throws IndexWriterException if the factory throws an IOException while creating the writer
 */
private void register(IndexFactory factory, String segmentId, String taskNo, SegmentProperties segmentProperties) {
  assert (factory != null);
  assert (segmentId != null);
  IndexMeta meta = factory.getMeta();
  if (meta == null) {
    // if index does not have meta, no need to register
    return;
  }
  // Use the instance that was just null-checked; asking the factory again would bypass
  // the check above.
  List<CarbonColumn> columns = meta.getIndexedColumns();

  final IndexWriter writer;
  try {
    writer = factory.createWriter(new Segment(segmentId), taskNo, segmentProperties);
  } catch (IOException e) {
    LOG.error("Failed to create IndexWriter: " + e.getMessage(), e);
    throw new IndexWriterException(e);
  }

  // Writers that index the same column list share one registry entry.
  List<IndexWriter> writers = registry.get(columns);
  if (writers == null) {
    writers = new ArrayList<>();
    registry.put(columns, writers);
  }
  writers.add(writer);
  LOG.info("IndexWriter " + writer + " added");
}
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in the Apache CarbonData project.
From the class SparkDataTypeConverterImpl, method convertCarbonSchemaToSparkSchema.
/**
 * Builds Spark's StructField array from a CarbonColumn array.
 *
 * <p>The result has one slot per input column, in the same order. A column that is
 * neither a dimension nor a measure leaves its slot {@code null}.
 *
 * <ul>
 *   <li>Direct-dictionary dimension: the type returned by its direct dictionary generator.</li>
 *   <li>Dimension without dictionary encoding, or complex dimension: its own data type.</li>
 *   <li>Any other dimension: INT.</li>
 *   <li>BOOLEAN, SHORT, INT, LONG, BINARY or VARCHAR measure: its own data type.</li>
 *   <li>Decimal measure: a DecimalType with the measure's precision and scale.</li>
 *   <li>Any other measure: DOUBLE.</li>
 * </ul>
 *
 * @param carbonColumns columns to convert
 * @return the StructField array, returned as {@code Object[]}
 */
@Override
public Object[] convertCarbonSchemaToSparkSchema(CarbonColumn[] carbonColumns) {
  final int columnCount = carbonColumns.length;
  StructField[] sparkFields = new StructField[columnCount];
  for (int pos = 0; pos < columnCount; pos++) {
    CarbonColumn column = carbonColumns[pos];

    if (column.isDimension()) {
      if (column.hasEncoding(Encoding.DIRECT_DICTIONARY)) {
        DirectDictionaryGenerator directGenerator =
            DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(column.getDataType());
        sparkFields[pos] = new StructField(
            column.getColName(),
            convertCarbonToSparkDataType(directGenerator.getReturnType()),
            true,
            null);
      } else if (!column.hasEncoding(Encoding.DICTIONARY) || column.isComplex()) {
        // Non-dictionary and complex dimensions both keep their declared type.
        sparkFields[pos] = new StructField(
            column.getColName(),
            convertCarbonToSparkDataType(column.getDataType()),
            true,
            null);
      } else {
        // Remaining dictionary-encoded dimensions are exposed as INT.
        sparkFields[pos] = new StructField(
            column.getColName(),
            convertCarbonToSparkDataType(org.apache.carbondata.core.metadata.datatype.DataTypes.INT),
            true,
            null);
      }
      continue;
    }

    if (!column.isMeasure()) {
      // Neither dimension nor measure: the slot is left null.
      continue;
    }

    DataType measureType = column.getDataType();
    boolean keepsDeclaredType =
        measureType == org.apache.carbondata.core.metadata.datatype.DataTypes.BOOLEAN
            || measureType == org.apache.carbondata.core.metadata.datatype.DataTypes.SHORT
            || measureType == org.apache.carbondata.core.metadata.datatype.DataTypes.INT
            || measureType == org.apache.carbondata.core.metadata.datatype.DataTypes.LONG
            || measureType == org.apache.carbondata.core.metadata.datatype.DataTypes.BINARY
            || measureType == org.apache.carbondata.core.metadata.datatype.DataTypes.VARCHAR;
    if (keepsDeclaredType) {
      sparkFields[pos] = new StructField(
          column.getColName(), convertCarbonToSparkDataType(measureType), true, null);
    } else if (org.apache.carbondata.core.metadata.datatype.DataTypes.isDecimal(measureType)) {
      CarbonMeasure decimalMeasure = (CarbonMeasure) column;
      sparkFields[pos] = new StructField(
          column.getColName(),
          new DecimalType(decimalMeasure.getPrecision(), decimalMeasure.getScale()),
          true,
          null);
    } else {
      // Every other measure type is exposed as DOUBLE.
      sparkFields[pos] = new StructField(
          column.getColName(),
          convertCarbonToSparkDataType(org.apache.carbondata.core.metadata.datatype.DataTypes.DOUBLE),
          true,
          null);
    }
  }
  return sparkFields;
}
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in the Apache CarbonData project.
From the class StoreCreator, method writeDictionary.
/**
 * Writes a dictionary file and its sort index for every dimension of the table's fact
 * table, using the distinct values found in a comma-separated fact file.
 *
 * <p>The first line of the file is treated as a header and skipped. For every following
 * line, the i-th comma-separated value is collected for the i-th dimension, so each line
 * must contain at least as many values as there are dimensions.
 *
 * @param factFilePath path of the comma-separated fact file to read
 * @param table table whose fact-table dimensions get dictionaries
 * @throws Exception if reading the fact file or writing a dictionary or sort index fails
 */
private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
  List<CarbonDimension> dims = table.getDimensionByTableName(table.getFactTableName());

  // One set of distinct values per dimension, indexed like dims.
  List<Set<String>> distinctValues = new ArrayList<Set<String>>(dims.size());
  for (int i = 0; i < dims.size(); i++) {
    distinctValues.add(new HashSet<String>());
  }

  // Scope the reader to the reading phase so it is closed even when a line is malformed
  // or a later step throws.
  try (BufferedReader reader = new BufferedReader(new FileReader(factFilePath))) {
    // The first line is the header; it carries no dictionary values.
    reader.readLine();
    String line = reader.readLine();
    while (line != null) {
      String[] data = line.split(",");
      for (int i = 0; i < distinctValues.size(); i++) {
        distinctValues.get(i).add(data[i]);
      }
      line = reader.readLine();
    }
  }

  Cache dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY, absoluteTableIdentifier.getStorePath());
  for (int i = 0; i < distinctValues.size(); i++) {
    CarbonDimension dimension = dims.get(i);
    ColumnIdentifier columnIdentifier = new ColumnIdentifier(dimension.getColumnId(), null, null);

    // Write the dictionary values of this dimension.
    CarbonDictionaryWriter writer = new CarbonDictionaryWriterImpl(absoluteTableIdentifier.getStorePath(), absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier);
    for (String value : distinctValues.get(i)) {
      writer.write(value);
    }
    writer.close();
    writer.commit();

    // Load the dictionary back through the cache and derive its sort index from it.
    Dictionary dict = (Dictionary) dictCache.get(new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier, dimension.getDataType()));
    CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
    List<String> newDistinctValues = new ArrayList<String>();
    CarbonDictionarySortInfo dictionarySortInfo = preparator.getDictionarySortInfo(newDistinctValues, dict, dimension.getDataType());
    CarbonDictionarySortIndexWriter carbonDictionaryWriter = new CarbonDictionarySortIndexWriterImpl(absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier, absoluteTableIdentifier.getStorePath());
    try {
      carbonDictionaryWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
      carbonDictionaryWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
    } finally {
      carbonDictionaryWriter.close();
    }
  }
}
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in the Apache CarbonData project.
From the class DataLoadProcessBuilder, method createConfiguration.
/**
 * Builds the data load configuration for one load task from the given load model.
 *
 * <p>Besides filling the returned configuration, this method creates the temp store
 * directory if needed, adds the temp store location and the store path to
 * {@code CarbonProperties}, and adds the table to {@code CarbonMetadata}.
 *
 * @param loadModel load model supplying the table, load options and task identifiers
 * @param storeLocation local temp store path for this task
 * @return the populated configuration
 * @throws Exception declared by the signature; propagated from the calls made here
 */
private CarbonDataLoadConfiguration createConfiguration(CarbonLoadModel loadModel, String storeLocation) throws Exception {
  File storeDir = new File(storeLocation);
  // mkdirs() also returns false when the directory already exists, which is not an error;
  // only report a failure when the path is still not a directory afterwards.
  if (!storeDir.mkdirs() && !storeDir.isDirectory()) {
    LOGGER.error("Error while creating the temp store path: " + storeLocation);
  }
  CarbonDataLoadConfiguration configuration = new CarbonDataLoadConfiguration();
  String databaseName = loadModel.getDatabaseName();
  String tableName = loadModel.getTableName();
  String tempLocationKey = CarbonDataProcessorUtil.getTempStoreLocationKey(databaseName, tableName, loadModel.getTaskNo(), false);
  CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
  CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS, loadModel.getStorePath());

  // Table identity and task identifiers.
  CarbonTable carbonTable = loadModel.getCarbonDataLoadSchema().getCarbonTable();
  AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
  configuration.setTableIdentifier(identifier);
  configuration.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
  configuration.setHeader(loadModel.getCsvHeaderColumns());
  configuration.setPartitionId(loadModel.getPartitionId());
  configuration.setSegmentId(loadModel.getSegmentId());
  configuration.setTaskNo(loadModel.getTaskNo());

  // Load options. Several model values are comma-separated pairs of which only the
  // second element is used here.
  configuration.setDataLoadProperty(DataLoadProcessorConstants.COMPLEX_DELIMITERS, new String[] { loadModel.getComplexDelimiterLevel1(), loadModel.getComplexDelimiterLevel2() });
  configuration.setDataLoadProperty(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT, loadModel.getSerializationNullFormat().split(",")[1]);
  configuration.setDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP, loadModel.getFactTimeStamp());
  configuration.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ENABLE, loadModel.getBadRecordsLoggerEnable().split(",")[1]);
  configuration.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ACTION, loadModel.getBadRecordsAction().split(",")[1]);
  configuration.setDataLoadProperty(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD, loadModel.getIsEmptyDataBadRecord().split(",")[1]);
  configuration.setDataLoadProperty(DataLoadProcessorConstants.FACT_FILE_PATH, loadModel.getFactFilePath());
  configuration.setDataLoadProperty(CarbonCommonConstants.LOAD_SORT_SCOPE, loadModel.getSortScope());
  configuration.setDataLoadProperty(CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB, loadModel.getBatchSortSizeInMb());
  CarbonMetadata.getInstance().addCarbonTable(carbonTable);

  List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(carbonTable.getFactTableName());
  List<CarbonMeasure> measures = carbonTable.getMeasureByTableName(carbonTable.getFactTableName());
  Map<String, String> dateFormatMap = CarbonDataProcessorUtil.getDateFormatMap(loadModel.getDateFormat());
  List<DataField> dataFields = new ArrayList<>();
  List<DataField> complexDataFields = new ArrayList<>();
  // Field order: non-complex dimensions first, then complex dimensions, then measures.
  for (CarbonColumn column : dimensions) {
    DataField dataField = new DataField(column);
    dataField.setDateFormat(dateFormatMap.get(column.getColName()));
    if (column.isComplex()) {
      complexDataFields.add(dataField);
    } else {
      dataFields.add(dataField);
    }
  }
  dataFields.addAll(complexDataFields);
  for (CarbonColumn column : measures) {
    // This dummy measure is added when no measure was present. There is no need to load it.
    if (!(column.getColName().equals("default_dummy_measure"))) {
      dataFields.add(new DataField(column));
    }
  }
  configuration.setDataFields(dataFields.toArray(new DataField[dataFields.size()]));
  configuration.setBucketingInfo(carbonTable.getBucketingInfo(carbonTable.getFactTableName()));
  // configuration for one pass load: dictionary server info
  configuration.setUseOnePass(loadModel.getUseOnePass());
  configuration.setDictionaryServerHost(loadModel.getDictionaryServerHost());
  configuration.setDictionaryServerPort(loadModel.getDictionaryServerPort());
  configuration.setPreFetch(loadModel.isPreFetch());
  configuration.setNumberOfSortColumns(carbonTable.getNumberOfSortColumns());
  configuration.setNumberOfNoDictSortColumns(carbonTable.getNumberOfNoDictSortColumns());
  return configuration;
}
Aggregations