Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.
The class StoreCreator, method writeDictionary.
private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
  BufferedReader reader = new BufferedReader(new FileReader(factFilePath));
  String header = reader.readLine();
  String[] split = header.split(",");
  List<CarbonColumn> allCols = new ArrayList<CarbonColumn>();
  List<CarbonDimension> dims = table.getDimensionByTableName(table.getTableName());
  allCols.addAll(dims);
  List<CarbonMeasure> msrs = table.getMeasureByTableName(table.getTableName());
  allCols.addAll(msrs);
  // collect the distinct values of each dimension column from the fact file
  Set<String>[] set = new HashSet[dims.size()];
  for (int i = 0; i < set.length; i++) {
    set[i] = new HashSet<String>();
  }
  String line = reader.readLine();
  while (line != null) {
    String[] data = line.split(",");
    for (int i = 0; i < set.length; i++) {
      set[i].add(data[i]);
    }
    line = reader.readLine();
  }
  Cache dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY);
  for (int i = 0; i < set.length; i++) {
    ColumnIdentifier columnIdentifier = new ColumnIdentifier(dims.get(i).getColumnId(), null, null);
    DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier =
        new DictionaryColumnUniqueIdentifier(table.getAbsoluteTableIdentifier(), columnIdentifier,
            columnIdentifier.getDataType());
    // write this dimension's distinct values into its dictionary file
    CarbonDictionaryWriter writer = new CarbonDictionaryWriterImpl(dictionaryColumnUniqueIdentifier);
    for (String value : set[i]) {
      writer.write(value);
    }
    writer.close();
    writer.commit();
    Dictionary dict = (Dictionary) dictCache.get(
        new DictionaryColumnUniqueIdentifier(table.getAbsoluteTableIdentifier(), columnIdentifier,
            dims.get(i).getDataType()));
    CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
    List<String> newDistinctValues = new ArrayList<String>();
    CarbonDictionarySortInfo dictionarySortInfo =
        preparator.getDictionarySortInfo(newDistinctValues, dict, dims.get(i).getDataType());
    CarbonDictionarySortIndexWriter carbonDictionaryWriter =
        new CarbonDictionarySortIndexWriterImpl(dictionaryColumnUniqueIdentifier);
    try {
      carbonDictionaryWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
      carbonDictionaryWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
    } finally {
      carbonDictionaryWriter.close();
    }
  }
  reader.close();
}
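The first half of this method is a plain-JDK pattern: skip the header line, then accumulate one Set of distinct values per dimension column. A minimal standalone sketch of just that collection step, with a hypothetical file path and column count (no carbondata APIs involved):

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.HashSet;
import java.util.Set;

public class DistinctValueCollector {
  public static void main(String[] args) throws Exception {
    // hypothetical CSV fact file: a header line followed by comma-separated rows
    String factFilePath = "/tmp/fact.csv";
    int dimensionCount = 3; // assume the first three columns are dimensions
    @SuppressWarnings("unchecked")
    Set<String>[] distinct = new HashSet[dimensionCount];
    for (int i = 0; i < distinct.length; i++) {
      distinct[i] = new HashSet<String>();
    }
    try (BufferedReader reader = new BufferedReader(new FileReader(factFilePath))) {
      reader.readLine(); // skip the header line, as writeDictionary does
      String line;
      while ((line = reader.readLine()) != null) {
        String[] data = line.split(",");
        for (int i = 0; i < distinct.length; i++) {
          distinct[i].add(data[i]);
        }
      }
    }
    for (int i = 0; i < distinct.length; i++) {
      System.out.println("column " + i + " has " + distinct[i].size() + " distinct values");
    }
  }
}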
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.
The class CarbonStreamRecordReader, method initialize.
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  // input
  if (split instanceof CarbonInputSplit) {
    fileSplit = (CarbonInputSplit) split;
  } else if (split instanceof CarbonMultiBlockSplit) {
    fileSplit = ((CarbonMultiBlockSplit) split).getAllSplits().get(0);
  } else {
    fileSplit = (FileSplit) split;
  }
  // metadata
  hadoopConf = context.getConfiguration();
  if (model == null) {
    CarbonTableInputFormat format = new CarbonTableInputFormat<Object>();
    model = format.createQueryModel(split, context);
  }
  carbonTable = model.getTable();
  List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(carbonTable.getTableName());
  dimensionCount = dimensions.size();
  List<CarbonMeasure> measures = carbonTable.getMeasureByTableName(carbonTable.getTableName());
  measureCount = measures.size();
  List<CarbonColumn> carbonColumnList = carbonTable.getStreamStorageOrderColumn(carbonTable.getTableName());
  storageColumns = carbonColumnList.toArray(new CarbonColumn[carbonColumnList.size()]);
  isNoDictColumn = CarbonDataProcessorUtil.getNoDictionaryMapping(storageColumns);
  directDictionaryGenerators = new DirectDictionaryGenerator[storageColumns.length];
  for (int i = 0; i < storageColumns.length; i++) {
    if (storageColumns[i].hasEncoding(Encoding.DIRECT_DICTIONARY)) {
      directDictionaryGenerators[i] =
          DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(storageColumns[i].getDataType());
    }
  }
  measureDataTypes = new DataType[measureCount];
  for (int i = 0; i < measureCount; i++) {
    measureDataTypes[i] = storageColumns[dimensionCount + i].getDataType();
  }
  // decode data
  allNonNull = new BitSet(storageColumns.length);
  projection = model.getProjectionColumns();
  isRequired = new boolean[storageColumns.length];
  boolean[] isFilterDimensions = model.getIsFilterDimensions();
  boolean[] isFilterMeasures = model.getIsFilterMeasures();
  isFilterRequired = new boolean[storageColumns.length];
  filterMap = new int[storageColumns.length];
  for (int i = 0; i < storageColumns.length; i++) {
    if (storageColumns[i].isDimension()) {
      if (isFilterDimensions[storageColumns[i].getOrdinal()]) {
        isRequired[i] = true;
        isFilterRequired[i] = true;
        filterMap[i] = storageColumns[i].getOrdinal();
      }
    } else {
      if (isFilterMeasures[storageColumns[i].getOrdinal()]) {
        isRequired[i] = true;
        isFilterRequired[i] = true;
        // measures are addressed after all dimensions in the filter layout
        filterMap[i] = carbonTable.getDimensionOrdinalMax() + storageColumns[i].getOrdinal();
      }
    }
  }
  isProjectionRequired = new boolean[storageColumns.length];
  projectionMap = new int[storageColumns.length];
  for (int i = 0; i < storageColumns.length; i++) {
    for (int j = 0; j < projection.length; j++) {
      if (storageColumns[i].getColName().equals(projection[j].getColName())) {
        isRequired[i] = true;
        isProjectionRequired[i] = true;
        projectionMap[i] = j;
        break;
      }
    }
  }
  // initialize filter
  if (null != model.getFilterExpressionResolverTree()) {
    initializeFilter();
  } else if (projection.length == 0) {
    skipScanData = true;
  }
}
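The projection bookkeeping above boils down to mapping each storage-order position to its position in the query's projection. A self-contained sketch of that mapping, using plain strings as stand-ins for CarbonColumn (all column names are hypothetical):

import java.util.Arrays;

public class ProjectionMapping {
  public static void main(String[] args) {
    // columns in stream storage order (stand-ins for CarbonColumn)
    String[] storageColumns = {"city", "name", "age", "salary"};
    // columns the query actually selects
    String[] projection = {"age", "city"};

    boolean[] isProjectionRequired = new boolean[storageColumns.length];
    int[] projectionMap = new int[storageColumns.length];
    for (int i = 0; i < storageColumns.length; i++) {
      for (int j = 0; j < projection.length; j++) {
        if (storageColumns[i].equals(projection[j])) {
          isProjectionRequired[i] = true;
          projectionMap[i] = j; // storage position i feeds output position j
          break;
        }
      }
    }
    System.out.println(Arrays.toString(isProjectionRequired)); // [true, false, true, false]
    System.out.println(Arrays.toString(projectionMap)); // [1, 0, 0, 0]
  }
}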
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.
The class CarbonLoadModelBuilder, method build.
/**
 * Build a CarbonLoadModel for data loading.
 * @param options load options from user input
 * @return a new CarbonLoadModel instance
 */
public CarbonLoadModel build(Map<String, String> options) throws InvalidLoadOptionException, IOException {
  Map<String, String> optionsFinal = LoadOption.fillOptionWithDefaultValue(options);
  if (!options.containsKey("fileheader")) {
    List<CarbonColumn> csvHeader = table.getCreateOrderColumn(table.getTableName());
    String[] columns = new String[csvHeader.size()];
    for (int i = 0; i < columns.length; i++) {
      columns[i] = csvHeader.get(i).getColName();
    }
    optionsFinal.put("fileheader", Strings.mkString(columns, ","));
  }
  CarbonLoadModel model = new CarbonLoadModel();
  // 'fileheader' is already provided, so hadoopConf can be null here
  build(options, optionsFinal, model, null);
  // set default values
  model.setTimestampformat(CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
  model.setDateFormat(CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT);
  model.setUseOnePass(Boolean.parseBoolean(Maps.getOrDefault(options, "onepass", "false")));
  model.setDictionaryServerHost(Maps.getOrDefault(options, "dicthost", null));
  try {
    model.setDictionaryServerPort(Integer.parseInt(Maps.getOrDefault(options, "dictport", "-1")));
  } catch (NumberFormatException e) {
    throw new InvalidLoadOptionException(e.getMessage());
  }
  return model;
}
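A hedged usage sketch for this builder: it assumes an initialized CarbonTable and the single-argument CarbonLoadModelBuilder constructor; the option keys mirror the ones read above, while the header string, host, and port values are placeholders.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

// hypothetical helper: `table` must be an initialized CarbonTable, and
// CarbonLoadModelBuilder is assumed to take it as a constructor argument
static CarbonLoadModel buildLoadModel(CarbonTable table)
    throws InvalidLoadOptionException, IOException {
  Map<String, String> options = new HashMap<>();
  options.put("fileheader", "id,name,city,salary"); // skips the create-order fallback above
  options.put("onepass", "true"); // enable single-pass loading
  options.put("dicthost", "localhost"); // dictionary server host
  options.put("dictport", "2030"); // must parse as an int, else InvalidLoadOptionException
  return new CarbonLoadModelBuilder(table).build(options);
}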
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.
The class MapredCarbonInputFormat, method getProjection.
/**
 * Return the projection for the Carbon query.
 *
 * @param configuration Hadoop configuration that may carry the Hive read-column names
 * @param carbonTable   the table being queried
 * @param tableName     name of the table
 * @return a comma-separated list of projected column names
 */
private String getProjection(Configuration configuration, CarbonTable carbonTable, String tableName) {
  // the query plan includes the projection columns
  String projection = getColumnProjection(configuration);
  if (projection == null) {
    projection = configuration.get("hive.io.file.readcolumn.names");
  }
  List<CarbonColumn> carbonColumns = carbonTable.getCreateOrderColumn(tableName);
  List<String> carbonColumnNames = new ArrayList<>();
  StringBuilder allColumns = new StringBuilder();
  StringBuilder projectionColumns = new StringBuilder();
  for (CarbonColumn column : carbonColumns) {
    carbonColumnNames.add(column.getColName().toLowerCase());
    allColumns.append(column.getColName()).append(",");
  }
  // guard against a missing projection as well as an empty one
  if (projection != null && !projection.equals("")) {
    String[] columnNames = projection.split(",");
    // verify that the columns parsed by Hive exist in the table
    for (String col : columnNames) {
      // keep only the projected columns the table actually contains
      if (carbonColumnNames.contains(col.toLowerCase())) {
        projectionColumns.append(col).append(",");
      }
    }
    // strip the trailing comma
    return projectionColumns.substring(0, projectionColumns.lastIndexOf(","));
  } else {
    // no projection given: fall back to all table columns
    return allColumns.substring(0, allColumns.lastIndexOf(","));
  }
}
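The containment check and trailing-comma trim are easy to verify in isolation. A standalone sketch with hypothetical column names, reproducing just that logic:

import java.util.Arrays;
import java.util.List;

public class ProjectionFilter {
  public static void main(String[] args) {
    // table columns in create order (hypothetical names)
    List<String> tableColumns = Arrays.asList("id", "name", "city");
    // projection string as Hive might pass it, including an unknown column
    String projection = "NAME,phone,id";

    StringBuilder projectionColumns = new StringBuilder();
    for (String col : projection.split(",")) {
      // keep only columns that exist in the table, compared case-insensitively
      if (tableColumns.contains(col.toLowerCase())) {
        projectionColumns.append(col).append(",");
      }
    }
    // strip the trailing comma, as getProjection does
    System.out.println(projectionColumns.substring(0, projectionColumns.lastIndexOf(",")));
    // prints: NAME,id
  }
}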
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.
The class CarbondataMetadata, method getTableMetadata.
private ConnectorTableMetadata getTableMetadata(SchemaTableName schemaTableName) {
  if (!listSchemaNamesInternal().contains(schemaTableName.getSchemaName())) {
    throw new SchemaNotFoundException(schemaTableName.getSchemaName());
  }
  CarbonTable carbonTable = carbonTableReader.getTable(schemaTableName);
  List<ColumnMetadata> columnsMetaList = new LinkedList<>();
  List<CarbonColumn> carbonColumns = carbonTable.getCreateOrderColumn(schemaTableName.getTableName());
  for (CarbonColumn col : carbonColumns) {
    // this is the column data a SHOW COLUMNS command returns
    Type columnType = carbonDataType2SpiMapper(col.getColumnSchema());
    ColumnMetadata columnMeta = new ColumnMetadata(col.getColumnSchema().getColumnName(), columnType);
    columnsMetaList.add(columnMeta);
  }
  // the carbondata connector's table metadata
  return new ConnectorTableMetadata(schemaTableName, columnsMetaList);
}
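carbonDataType2SpiMapper is the bridge between CarbonData column schemas and the connector SPI's type system. As an illustration only, here is a hypothetical mapping that returns SPI type names as plain strings; the real mapper returns SPI Type instances, and the case labels below are assumptions, not the exact CarbonData type enum.

// illustrative only: maps a CarbonData type name to an SPI type name
static String carbonTypeToSpiName(String carbonTypeName) {
  switch (carbonTypeName.toUpperCase()) {
    case "STRING":
      return "varchar";
    case "INT":
      return "integer";
    case "LONG":
      return "bigint";
    case "DOUBLE":
      return "double";
    case "TIMESTAMP":
      return "timestamp";
    default:
      return "varchar"; // fallback assumed for this sketch
  }
}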