Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
The class IncrementalColumnDictionaryGenerator, method writeDictionaryData:
@Override
public void writeDictionaryData(String tableUniqueName) throws IOException {
  // initialize params
  CarbonMetadata metadata = CarbonMetadata.getInstance();
  CarbonTable carbonTable = metadata.getCarbonTable(tableUniqueName);
  CarbonTableIdentifier tableIdentifier = carbonTable.getCarbonTableIdentifier();
  ColumnIdentifier columnIdentifier = dimension.getColumnIdentifier();
  String storePath = carbonTable.getStorePath();
  DictionaryService dictionaryService = CarbonCommonFactory.getDictionaryService();
  // create dictionary cache from the dictionary file
  DictionaryColumnUniqueIdentifier identifier = new DictionaryColumnUniqueIdentifier(
      tableIdentifier, columnIdentifier, columnIdentifier.getDataType());
  boolean isDictExists = CarbonUtil.isFileExistsForGivenColumn(storePath, identifier);
  Dictionary dictionary = null;
  long t1 = System.currentTimeMillis();
  if (isDictExists) {
    Cache<DictionaryColumnUniqueIdentifier, Dictionary> dictCache =
        CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY, storePath);
    dictionary = dictCache.get(identifier);
  }
  long dictCacheTime = System.currentTimeMillis() - t1;
  long t2 = System.currentTimeMillis();
  // write dictionary
  CarbonDictionaryWriter dictionaryWriter =
      dictionaryService.getDictionaryWriter(tableIdentifier, columnIdentifier, storePath);
  List<String> distinctValues = writeDictionary(dictionaryWriter, isDictExists);
  long dictWriteTime = System.currentTimeMillis() - t2;
  long t3 = System.currentTimeMillis();
  // write sort index
  if (distinctValues.size() > 0) {
    writeSortIndex(distinctValues, dictionary, dictionaryService, tableIdentifier,
        columnIdentifier, storePath);
  }
  long sortIndexWriteTime = System.currentTimeMillis() - t3;
  // update metadata
  updateMetaData(dictionaryWriter);
  LOGGER.audit("\n columnName: " + dimension.getColName()
      + "\n columnId: " + dimension.getColumnId()
      + "\n new distinct values count: " + distinctValues.size()
      + "\n create dictionary cache: " + dictCacheTime
      + "\n sort list, distinct and write: " + dictWriteTime
      + "\n write sort info: " + sortIndexWriteTime);
}
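For context, a minimal sketch of how such a generator might be driven during a load. This is hypothetical driver code, not taken from the project: the constructor arguments and the generateKey call are assumptions about the surrounding generator API.

// Hypothetical usage: assign surrogate keys for incoming values, then
// persist the newly generated entries in one shot.
IncrementalColumnDictionaryGenerator generator =
    new IncrementalColumnDictionaryGenerator(dimension, currentMaxKey); // assumed constructor
for (String value : incomingValues) {
  Integer surrogateKey = generator.generateKey(value); // assumed API: reuse or assign a key
}
generator.writeDictionaryData(tableUniqueName); // writes dictionary, sort index and metadata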
Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
The class ServerDictionaryGenerator, method initializeGeneratorForTable:
public void initializeGeneratorForTable(DictionaryMessage key) {
  CarbonMetadata metadata = CarbonMetadata.getInstance();
  CarbonTable carbonTable = metadata.getCarbonTable(key.getTableUniqueName());
  CarbonDimension dimension =
      carbonTable.getPrimitiveDimensionByName(key.getTableUniqueName(), key.getColumnName());
  // create the TableDictionaryGenerator on first use for this table,
  // otherwise register the dimension with the existing generator
  if (tableMap.get(key.getTableUniqueName()) == null) {
    tableMap.put(key.getTableUniqueName(), new TableDictionaryGenerator(dimension));
  } else {
    tableMap.get(key.getTableUniqueName()).updateGenerator(dimension);
  }
}
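A hedged sketch of the server-side flow that would call this method; the generateKey entry point is an assumption about the surrounding class, not shown above. Note that the check-then-act on tableMap is only safe if tableMap handles concurrent initialization or calls are serialized.

// Hypothetical dispatch on the dictionary server: ensure a per-table
// generator exists, then delegate key generation to it.
ServerDictionaryGenerator serverGenerator = new ServerDictionaryGenerator();
serverGenerator.initializeGeneratorForTable(message);        // message is a DictionaryMessage
Integer surrogateKey = serverGenerator.generateKey(message); // assumed delegate method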
Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
The class QueryUtil, method resolveQueryModel:
/**
 * Below method will be used to resolve the query model.
 * Resolving sets the actual dimension and measure objects,
 * as only column names are passed from the driver to avoid
 * heavy object serialization.
 *
 * @param queryModel query model
 */
public static void resolveQueryModel(QueryModel queryModel) {
  CarbonMetadata.getInstance().addCarbonTable(queryModel.getTable());
  // TODO need to load the table from table identifier
  CarbonTable carbonTable = queryModel.getTable();
  String tableName =
      queryModel.getAbsoluteTableIdentifier().getCarbonTableIdentifier().getTableName();
  // resolve query dimensions
  for (QueryDimension queryDimension : queryModel.getQueryDimension()) {
    queryDimension.setDimension(
        carbonTable.getDimensionByName(tableName, queryDimension.getColumnName()));
  }
  // resolve query measures
  for (QueryMeasure queryMeasure : queryModel.getQueryMeasures()) {
    // currently this condition always holds; the check is kept in case
    // other aggregate expressions are routed here in future
    if (queryMeasure.getColumnName().equals("count(*)")) {
      if (carbonTable.getMeasureByTableName(tableName).size() > 0
          && !carbonTable.getMeasureByTableName(tableName).get(0).getColName()
              .equals(CarbonCommonConstants.DEFAULT_INVISIBLE_DUMMY_MEASURE)) {
        queryMeasure.setMeasure(carbonTable.getMeasureByTableName(tableName).get(0));
      } else {
        CarbonMeasure dummyMeasure = new CarbonMeasure(
            carbonTable.getDimensionByTableName(tableName).get(0).getColumnSchema(), 0);
        queryMeasure.setMeasure(dummyMeasure);
      }
    } else {
      queryMeasure.setMeasure(
          carbonTable.getMeasureByName(tableName, queryMeasure.getColumnName()));
    }
  }
}
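As a hedged illustration of the driver/executor split this method supports: the deserialization helper below is hypothetical, introduced only to show where resolution fits.

// Hypothetical executor-side entry point: the deserialized model carries
// only column names, so re-attach the schema objects before execution.
QueryModel queryModel = readQueryModelFromDriver(); // hypothetical deserialization helper
QueryUtil.resolveQueryModel(queryModel);
// each QueryDimension/QueryMeasure now holds a resolved CarbonDimension/CarbonMeasure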
Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
The class QueryUtil, method getDictionaryColumnUniqueIdentifierList:
/**
 * Below method will be used to get the dictionary column unique identifiers.
 *
 * @param dictionaryColumnIdList column ids of the dictionary-encoded columns
 * @param carbonTableIdentifier  identifier of the table the columns belong to
 * @return list of DictionaryColumnUniqueIdentifier, one per resolvable column id
 */
private static List<DictionaryColumnUniqueIdentifier> getDictionaryColumnUniqueIdentifierList(
    List<String> dictionaryColumnIdList, CarbonTableIdentifier carbonTableIdentifier) {
  CarbonTable carbonTable =
      CarbonMetadata.getInstance().getCarbonTable(carbonTableIdentifier.getTableUniqueName());
  List<DictionaryColumnUniqueIdentifier> dictionaryColumnUniqueIdentifiers =
      new ArrayList<>(dictionaryColumnIdList.size());
  for (String columnId : dictionaryColumnIdList) {
    CarbonDimension dimension =
        CarbonMetadata.getInstance().getCarbonDimensionBasedOnColIdentifier(carbonTable, columnId);
    if (dimension != null) {
      dictionaryColumnUniqueIdentifiers.add(new DictionaryColumnUniqueIdentifier(
          carbonTableIdentifier, dimension.getColumnIdentifier(), dimension.getDataType()));
    }
  }
  return dictionaryColumnUniqueIdentifiers;
}
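These identifiers are typically consumed by batch-loading dictionaries from the cache, mirroring the cache usage in the first snippet above. A minimal sketch, assuming storePath is available from the table:

// Hedged sketch: warm the forward-dictionary cache for all listed columns.
Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardCache =
    CacheProvider.getInstance().createCache(CacheType.FORWARD_DICTIONARY, storePath);
List<Dictionary> dictionaries = forwardCache.getAll(
    getDictionaryColumnUniqueIdentifierList(dictionaryColumnIdList, carbonTableIdentifier));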
Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
The class CarbonInputFormat, method getSplits:
/**
 * {@inheritDoc}
 * The FileInputFormat.INPUT_DIR configuration is used to get
 * the table path to read.
 *
 * @param job job context
 * @return List<InputSplit> list of CarbonInputSplit
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
  CacheClient cacheClient = new CacheClient(identifier.getStorePath());
  try {
    List<String> invalidSegments = new ArrayList<>();
    List<UpdateVO> invalidTimestampsList = new ArrayList<>();
    // get all valid segments and set them into the configuration
    if (getSegmentsToAccess(job).length == 0) {
      SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
      SegmentStatusManager.ValidAndInvalidSegmentsInfo segments =
          segmentStatusManager.getValidAndInvalidSegments();
      SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier);
      setSegmentsToAccess(job.getConfiguration(), segments.getValidSegments());
      if (segments.getValidSegments().size() == 0) {
        return new ArrayList<>(0);
      }
      // remove entries from the segment index if there are invalid segments
      invalidSegments.addAll(segments.getInvalidSegments());
      for (String invalidSegmentId : invalidSegments) {
        invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
      }
      if (invalidSegments.size() > 0) {
        List<TableSegmentUniqueIdentifier> invalidSegmentsIds =
            new ArrayList<>(invalidSegments.size());
        for (String segId : invalidSegments) {
          invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(identifier, segId));
        }
        cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds);
      }
    }
    // process and resolve the filter expression
    Expression filter = getFilterPredicates(job.getConfiguration());
    CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
    // this will be null in case of a corrupt schema file
    if (null == carbonTable) {
      throw new IOException("Missing/Corrupt schema file for table.");
    }
    CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
    // prune partitions for a filter query on a partitioned table
    BitSet matchedPartitions = null;
    if (null != filter) {
      PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getFactTableName());
      if (null != partitionInfo) {
        Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
        matchedPartitions = new FilterExpressionProcessor()
            .getFilteredPartitions(filter, partitionInfo, partitioner);
        if (matchedPartitions.cardinality() == 0) {
          // no partition is required
          return new ArrayList<InputSplit>();
        }
        if (matchedPartitions.cardinality() == partitioner.numPartitions()) {
          // all partitions are required, no need to prune
          matchedPartitions = null;
        }
      }
    }
    FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filter, identifier);
    // do block filtering and get the splits
    List<InputSplit> splits = getSplits(job, filterInterface, matchedPartitions, cacheClient);
    // pass the invalid segments to the task side so their index entries can be removed there
    if (invalidSegments.size() > 0) {
      for (InputSplit split : splits) {
        ((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
        ((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
      }
    }
    return splits;
  } finally {
    // close the cache client to free LRU cache memory
    cacheClient.close();
  }
}
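A hedged sketch of a MapReduce driver that would exercise this method. The table path and the generic value type are placeholders; per the javadoc, the table path travels through FileInputFormat.INPUT_DIR, which addInputPath populates. Real jobs need further Carbon configuration not shown here.

// Hypothetical job setup exercising getSplits.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "carbon-scan");
FileInputFormat.addInputPath(job, new Path("/store/default/sales")); // placeholder table path
CarbonInputFormat<Object> inputFormat = new CarbonInputFormat<>();
List<InputSplit> splits = inputFormat.getSplits(job); // one CarbonInputSplit per surviving block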