Use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.
The class SecondaryIndex, method prune.
@Override
public List<Blocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties,
    FilterExecutor filterExecutor, CarbonTable carbonTable) {
  Set<String> blockletPaths = getPositionReferences(carbonTable.getDatabaseName(), indexName,
      filterExp.getFilterExpression());
  List<Blocklet> blocklets = new ArrayList<>();
  if (!this.validSegmentIds.contains(currentSegmentId)) {
    // if the current segment is not a valid SI segment, then
    // add the blocklets pruned by the default index instead
    blocklets.addAll(defaultIndexPrunedBlocklet);
  } else {
    for (String blockletPath : blockletPaths) {
      // strip the segment prefix (up to the first dash) and normalize the batch separator
      blockletPath = blockletPath.substring(blockletPath.indexOf(CarbonCommonConstants.DASH) + 1)
          .replace(CarbonCommonConstants.UNDERSCORE, CarbonTablePath.BATCH_PREFIX);
      // the last '/' separates the block path from the blocklet id
      int blockletIndex = blockletPath.lastIndexOf("/");
      blocklets.add(new Blocklet(blockletPath.substring(0, blockletIndex),
          blockletPath.substring(blockletIndex + 1)));
    }
  }
  if (LOGGER.isDebugEnabled()) {
    LOGGER.debug(String.format("Secondary Index pruned blocklet count for segment %s is %d",
        currentSegmentId, blocklets.size()));
  }
  return blocklets;
}
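The interesting step above is the string surgery on each position reference. The following is a minimal, self-contained sketch of that split, using plain literals in place of CarbonCommonConstants.DASH, CarbonCommonConstants.UNDERSCORE and CarbonTablePath.BATCH_PREFIX; both the sample input format and the "_batchno" prefix value are assumptions, not the exact CarbonData layout.

public class BlockletPathSplitSketch {
  public static void main(String[] args) {
    // hypothetical position reference: "<segmentNo>-<blockPath>/<blockletId>"
    String blockletPath = "0-part-0-0_0/2";
    // drop everything up to and including the first dash (the segment prefix),
    // then swap the underscore separator for the assumed batch prefix
    blockletPath = blockletPath.substring(blockletPath.indexOf("-") + 1)
        .replace("_", "_batchno"); // "_batchno" is an assumed BATCH_PREFIX value
    // the last '/' splits the block path from the blocklet id
    int idx = blockletPath.lastIndexOf("/");
    String blockPath = blockletPath.substring(0, idx);
    String blockletId = blockletPath.substring(idx + 1);
    System.out.println(blockPath + " -> blocklet " + blockletId);
  }
}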
Use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.
The class TableDataMap, method prune.
/**
 * Pass the valid segments and prune the datamap using the filter expression.
 *
 * @param segments   valid segments to prune
 * @param filterExp  filter expression; if null, all blocklets are returned
 * @param partitions partition specs to restrict the pruning to
 * @return the pruned blocklets, tagged with their segment id
 */
public List<ExtendedBlocklet> prune(List<Segment> segments, FilterResolverIntf filterExp,
    List<PartitionSpec> partitions) throws IOException {
  List<ExtendedBlocklet> blocklets = new ArrayList<>();
  SegmentProperties segmentProperties;
  for (Segment segment : segments) {
    List<Blocklet> pruneBlocklets = new ArrayList<>();
    // if no filter is passed, return all the blocklets of this segment
    if (filterExp == null) {
      pruneBlocklets = blockletDetailsFetcher.getAllBlocklets(segment, partitions);
    } else {
      List<DataMap> dataMaps = dataMapFactory.getDataMaps(segment);
      segmentProperties = segmentPropertiesFetcher.getSegmentProperties(segment);
      for (DataMap dataMap : dataMaps) {
        pruneBlocklets.addAll(dataMap.prune(filterExp, segmentProperties, partitions));
      }
    }
    blocklets.addAll(addSegmentId(
        blockletDetailsFetcher.getExtendedBlocklets(pruneBlocklets, segment),
        segment.getSegmentNo()));
  }
  return blocklets;
}
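As a rough illustration of the control flow above (a null filter keeps everything; otherwise each datamap of the segment is consulted and the results are tagged with the segment id), here is a dependency-free sketch with stand-in types; a Predicate<String> plays the role of FilterResolverIntf and plain strings stand in for blocklets.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;

public class SegmentPruneSketch {
  public static void main(String[] args) {
    // hypothetical blocklets per segment
    Map<String, List<String>> segments = new LinkedHashMap<>();
    segments.put("0", Arrays.asList("blk-0", "blk-1"));
    segments.put("1", Arrays.asList("blk-2"));
    Predicate<String> filterExp = b -> !b.equals("blk-1"); // stand-in filter
    List<String> pruned = new ArrayList<>();
    for (Map.Entry<String, List<String>> segment : segments.entrySet()) {
      for (String blocklet : segment.getValue()) {
        // a null filter keeps every blocklet, mirroring the method above
        if (filterExp == null || filterExp.test(blocklet)) {
          pruned.add(segment.getKey() + "/" + blocklet); // tag with segment id
        }
      }
    }
    System.out.println(pruned); // [0/blk-0, 1/blk-2]
  }
}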
Use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.
The class TableDataMap, method prune.
/**
 * This method is called on an executor after the work is distributed. It takes the
 * distributable object and prunes it with the filter expression.
 *
 * @param distributable the unit of distributed pruning work
 * @param filterExp     filter expression to prune with
 * @return the pruned blocklets with detail information
 */
public List<ExtendedBlocklet> prune(DataMapDistributable distributable,
    FilterResolverIntf filterExp, List<PartitionSpec> partitions) throws IOException {
  List<ExtendedBlocklet> detailedBlocklets = new ArrayList<>();
  List<Blocklet> blocklets = new ArrayList<>();
  List<DataMap> dataMaps = dataMapFactory.getDataMaps(distributable);
  for (DataMap dataMap : dataMaps) {
    blocklets.addAll(dataMap.prune(filterExp,
        segmentPropertiesFetcher.getSegmentProperties(distributable.getSegment()), partitions));
  }
  BlockletSerializer serializer = new BlockletSerializer();
  String writePath = identifier.getTablePath() + CarbonCommonConstants.FILE_SEPARATOR
      + dataMapSchema.getDataMapName();
  // fine-grain datamaps serialize row-level detail to disk, so prepare a directory
  if (dataMapFactory.getDataMapType() == DataMapLevel.FG) {
    FileFactory.mkdirs(writePath, FileFactory.getFileType(writePath));
  }
  for (Blocklet blocklet : blocklets) {
    ExtendedBlocklet detailedBlocklet =
        blockletDetailsFetcher.getExtendedBlocklet(blocklet, distributable.getSegment());
    if (dataMapFactory.getDataMapType() == DataMapLevel.FG) {
      // one file per blocklet, made unique with a nanosecond timestamp
      String blockletWritePath =
          writePath + CarbonCommonConstants.FILE_SEPARATOR + System.nanoTime();
      detailedBlocklet.setDataMapWriterPath(blockletWritePath);
      serializer.serializeBlocklet((FineGrainBlocklet) blocklet, blockletWritePath);
    }
    detailedBlocklet.setSegmentId(distributable.getSegment().getSegmentNo());
    detailedBlocklets.add(detailedBlocklet);
  }
  return detailedBlocklets;
}
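The FG branch above gives every serialized blocklet its own file under the datamap's directory, relying on System.nanoTime() for uniqueness within the process. A small standalone sketch of just that path construction; the concrete paths and names are invented for illustration.

public class FgWritePathSketch {
  public static void main(String[] args) {
    String tablePath = "/tmp/store/default/t1"; // hypothetical table path
    String dataMapName = "dm1";                 // hypothetical datamap name
    // base directory for this datamap's serialized blocklets
    String writePath = tablePath + "/" + dataMapName;
    // one file per blocklet, made unique with a nanosecond timestamp
    String blockletWritePath = writePath + "/" + System.nanoTime();
    System.out.println(blockletWritePath);
  }
}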
Use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.
The class BlockletDataMapFactory, method getExtendedBlocklets.
/**
 * Get the blocklet detail information based on blockletId, blockId and segmentId. This method
 * is exclusive to BlockletDataMapFactory, as the detail information is only available in this
 * default datamap.
 */
@Override
public List<ExtendedBlocklet> getExtendedBlocklets(List<Blocklet> blocklets, Segment segment)
    throws IOException {
  List<ExtendedBlocklet> detailedBlocklets = new ArrayList<>();
  // if the blocklets are already detailed blocklets, just cast and return them
  if (!blocklets.isEmpty() && blocklets.get(0) instanceof ExtendedBlocklet) {
    for (Blocklet blocklet : blocklets) {
      detailedBlocklets.add((ExtendedBlocklet) blocklet);
    }
    return detailedBlocklets;
  }
  List<TableBlockIndexUniqueIdentifier> identifiers =
      getTableBlockIndexUniqueIdentifiers(segment);
  // retrieve each blocklet's detail information from the blocklet datamap
  for (Blocklet blocklet : blocklets) {
    detailedBlocklets.add(getExtendedBlocklet(identifiers, blocklet));
  }
  return detailedBlocklets;
}
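The early-return branch relies on the list being homogeneous: if the first element is already an ExtendedBlocklet, every element is assumed to be. A stand-in sketch of that pattern, with hypothetical Base/Extended classes in place of Blocklet/ExtendedBlocklet:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class CastShortcutSketch {
  static class Base {}
  static class Extended extends Base {}

  static List<Extended> toExtended(List<Base> in) {
    List<Extended> out = new ArrayList<>();
    // shortcut: if the first element is already Extended, assume they all are
    if (!in.isEmpty() && in.get(0) instanceof Extended) {
      for (Base b : in) {
        out.add((Extended) b);
      }
      return out;
    }
    // otherwise a per-element detail lookup would happen here (elided)
    return out;
  }

  public static void main(String[] args) {
    List<Base> input = Arrays.<Base>asList(new Extended(), new Extended());
    System.out.println(toExtended(input).size()); // 2
  }
}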
Use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.
The class LuceneCoarseGrainDataMap, method prune.
/**
 * Prune the datamap with the filter expression. It returns the list of
 * blocklets where these filters can exist.
 */
@Override
public List<Blocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties,
    List<PartitionSpec> partitions) throws IOException {
  // convert the filter expression into a lucene query
  List<String> fields = new ArrayList<>();
  // only for testing: match all documents
  String strQuery = "*:*";
  String[] sFields = new String[fields.size()];
  fields.toArray(sFields);
  // get the analyzer
  if (analyzer == null) {
    analyzer = new StandardAnalyzer();
  }
  // use MultiFieldQueryParser to parse the query
  QueryParser queryParser = new MultiFieldQueryParser(sFields, analyzer);
  Query query;
  try {
    query = queryParser.parse(strQuery);
  } catch (ParseException e) {
    String errorMessage = String.format("failed to filter block with query %s, detail is %s",
        strQuery, e.getMessage());
    LOGGER.error(errorMessage);
    return null;
  }
  // execute the index search
  TopDocs result;
  try {
    result = indexSearcher.search(query, MAX_RESULT_NUMBER);
  } catch (IOException e) {
    String errorMessage =
        String.format("failed to search lucene data, detail is %s", e.getMessage());
    LOGGER.error(errorMessage);
    throw new IOException(errorMessage);
  }
  // deduplicate the hits: Map<BlockId, Set<BlockletId>>
  Map<String, Set<Number>> mapBlocks = new HashMap<>();
  for (ScoreDoc scoreDoc : result.scoreDocs) {
    // get a document
    Document doc = indexSearcher.doc(scoreDoc.doc);
    // get all fields
    List<IndexableField> fieldsInDoc = doc.getFields();
    // get the block id of this document
    String blockId = fieldsInDoc.get(BLOCKID_ID).stringValue();
    Set<Number> setBlocklets = mapBlocks.get(blockId);
    if (setBlocklets == null) {
      setBlocklets = new HashSet<>();
      mapBlocks.put(blockId, setBlocklets);
    }
    // record the blocklet id; the set drops duplicates
    Number blockletId = fieldsInDoc.get(BLOCKLETID_ID).numericValue();
    setBlocklets.add(blockletId.intValue());
  }
  // transform the deduplicated Map<BlockId, Set<BlockletId>> into result blocklets
  List<Blocklet> blocklets = new ArrayList<>();
  for (Map.Entry<String, Set<Number>> mapBlock : mapBlocks.entrySet()) {
    String blockId = mapBlock.getKey();
    Set<Number> setBlocklets = mapBlock.getValue();
    for (Number blockletId : setBlocklets) {
      blocklets.add(new Blocklet(blockId, blockletId.toString()));
    }
  }
  return blocklets;
}
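The deduplication loop above can be condensed with computeIfAbsent. The following standalone sketch shows the same collapse of repeated (blockId, blockletId) hits into a Map<BlockId, Set<BlockletId>>; the sample hits are invented.

import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

public class HitDedupSketch {
  public static void main(String[] args) {
    // hypothetical (blockId, blockletId) hits, including duplicates
    String[][] hits = { {"block-1", "0"}, {"block-1", "0"}, {"block-1", "2"}, {"block-2", "1"} };
    Map<String, Set<Integer>> mapBlocks = new LinkedHashMap<>();
    for (String[] hit : hits) {
      // computeIfAbsent replaces the get/null-check/put dance in the original
      mapBlocks.computeIfAbsent(hit[0], k -> new HashSet<>()).add(Integer.parseInt(hit[1]));
    }
    System.out.println(mapBlocks); // e.g. {block-1=[0, 2], block-2=[1]}
  }
}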