Search in sources :

Example 6 with Blocklet

use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.

the class SecondaryIndex method prune.

/**
 * Prunes the blocklets of the current segment with the help of the secondary index.
 *
 * <p>The position references matching the filter expression are always looked up first.
 * When the current segment is not one of the valid secondary index segments, the blocklets
 * already pruned by the default index are returned; otherwise every position reference is
 * cut at its last {@code "/"} and turned into a {@link Blocklet}.
 *
 * @param filterExp resolved filter whose expression drives the lookup
 * @param segmentProperties not read by this method
 * @param filterExecutor not read by this method
 * @param carbonTable table whose database name is used for the lookup
 * @return the pruned blocklets for the current segment
 */
@Override
public List<Blocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties, FilterExecutor filterExecutor, CarbonTable carbonTable) {
    Set<String> positionReferences = getPositionReferences(carbonTable.getDatabaseName(), indexName, filterExp.getFilterExpression());
    List<Blocklet> result = new ArrayList<>();
    if (this.validSegmentIds.contains(currentSegmentId)) {
        for (String positionReference : positionReferences) {
            // Drop everything up to and including the first dash, then rewrite the underscores.
            String afterDash = positionReference.substring(positionReference.indexOf(CarbonCommonConstants.DASH) + 1);
            String normalizedPath = afterDash.replace(CarbonCommonConstants.UNDERSCORE, CarbonTablePath.BATCH_PREFIX);
            // The last path element and the part before it become the two Blocklet arguments.
            int separatorAt = normalizedPath.lastIndexOf("/");
            String leadingPart = normalizedPath.substring(0, separatorAt);
            String trailingPart = normalizedPath.substring(separatorAt + 1);
            result.add(new Blocklet(leadingPart, trailingPart));
        }
    } else {
        // The current segment is not a valid SI segment, so fall back to the
        // blocklets that the default index has already pruned.
        result.addAll(defaultIndexPrunedBlocklet);
    }
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug(String.format("Secondary Index pruned blocklet count for segment %s is %d ", currentSegmentId, result.size()));
    }
    return result;
}
Also used : ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) Blocklet(org.apache.carbondata.core.indexstore.Blocklet) ArrayList(java.util.ArrayList)

Example 7 with Blocklet

use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.

the class TableDataMap method prune.

/**
 * Prunes each of the given segments and collects the resulting blocklets, tagged with the
 * number of the segment they belong to.
 *
 * <p>Without a filter every blocklet of a segment is taken from the blocklet details fetcher;
 * with a filter the results of all datamaps of the segment are concatenated.
 *
 * @param segments segments to prune
 * @param filterExp filter to apply, or {@code null} to keep all blocklets
 * @param partitions partition specs handed on to the fetcher and to the datamaps
 * @return the extended blocklets of all segments, in segment order
 * @throws IOException propagated from the delegated calls
 */
public List<ExtendedBlocklet> prune(List<Segment> segments, FilterResolverIntf filterExp, List<PartitionSpec> partitions) throws IOException {
    List<ExtendedBlocklet> result = new ArrayList<>();
    for (Segment segment : segments) {
        List<Blocklet> candidates;
        if (filterExp != null) {
            // A filter is present: ask every datamap of this segment and merge the answers.
            candidates = new ArrayList<>();
            List<DataMap> segmentDataMaps = dataMapFactory.getDataMaps(segment);
            SegmentProperties properties = segmentPropertiesFetcher.getSegmentProperties(segment);
            for (DataMap dataMap : segmentDataMaps) {
                candidates.addAll(dataMap.prune(filterExp, properties, partitions));
            }
        } else {
            // No filter was passed, so nothing can be pruned away.
            candidates = blockletDetailsFetcher.getAllBlocklets(segment, partitions);
        }
        result.addAll(addSegmentId(blockletDetailsFetcher.getExtendedBlocklets(candidates, segment), segment.getSegmentNo()));
    }
    return result;
}
Also used : ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) Blocklet(org.apache.carbondata.core.indexstore.Blocklet) FineGrainBlocklet(org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet) ArrayList(java.util.ArrayList) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) DataMap(org.apache.carbondata.core.datamap.dev.DataMap)

Example 8 with Blocklet

use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.

the class TableDataMap method prune.

/**
 * Prunes a single distributable unit; intended to run on whichever machine the distributable
 * was shipped to.
 *
 * <p>All datamaps of the distributable are pruned with the filter, and every resulting blocklet
 * is resolved to an {@link ExtendedBlocklet} carrying the segment number. For datamaps of level
 * {@code FG}, a directory {@code <tablePath>/<dataMapName>} is created and each blocklet is
 * additionally handed to a {@link BlockletSerializer}, using a path inside that directory whose
 * last element is the current {@link System#nanoTime()} value.
 *
 * @param distributable unit of work that identifies the datamaps and the segment
 * @param filterExp filter passed to each datamap
 * @param partitions partition specs passed to each datamap
 * @return the extended blocklets that survived pruning
 * @throws IOException propagated from the delegated calls
 */
public List<ExtendedBlocklet> prune(DataMapDistributable distributable, FilterResolverIntf filterExp, List<PartitionSpec> partitions) throws IOException {
    List<ExtendedBlocklet> result = new ArrayList<>();
    List<Blocklet> pruned = new ArrayList<>();
    for (DataMap dataMap : dataMapFactory.getDataMaps(distributable)) {
        pruned.addAll(dataMap.prune(filterExp, segmentPropertiesFetcher.getSegmentProperties(distributable.getSegment()), partitions));
    }
    BlockletSerializer blockletSerializer = new BlockletSerializer();
    String dataMapDir = identifier.getTablePath() + CarbonCommonConstants.FILE_SEPARATOR + dataMapSchema.getDataMapName();
    if (DataMapLevel.FG == dataMapFactory.getDataMapType()) {
        // Only fine grain datamaps write blocklet files, so only they need the directory.
        FileFactory.mkdirs(dataMapDir, FileFactory.getFileType(dataMapDir));
    }
    for (Blocklet candidate : pruned) {
        ExtendedBlocklet extended = blockletDetailsFetcher.getExtendedBlocklet(candidate, distributable.getSegment());
        if (DataMapLevel.FG == dataMapFactory.getDataMapType()) {
            String serializedPath = dataMapDir + CarbonCommonConstants.FILE_SEPARATOR + System.nanoTime();
            extended.setDataMapWriterPath(serializedPath);
            blockletSerializer.serializeBlocklet((FineGrainBlocklet) candidate, serializedPath);
        }
        extended.setSegmentId(distributable.getSegment().getSegmentNo());
        result.add(extended);
    }
    return result;
}
Also used : ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) Blocklet(org.apache.carbondata.core.indexstore.Blocklet) FineGrainBlocklet(org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet) ArrayList(java.util.ArrayList) BlockletSerializer(org.apache.carbondata.core.datamap.dev.BlockletSerializer) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) DataMap(org.apache.carbondata.core.datamap.dev.DataMap)

Example 9 with Blocklet

use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.

the class BlockletDataMapFactory method getExtendedBlocklets.

/**
 * Resolves the given blocklets to their detailed ({@link ExtendedBlocklet}) form for one
 * segment. The detail information is only available in this default datamap, which is why
 * the lookup lives here.
 *
 * <p>Only the first element is inspected to decide whether the list is already detailed; in
 * that case every element is cast and returned as is.
 *
 * @param blocklets blocklets to resolve
 * @param segment segment the blocklets belong to
 * @return one extended blocklet per input blocklet, in input order
 * @throws IOException propagated from the identifier or detail lookup
 */
@Override
public List<ExtendedBlocklet> getExtendedBlocklets(List<Blocklet> blocklets, Segment segment) throws IOException {
    boolean alreadyDetailed = !blocklets.isEmpty() && blocklets.get(0) instanceof ExtendedBlocklet;
    List<ExtendedBlocklet> result = new ArrayList<>();
    if (!alreadyDetailed) {
        // Look every blocklet up in the blocklet datamaps of the segment.
        List<TableBlockIndexUniqueIdentifier> segmentIdentifiers = getTableBlockIndexUniqueIdentifiers(segment);
        for (Blocklet plain : blocklets) {
            result.add(getExtendedBlocklet(segmentIdentifiers, plain));
        }
        return result;
    }
    // The input is already detailed: a type cast is all that is needed.
    for (Blocklet detailed : blocklets) {
        result.add((ExtendedBlocklet) detailed);
    }
    return result;
}
Also used : ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) Blocklet(org.apache.carbondata.core.indexstore.Blocklet) ArrayList(java.util.ArrayList) TableBlockIndexUniqueIdentifier(org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet)

Example 10 with Blocklet

use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.

the class LuceneCoarseGrainDataMap method prune.

/**
 * Prune the datamap with filter expression. It returns the list of
 * blocklets where these filters can exist.
 *
 * <p>The filter expression is not translated into a Lucene query yet: a match-all query
 * ({@code *:*}) is executed for testing purposes, so the arguments are currently not read.
 * The hits are de-duplicated into one {@link Blocklet} per distinct block id / blocklet id
 * pair.
 *
 * @param filterExp filter expression (currently not used)
 * @param segmentProperties segment properties (currently not used)
 * @param partitions partition specs (currently not used)
 * @return the blocklets referenced by the search hits; never {@code null}
 * @throws IOException if the query cannot be parsed, or if searching or reading the Lucene
 *     index fails
 */
@Override
public List<Blocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties, List<PartitionSpec> partitions) throws IOException {
    // convert filter expr into lucene list query
    List<String> fields = new ArrayList<String>();
    // only for test , query all data
    String strQuery = "*:*";
    String[] sFields = fields.toArray(new String[fields.size()]);
    // get analyzer
    if (analyzer == null) {
        analyzer = new StandardAnalyzer();
    }
    // use MultiFieldQueryParser to parser query
    QueryParser queryParser = new MultiFieldQueryParser(sFields, analyzer);
    Query query;
    try {
        query = queryParser.parse(strQuery);
    } catch (ParseException e) {
        String errorMessage = String.format("failed to filter block with query %s, detail is %s", strQuery, e.getMessage());
        LOGGER.error(errorMessage);
        // Callers feed the result straight into addAll(...), so returning null would only
        // surface later as a NullPointerException; report the failure explicitly instead.
        throw new IOException(errorMessage, e);
    }
    // execute index search
    TopDocs result;
    try {
        result = indexSearcher.search(query, MAX_RESULT_NUMBER);
    } catch (IOException e) {
        String errorMessage = String.format("failed to search lucene data, detail is %s", e.getMessage());
        LOGGER.error(errorMessage);
        // keep the original exception as cause so that its stack trace is not lost
        throw new IOException(errorMessage, e);
    }
    // temporary data, delete duplicated data
    // Map<BlockId, Set<BlockletId>>
    Map<String, Set<Integer>> mapBlocks = new HashMap<String, Set<Integer>>();
    for (ScoreDoc scoreDoc : result.scoreDocs) {
        // get a document
        Document doc = indexSearcher.doc(scoreDoc.doc);
        // get all fields
        List<IndexableField> fieldsInDoc = doc.getFields();
        // get the blocklet ids collected so far for this block
        String blockId = fieldsInDoc.get(BLOCKID_ID).stringValue();
        Set<Integer> setBlocklets = mapBlocks.get(blockId);
        if (setBlocklets == null) {
            setBlocklets = new HashSet<Integer>();
            mapBlocks.put(blockId, setBlocklets);
        }
        // get the blocklet id; Set.add already ignores values that are present
        Number blockletId = fieldsInDoc.get(BLOCKLETID_ID).numericValue();
        setBlocklets.add(blockletId.intValue());
    }
    // result blocklets
    List<Blocklet> blocklets = new ArrayList<Blocklet>();
    // transform all blocks into result type blocklets Map<BlockId, Set<BlockletId>>
    for (Map.Entry<String, Set<Integer>> mapBlock : mapBlocks.entrySet()) {
        String blockId = mapBlock.getKey();
        // for blocklets in this block Set<BlockletId>
        for (Integer blockletId : mapBlock.getValue()) {
            // add a CoarseGrainBlocklet
            blocklets.add(new Blocklet(blockId, blockletId.toString()));
        }
    }
    return blocklets;
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) Query(org.apache.lucene.search.Query) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) IOException(java.io.IOException) IndexableField(org.apache.lucene.index.IndexableField) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Blocklet(org.apache.carbondata.core.indexstore.Blocklet) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) ParseException(org.apache.lucene.queryparser.classic.ParseException) HashMap(java.util.HashMap) Map(java.util.Map) CoarseGrainDataMap(org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMap)

Aggregations

ArrayList (java.util.ArrayList) 16; Blocklet (org.apache.carbondata.core.indexstore.Blocklet) 16; ExtendedBlocklet (org.apache.carbondata.core.indexstore.ExtendedBlocklet) 12; SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties) 5; CoarseGrainIndex (org.apache.carbondata.core.index.dev.cgindex.CoarseGrainIndex) 4; HashSet (java.util.HashSet) 3; Index (org.apache.carbondata.core.index.dev.Index) 3; FineGrainBlocklet (org.apache.carbondata.core.index.dev.fgindex.FineGrainBlocklet) 3; BlockIndex (org.apache.carbondata.core.indexstore.blockletindex.BlockIndex) 3; FilterExecuter (org.apache.carbondata.core.scan.filter.executer.FilterExecuter) 3; FilterExecutor (org.apache.carbondata.core.scan.filter.executer.FilterExecutor) 3; BitSet (java.util.BitSet) 2; HashMap (java.util.HashMap) 2; Map (java.util.Map) 2; DataMap (org.apache.carbondata.core.datamap.dev.DataMap) 2; CoarseGrainDataMap (org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMap) 2; FineGrainBlocklet (org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet) 2; TableBlockIndexUniqueIdentifier (org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier) 2; Expression (org.apache.carbondata.core.scan.expression.Expression) 2; IOException (java.io.IOException) 1