
Example 1 with FineGrainBlocklet

Use of org.apache.carbondata.core.index.dev.fgindex.FineGrainBlocklet in the Apache CarbonData project.

From the class BlockletSerializer, method deserializeBlocklet.

/**
 * Read the serialized data from the given file path and deserialize it into a blocklet.
 * @param writePath path the blocklet was previously serialized to
 * @return the deserialized FineGrainBlocklet
 * @throws IOException if the data cannot be read
 */
public FineGrainBlocklet deserializeBlocklet(String writePath) throws IOException {
    DataInputStream inputStream = FileFactory.getDataInputStream(writePath);
    FineGrainBlocklet blocklet = new FineGrainBlocklet();
    try {
        blocklet.readFields(inputStream);
    } finally {
        inputStream.close();
    }
    return blocklet;
}
Also used : FineGrainBlocklet(org.apache.carbondata.core.index.dev.fgindex.FineGrainBlocklet) DataInputStream(java.io.DataInputStream)
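
A minimal round-trip sketch combining this method with BlockletSerializer.serializeBlocklet (seen in Example 2 below). The FineGrainBlocklet.Page setters and the FineGrainBlocklet(filePath, blockletId, pages) constructor are taken from Example 3; the concrete path and ids are illustrative only, and the surrounding code is assumed to declare or handle IOException.

// Sketch only: build a fine-grained blocklet, persist it, and read it back.
FineGrainBlocklet.Page page = new FineGrainBlocklet.Page();
page.setPageId(0);
// row ids inside the page that matched the index lookup (illustrative values)
page.setRowId(new int[] { 3, 7, 42 });

List<FineGrainBlocklet.Page> pages = new ArrayList<>();
pages.add(page);
FineGrainBlocklet original = new FineGrainBlocklet("/store/db/t1/part-0", "0", pages);

BlockletSerializer serializer = new BlockletSerializer();
// illustrative target path; the real caller derives it from the table path and index name
String writePath = "/store/db/t1/fg_index/" + System.nanoTime();
serializer.serializeBlocklet(original, writePath);

// later, restore the blocklet from the same path
FineGrainBlocklet restored = serializer.deserializeBlocklet(writePath);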

Example 2 with FineGrainBlocklet

Use of org.apache.carbondata.core.index.dev.fgindex.FineGrainBlocklet in the Apache CarbonData project.

From the class TableIndex, method prune.

/**
 * This method is invoked on any machine after the work has been distributed. It uses the
 * distributable (input split) object to prune blocklets with the given filter.
 *
 * @param indices       indexes to prune against
 * @param distributable input split identifying the segment to prune
 * @param filterExp     resolved filter expression
 * @param partitions    partition specs used to restrict pruning
 * @return pruned blocklets with extended details
 */
public List<ExtendedBlocklet> prune(List<Index> indices, IndexInputSplit distributable, FilterResolverIntf filterExp, List<PartitionSpec> partitions) throws IOException {
    List<ExtendedBlocklet> detailedBlocklets = new ArrayList<>();
    List<Blocklet> blocklets = new ArrayList<>();
    Set<Path> partitionsToPrune = getPartitionLocations(partitions);
    SegmentProperties segmentProperties = segmentPropertiesFetcher.getSegmentProperties(distributable.getSegment(), partitionsToPrune);
    FilterExecutor filterExecutor = FilterUtil.getFilterExecutorTree(filterExp, segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties), false);
    for (Index index : indices) {
        blocklets.addAll(index.prune(filterExp, segmentProperties, filterExecutor, table));
    }
    BlockletSerializer serializer = new BlockletSerializer();
    String writePath = identifier.getTablePath() + CarbonCommonConstants.FILE_SEPARATOR + indexSchema.getIndexName();
    if (indexFactory.getIndexLevel() == IndexLevel.FG) {
        FileFactory.mkdirs(writePath);
    }
    for (Blocklet blocklet : blocklets) {
        ExtendedBlocklet detailedBlocklet = blockletDetailsFetcher.getExtendedBlocklet(blocklet, distributable.getSegment());
        if (indexFactory.getIndexLevel() == IndexLevel.FG) {
            String blockletWritePath = writePath + CarbonCommonConstants.FILE_SEPARATOR + System.nanoTime();
            detailedBlocklet.setIndexWriterPath(blockletWritePath);
            serializer.serializeBlocklet((FineGrainBlocklet) blocklet, blockletWritePath);
        }
        detailedBlocklet.setSegment(distributable.getSegment());
        detailedBlocklets.add(detailedBlocklet);
    }
    return detailedBlocklets;
}
Also used : Path(org.apache.hadoop.fs.Path) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) Blocklet(org.apache.carbondata.core.indexstore.Blocklet) FineGrainBlocklet(org.apache.carbondata.core.index.dev.fgindex.FineGrainBlocklet) FilterExecutor(org.apache.carbondata.core.scan.filter.executer.FilterExecutor) ArrayList(java.util.ArrayList) Index(org.apache.carbondata.core.index.dev.Index) BlockIndex(org.apache.carbondata.core.indexstore.blockletindex.BlockIndex) CoarseGrainIndex(org.apache.carbondata.core.index.dev.cgindex.CoarseGrainIndex) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) BlockletSerializer(org.apache.carbondata.core.index.dev.BlockletSerializer) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet)
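
A hedged sketch of the consuming side: for an FG-level index, the blocklet serialized during prune can be restored from the path stored on the ExtendedBlocklet. getIndexWriterPath() is assumed here as the counterpart of the setIndexWriterPath() call above, and IOException handling is left to the caller.

// Sketch only: read back the fine-grained blocklets written by prune.
// getIndexWriterPath() is assumed to be the getter matching setIndexWriterPath().
BlockletSerializer serializer = new BlockletSerializer();
for (ExtendedBlocklet detailedBlocklet : detailedBlocklets) {
    String fgPath = detailedBlocklet.getIndexWriterPath();
    if (fgPath != null) {
        // fgBlocklet carries the page and row id hits recorded by the FG index
        FineGrainBlocklet fgBlocklet = serializer.deserializeBlocklet(fgPath);
    }
}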

Example 3 with FineGrainBlocklet

Use of org.apache.carbondata.core.index.dev.fgindex.FineGrainBlocklet in the Apache CarbonData project.

From the class LuceneFineGrainIndex, method prune.

/**
 * Prune the index with the filter expression. Returns the list of
 * blocklets in which rows matching these filters can exist.
 */
@Override
public List<FineGrainBlocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties, FilterExecutor filterExecutor, CarbonTable carbonTable) throws IOException {
    // convert filter expr into lucene list query
    List<String> fields = new ArrayList<String>();
    // only for test, query all data
    String strQuery = getQueryString(filterExp.getFilterExpression());
    int maxDocs;
    try {
        maxDocs = getMaxDoc(filterExp.getFilterExpression());
    } catch (NumberFormatException e) {
        maxDocs = Integer.MAX_VALUE;
    }
    if (null == strQuery) {
        return null;
    }
    String[] sFields = new String[fields.size()];
    fields.toArray(sFields);
    // get analyzer
    if (analyzer == null) {
        analyzer = new StandardAnalyzer();
    }
    // use MultiFieldQueryParser to parse the query
    QueryParser queryParser = new MultiFieldQueryParser(sFields, analyzer);
    queryParser.setAllowLeadingWildcard(true);
    Query query;
    try {
        query = queryParser.parse(strQuery);
    } catch (ParseException e) {
        String errorMessage = String.format("failed to filter block with query %s, detail is %s", strQuery, e.getMessage());
        LOGGER.error(errorMessage, e);
        return null;
    }
    // temporary data, delete duplicated data
    // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    Map<String, Map<Integer, List<Short>>> mapBlocks = new HashMap<>();
    long luceneSearchStartTime = System.currentTimeMillis();
    for (Map.Entry<String, IndexSearcher> searcherEntry : indexSearcherMap.entrySet()) {
        IndexSearcher indexSearcher = searcherEntry.getValue();
        // take the min of total documents available in the reader and limit if set by the user
        maxDocs = Math.min(maxDocs, indexSearcher.getIndexReader().maxDoc());
        // execute index search
        TopDocs result = null;
        // the number of documents to request in one search call; always the minimum of
        // SEARCH_LIMIT and the documents still to be fetched (bounded by maxDocs)
        int numberOfDocumentsToBeQueried = 0;
        // counter for maintaining the total number of documents finished querying
        int documentHitCounter = 0;
        try {
            numberOfDocumentsToBeQueried = Math.min(maxDocs, SEARCH_LIMIT);
            result = indexSearcher.search(query, numberOfDocumentsToBeQueried);
            documentHitCounter += numberOfDocumentsToBeQueried;
        } catch (IOException e) {
            String errorMessage = String.format("failed to search lucene data, detail is %s", e.getMessage());
            LOGGER.error(errorMessage, e);
            throw new IOException(errorMessage, e);
        }
        ByteBuffer intBuffer = ByteBuffer.allocate(4);
        // last scoreDoc in a result to be used in searchAfter API
        ScoreDoc lastScoreDoc = null;
        while (true) {
            for (ScoreDoc scoreDoc : result.scoreDocs) {
                // get a document
                Document doc = indexSearcher.doc(scoreDoc.doc);
                // get all fields
                List<IndexableField> fieldsInDoc = doc.getFields();
                if (writeCacheSize > 0) {
                    // fill row ids into the map; each map value combines multiple rows
                    fillMapForCombineRows(intBuffer, mapBlocks, fieldsInDoc, searcherEntry.getKey());
                } else {
                    // Fill rowids to the map
                    fillMap(intBuffer, mapBlocks, fieldsInDoc, searcherEntry.getKey());
                }
                lastScoreDoc = scoreDoc;
            }
            // result holds the total number of hits, so we keep querying the left-over documents
            int remainingHits = result.totalHits - documentHitCounter;
            // break the loop if count reaches maxDocs to be searched or remaining hits become <=0
            if (remainingHits <= 0 || documentHitCounter >= maxDocs) {
                break;
            }
            numberOfDocumentsToBeQueried = Math.min(remainingHits, SEARCH_LIMIT);
            result = indexSearcher.searchAfter(lastScoreDoc, query, numberOfDocumentsToBeQueried);
            documentHitCounter += numberOfDocumentsToBeQueried;
        }
    }
    LOGGER.info("Time taken for lucene search: " + (System.currentTimeMillis() - luceneSearchStartTime) + " ms");
    // result blocklets
    List<FineGrainBlocklet> blocklets = new ArrayList<>();
    // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    for (Map.Entry<String, Map<Integer, List<Short>>> mapBlocklet : mapBlocks.entrySet()) {
        String blockletId = mapBlocklet.getKey();
        Map<Integer, List<Short>> mapPageIds = mapBlocklet.getValue();
        List<FineGrainBlocklet.Page> pages = new ArrayList<FineGrainBlocklet.Page>();
        // for pages in this blocklet Map<PageId, Set<RowId>>>
        for (Map.Entry<Integer, List<Short>> mapPageId : mapPageIds.entrySet()) {
            // construct array rowid
            int[] rowIds = new int[mapPageId.getValue().size()];
            int i = 0;
            // for rowids in this page Set<RowId>
            for (Short rowid : mapPageId.getValue()) {
                rowIds[i++] = rowid;
            }
            // construct one page
            FineGrainBlocklet.Page page = new FineGrainBlocklet.Page();
            page.setPageId(mapPageId.getKey());
            page.setRowId(rowIds);
            // add this page into list pages
            pages.add(page);
        }
        // add a FineGrainBlocklet
        blocklets.add(new FineGrainBlocklet(filePath, blockletId, pages));
    }
    return blocklets;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) ArrayList(java.util.ArrayList) List(java.util.List) FineGrainBlocklet(org.apache.carbondata.core.index.dev.fgindex.FineGrainBlocklet) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) IndexableField(org.apache.lucene.index.IndexableField) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) ParseException(org.apache.lucene.queryparser.classic.ParseException) HashMap(java.util.HashMap) Map(java.util.Map)
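
The tail of this method, from the mapBlocks loop onward, is a self-contained conversion step. Below is a minimal sketch of that step extracted into a helper; the method name toFineGrainBlocklets is hypothetical, and the map layout (blocklet id to page id to row ids) follows the code above.

// Sketch only: convert the pruned hit map into FineGrainBlocklets,
// mirroring the final loop of prune. The helper name is hypothetical.
private static List<FineGrainBlocklet> toFineGrainBlocklets(String filePath, Map<String, Map<Integer, List<Short>>> mapBlocks) {
    List<FineGrainBlocklet> blocklets = new ArrayList<>();
    for (Map.Entry<String, Map<Integer, List<Short>>> blockletEntry : mapBlocks.entrySet()) {
        List<FineGrainBlocklet.Page> pages = new ArrayList<>();
        for (Map.Entry<Integer, List<Short>> pageEntry : blockletEntry.getValue().entrySet()) {
            // copy the row ids for this page into a primitive array
            int[] rowIds = new int[pageEntry.getValue().size()];
            int i = 0;
            for (Short rowId : pageEntry.getValue()) {
                rowIds[i++] = rowId;
            }
            FineGrainBlocklet.Page page = new FineGrainBlocklet.Page();
            page.setPageId(pageEntry.getKey());
            page.setRowId(rowIds);
            pages.add(page);
        }
        blocklets.add(new FineGrainBlocklet(filePath, blockletEntry.getKey(), pages));
    }
    return blocklets;
}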

Aggregations

FineGrainBlocklet (org.apache.carbondata.core.index.dev.fgindex.FineGrainBlocklet)3 ArrayList (java.util.ArrayList)2 DataInputStream (java.io.DataInputStream)1 IOException (java.io.IOException)1 ByteBuffer (java.nio.ByteBuffer)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties)1 BlockletSerializer (org.apache.carbondata.core.index.dev.BlockletSerializer)1 Index (org.apache.carbondata.core.index.dev.Index)1 CoarseGrainIndex (org.apache.carbondata.core.index.dev.cgindex.CoarseGrainIndex)1 Blocklet (org.apache.carbondata.core.indexstore.Blocklet)1 ExtendedBlocklet (org.apache.carbondata.core.indexstore.ExtendedBlocklet)1 BlockIndex (org.apache.carbondata.core.indexstore.blockletindex.BlockIndex)1 FilterExecutor (org.apache.carbondata.core.scan.filter.executer.FilterExecutor)1 Path (org.apache.hadoop.fs.Path)1 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)1 Document (org.apache.lucene.document.Document)1 IndexableField (org.apache.lucene.index.IndexableField)1