Search in sources :

Example 1 with FineGrainBlocklet

Use of org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet in the Apache CarbonData project.

From the class BlockletSerializer, method deserializeBlocklet.

/**
 * Reads the file at {@code writePath} and deserializes its content into a
 * {@link FineGrainBlocklet}.
 *
 * @param writePath path of the file the blocklet was previously serialized to
 * @return the deserialized blocklet
 * @throws IOException if the file cannot be opened or its content cannot be read
 */
public FineGrainBlocklet deserializeBlocklet(String writePath) throws IOException {
    // try-with-resources guarantees the stream is closed even when readFields
    // throws; the original leaked the stream on the exception path.
    try (DataInputStream inputStream =
            FileFactory.getDataInputStream(writePath, FileFactory.getFileType(writePath))) {
        FineGrainBlocklet blocklet = new FineGrainBlocklet();
        blocklet.readFields(inputStream);
        return blocklet;
    }
}
Also used : FineGrainBlocklet(org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet) DataInputStream(java.io.DataInputStream)

Example 2 with FineGrainBlocklet

Use of org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet in the Apache CarbonData project.

From the class LuceneFineGrainDataMap, method prune.

/**
 * Prune the datamap with filter expression. It returns the list of
 * blocklets where these filters can exist.
 *
 * The lucene query is executed against the index, and each matching document
 * (one per indexed row) is folded into a nested map
 * {@code Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>} to deduplicate
 * hits, then flattened into one {@link FineGrainBlocklet} per (block, blocklet)
 * pair.
 *
 * @param filterExp         filter to convert into a lucene query
 * @param segmentProperties segment metadata (unused here, part of the contract)
 * @param partitions        partition specs (unused here, part of the contract)
 * @return matching blocklets; {@code null} if the query string fails to parse
 *         (NOTE(review): returning null from a List-returning method is
 *         error-prone — callers must null-check; kept for compatibility)
 * @throws IOException if the lucene index search fails
 */
@Override
public List<FineGrainBlocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties, List<PartitionSpec> partitions) throws IOException {
    // convert filter expr into lucene list query
    List<String> fields = new ArrayList<String>();
    // only for test , query all data
    String strQuery = getQueryString(filterExp.getFilterExpression());
    String[] sFields = new String[fields.size()];
    fields.toArray(sFields);
    // lazily create the analyzer on first use
    if (analyzer == null) {
        analyzer = new StandardAnalyzer();
    }
    // use MultiFieldQueryParser to parse the query across all fields
    QueryParser queryParser = new MultiFieldQueryParser(sFields, analyzer);
    Query query;
    try {
        query = queryParser.parse(strQuery);
    } catch (ParseException e) {
        String errorMessage = String.format("failed to filter block with query %s, detail is %s", strQuery, e.getMessage());
        LOGGER.error(errorMessage);
        return null;
    }
    // execute index search
    TopDocs result;
    try {
        result = indexSearcher.search(query, MAX_RESULT_NUMBER);
    } catch (IOException e) {
        String errorMessage = String.format("failed to search lucene data, detail is %s", e.getMessage());
        LOGGER.error(errorMessage);
        // chain the cause so the original stack trace is preserved for callers
        // (the original rethrow dropped it)
        throw new IOException(errorMessage, e);
    }
    // temporary data, delete duplicated data
    // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    Map<String, Map<String, Map<Integer, Set<Integer>>>> mapBlocks = new HashMap<>();
    for (ScoreDoc scoreDoc : result.scoreDocs) {
        // get a document
        Document doc = indexSearcher.doc(scoreDoc.doc);
        // get all fields
        List<IndexableField> fieldsInDoc = doc.getFields();
        // get this block id Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
        String blockId = fieldsInDoc.get(BLOCKID_ID).stringValue();
        Map<String, Map<Integer, Set<Integer>>> mapBlocklets = mapBlocks.get(blockId);
        if (mapBlocklets == null) {
            mapBlocklets = new HashMap<>();
            mapBlocks.put(blockId, mapBlocklets);
        }
        // get the blocklet id Map<BlockletId, Map<PageId, Set<RowId>>>
        String blockletId = fieldsInDoc.get(BLOCKLETID_ID).stringValue();
        Map<Integer, Set<Integer>> mapPageIds = mapBlocklets.get(blockletId);
        if (mapPageIds == null) {
            mapPageIds = new HashMap<>();
            mapBlocklets.put(blockletId, mapPageIds);
        }
        // get the page id Map<PageId, Set<RowId>>
        Number pageId = fieldsInDoc.get(PAGEID_ID).numericValue();
        Set<Integer> setRowId = mapPageIds.get(pageId.intValue());
        if (setRowId == null) {
            setRowId = new HashSet<>();
            mapPageIds.put(pageId.intValue(), setRowId);
        }
        // get the row id Set<RowId>
        Number rowId = fieldsInDoc.get(ROWID_ID).numericValue();
        setRowId.add(rowId.intValue());
    }
    // result blocklets
    List<FineGrainBlocklet> blocklets = new ArrayList<>();
    // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    for (Map.Entry<String, Map<String, Map<Integer, Set<Integer>>>> mapBlock : mapBlocks.entrySet()) {
        String blockId = mapBlock.getKey();
        Map<String, Map<Integer, Set<Integer>>> mapBlocklets = mapBlock.getValue();
        // for blocklets in this block Map<BlockletId, Map<PageId, Set<RowId>>>
        for (Map.Entry<String, Map<Integer, Set<Integer>>> mapBlocklet : mapBlocklets.entrySet()) {
            String blockletId = mapBlocklet.getKey();
            Map<Integer, Set<Integer>> mapPageIds = mapBlocklet.getValue();
            List<FineGrainBlocklet.Page> pages = new ArrayList<FineGrainBlocklet.Page>();
            // for pages in this blocklet Map<PageId, Set<RowId>>>
            for (Map.Entry<Integer, Set<Integer>> mapPageId : mapPageIds.entrySet()) {
                // construct array rowid
                int[] rowIds = new int[mapPageId.getValue().size()];
                int i = 0;
                // for rowids in this page Set<RowId>
                for (Integer rowid : mapPageId.getValue()) {
                    rowIds[i++] = rowid;
                }
                // construct one page
                FineGrainBlocklet.Page page = new FineGrainBlocklet.Page();
                page.setPageId(mapPageId.getKey());
                page.setRowId(rowIds);
                // add this page into list pages
                pages.add(page);
            }
            // add a FineGrainBlocklet
            blocklets.add(new FineGrainBlocklet(blockId, blockletId, pages));
        }
    }
    return blocklets;
}
Also used : Document(org.apache.lucene.document.Document) FineGrainBlocklet(org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) IOException(java.io.IOException) IndexableField(org.apache.lucene.index.IndexableField) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) ParseException(org.apache.lucene.queryparser.classic.ParseException) FineGrainDataMap(org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainDataMap)

Example 3 with FineGrainBlocklet

Use of org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet in the Apache CarbonData project.

From the class TableDataMap, method prune.

/**
 * Prunes blocklets on an executor after the distributable object has been
 * shipped to it: collects candidate blocklets from every datamap of the
 * distributable, resolves each into an {@link ExtendedBlocklet}, and — for
 * fine-grain (FG) datamaps — serializes the blocklet row data to a
 * per-blocklet file under the datamap's table directory.
 *
 * @param distributable unit of pruning work distributed to this machine
 * @param filterExp     filter used to prune each datamap
 * @param partitions    partitions to restrict the pruning to
 * @return extended blocklets, each tagged with its segment id
 * @throws IOException on datamap pruning or blocklet serialization failure
 */
public List<ExtendedBlocklet> prune(DataMapDistributable distributable, FilterResolverIntf filterExp, List<PartitionSpec> partitions) throws IOException {
    // gather candidate blocklets from every datamap covering this distributable
    List<Blocklet> prunedBlocklets = new ArrayList<>();
    for (DataMap dataMap : dataMapFactory.getDataMaps(distributable)) {
        prunedBlocklets.addAll(dataMap.prune(filterExp, segmentPropertiesFetcher.getSegmentProperties(distributable.getSegment()), partitions));
    }
    BlockletSerializer serializer = new BlockletSerializer();
    String dataMapDir = identifier.getTablePath() + CarbonCommonConstants.FILE_SEPARATOR + dataMapSchema.getDataMapName();
    // FG datamaps persist row-level data; make sure the target directory exists
    if (dataMapFactory.getDataMapType() == DataMapLevel.FG) {
        FileFactory.mkdirs(dataMapDir, FileFactory.getFileType(dataMapDir));
    }
    List<ExtendedBlocklet> detailedBlocklets = new ArrayList<>();
    for (Blocklet pruned : prunedBlocklets) {
        ExtendedBlocklet extended = blockletDetailsFetcher.getExtendedBlocklet(pruned, distributable.getSegment());
        if (dataMapFactory.getDataMapType() == DataMapLevel.FG) {
            // nanoTime keeps per-blocklet file names unique within this call
            String blockletFilePath = dataMapDir + CarbonCommonConstants.FILE_SEPARATOR + System.nanoTime();
            extended.setDataMapWriterPath(blockletFilePath);
            serializer.serializeBlocklet((FineGrainBlocklet) pruned, blockletFilePath);
        }
        extended.setSegmentId(distributable.getSegment().getSegmentNo());
        detailedBlocklets.add(extended);
    }
    return detailedBlocklets;
}
Also used : ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) Blocklet(org.apache.carbondata.core.indexstore.Blocklet) FineGrainBlocklet(org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet) ArrayList(java.util.ArrayList) BlockletSerializer(org.apache.carbondata.core.datamap.dev.BlockletSerializer) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) DataMap(org.apache.carbondata.core.datamap.dev.DataMap)

Aggregations

FineGrainBlocklet (org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet)3 DataInputStream (java.io.DataInputStream)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 BlockletSerializer (org.apache.carbondata.core.datamap.dev.BlockletSerializer)1 DataMap (org.apache.carbondata.core.datamap.dev.DataMap)1 FineGrainDataMap (org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainDataMap)1 Blocklet (org.apache.carbondata.core.indexstore.Blocklet)1 ExtendedBlocklet (org.apache.carbondata.core.indexstore.ExtendedBlocklet)1 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)1 Document (org.apache.lucene.document.Document)1 IndexableField (org.apache.lucene.index.IndexableField)1 MultiFieldQueryParser (org.apache.lucene.queryparser.classic.MultiFieldQueryParser)1 ParseException (org.apache.lucene.queryparser.classic.ParseException)1 QueryParser (org.apache.lucene.queryparser.classic.QueryParser)1