Use of org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet in project carbondata by apache.
The class BlockletSerializer, method deserializeBlocklet.
/**
 * Read data from the given file path and deserialize it into a blocklet.
 * @param writePath path of the serialized blocklet file
 * @return the deserialized FineGrainBlocklet
 * @throws IOException if the file cannot be read
 */
public FineGrainBlocklet deserializeBlocklet(String writePath) throws IOException {
  DataInputStream inputStream =
      FileFactory.getDataInputStream(writePath, FileFactory.getFileType(writePath));
  FineGrainBlocklet blocklet = new FineGrainBlocklet();
  blocklet.readFields(inputStream);
  inputStream.close();
  return blocklet;
}
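A minimal round-trip sketch of how the serializer pair might be used; the blocklet instance and the target path below are illustrative, not taken from the source:

// Hypothetical usage: persist a FineGrainBlocklet, then read it back.
BlockletSerializer serializer = new BlockletSerializer();
String targetPath = "/tmp/fg_datamap/" + System.nanoTime(); // illustrative path
serializer.serializeBlocklet(fineGrainBlocklet, targetPath); // write side
FineGrainBlocklet restored = serializer.deserializeBlocklet(targetPath); // read side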
Use of org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet in project carbondata by apache.
The class LuceneFineGrainDataMap, method prune.
/**
 * Prune the datamap with the filter expression. It returns the list of
 * blocklets in which the filter can match.
 */
@Override
public List<FineGrainBlocklet> prune(FilterResolverIntf filterExp,
    SegmentProperties segmentProperties, List<PartitionSpec> partitions) throws IOException {
  // convert the filter expression into a lucene query string
  List<String> fields = new ArrayList<String>();
  // only for test, query all data
  String strQuery = getQueryString(filterExp.getFilterExpression());
  String[] sFields = new String[fields.size()];
  fields.toArray(sFields);
  // get the analyzer
  if (analyzer == null) {
    analyzer = new StandardAnalyzer();
  }
  // use MultiFieldQueryParser to parse the query
  QueryParser queryParser = new MultiFieldQueryParser(sFields, analyzer);
  Query query;
  try {
    query = queryParser.parse(strQuery);
  } catch (ParseException e) {
    String errorMessage = String.format(
        "failed to filter block with query %s, detail is %s", strQuery, e.getMessage());
    LOGGER.error(errorMessage);
    return null;
  }
  // execute index search
  TopDocs result;
  try {
    result = indexSearcher.search(query, MAX_RESULT_NUMBER);
  } catch (IOException e) {
    String errorMessage = String.format(
        "failed to search lucene data, detail is %s", e.getMessage());
    LOGGER.error(errorMessage);
    throw new IOException(errorMessage);
  }
  // temporary structure to remove duplicated hits
  // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
  Map<String, Map<String, Map<Integer, Set<Integer>>>> mapBlocks = new HashMap<>();
  for (ScoreDoc scoreDoc : result.scoreDocs) {
    // get a document
    Document doc = indexSearcher.doc(scoreDoc.doc);
    // get all fields
    List<IndexableField> fieldsInDoc = doc.getFields();
    // get this block id Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    String blockId = fieldsInDoc.get(BLOCKID_ID).stringValue();
    Map<String, Map<Integer, Set<Integer>>> mapBlocklets = mapBlocks.get(blockId);
    if (mapBlocklets == null) {
      mapBlocklets = new HashMap<>();
      mapBlocks.put(blockId, mapBlocklets);
    }
    // get the blocklet id Map<BlockletId, Map<PageId, Set<RowId>>>
    String blockletId = fieldsInDoc.get(BLOCKLETID_ID).stringValue();
    Map<Integer, Set<Integer>> mapPageIds = mapBlocklets.get(blockletId);
    if (mapPageIds == null) {
      mapPageIds = new HashMap<>();
      mapBlocklets.put(blockletId, mapPageIds);
    }
    // get the page id Map<PageId, Set<RowId>>
    Number pageId = fieldsInDoc.get(PAGEID_ID).numericValue();
    Set<Integer> setRowId = mapPageIds.get(pageId.intValue());
    if (setRowId == null) {
      setRowId = new HashSet<>();
      mapPageIds.put(pageId.intValue(), setRowId);
    }
    // get the row id Set<RowId>
    Number rowId = fieldsInDoc.get(ROWID_ID).numericValue();
    setRowId.add(rowId.intValue());
  }
  // result blocklets
  List<FineGrainBlocklet> blocklets = new ArrayList<>();
  // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
  for (Map.Entry<String, Map<String, Map<Integer, Set<Integer>>>> mapBlock : mapBlocks.entrySet()) {
    String blockId = mapBlock.getKey();
    Map<String, Map<Integer, Set<Integer>>> mapBlocklets = mapBlock.getValue();
    // for blocklets in this block Map<BlockletId, Map<PageId, Set<RowId>>>
    for (Map.Entry<String, Map<Integer, Set<Integer>>> mapBlocklet : mapBlocklets.entrySet()) {
      String blockletId = mapBlocklet.getKey();
      Map<Integer, Set<Integer>> mapPageIds = mapBlocklet.getValue();
      List<FineGrainBlocklet.Page> pages = new ArrayList<FineGrainBlocklet.Page>();
      // for pages in this blocklet Map<PageId, Set<RowId>>
      for (Map.Entry<Integer, Set<Integer>> mapPageId : mapPageIds.entrySet()) {
        // construct the row id array
        int[] rowIds = new int[mapPageId.getValue().size()];
        int i = 0;
        // for row ids in this page Set<RowId>
        for (Integer rowid : mapPageId.getValue()) {
          rowIds[i++] = rowid;
        }
        // construct one page
        FineGrainBlocklet.Page page = new FineGrainBlocklet.Page();
        page.setPageId(mapPageId.getKey());
        page.setRowId(rowIds);
        // add this page into the page list
        pages.add(page);
      }
      // add a FineGrainBlocklet
      blocklets.add(new FineGrainBlocklet(blockId, blockletId, pages));
    }
  }
  return blocklets;
}
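For context, a hedged sketch of how the Lucene search step above could be driven against explicitly named fields instead of the query-all-data test path. It assumes an indexSearcher as in the method above; the column names and query text are illustrative assumptions, not taken from the source:

// Illustrative only: parse a user filter against hypothetical indexed columns.
String[] searchFields = new String[] { "name", "city" };
Analyzer analyzer = new StandardAnalyzer();
QueryParser parser = new MultiFieldQueryParser(searchFields, analyzer);
Query query;
try {
  query = parser.parse("name:bob AND city:shanghai");
} catch (ParseException e) {
  throw new IOException("unable to parse lucene query: " + e.getMessage(), e);
}
// fetch at most 100 hits; each hit's stored fields carry block/blocklet/page/row ids
TopDocs hits = indexSearcher.search(query, 100);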
Use of org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet in project carbondata by apache.
The class TableDataMap, method prune.
/**
 * This method is called on any machine after the pruning work has been distributed.
 * It takes the distributable object and prunes blocklets with the given filter expression.
 *
 * @param distributable distributable object describing the datamaps to load
 * @param filterExp filter expression to prune with
 * @return the pruned blocklets with detailed block information
 */
public List<ExtendedBlocklet> prune(DataMapDistributable distributable,
    FilterResolverIntf filterExp, List<PartitionSpec> partitions) throws IOException {
  List<ExtendedBlocklet> detailedBlocklets = new ArrayList<>();
  List<Blocklet> blocklets = new ArrayList<>();
  List<DataMap> dataMaps = dataMapFactory.getDataMaps(distributable);
  for (DataMap dataMap : dataMaps) {
    blocklets.addAll(dataMap.prune(filterExp,
        segmentPropertiesFetcher.getSegmentProperties(distributable.getSegment()), partitions));
  }
  BlockletSerializer serializer = new BlockletSerializer();
  String writePath = identifier.getTablePath() + CarbonCommonConstants.FILE_SEPARATOR
      + dataMapSchema.getDataMapName();
  if (dataMapFactory.getDataMapType() == DataMapLevel.FG) {
    FileFactory.mkdirs(writePath, FileFactory.getFileType(writePath));
  }
  for (Blocklet blocklet : blocklets) {
    ExtendedBlocklet detailedBlocklet =
        blockletDetailsFetcher.getExtendedBlocklet(blocklet, distributable.getSegment());
    if (dataMapFactory.getDataMapType() == DataMapLevel.FG) {
      // for fine grain datamaps, persist the blocklet so the scan side can read row-level info
      String blockletwritePath =
          writePath + CarbonCommonConstants.FILE_SEPARATOR + System.nanoTime();
      detailedBlocklet.setDataMapWriterPath(blockletwritePath);
      serializer.serializeBlocklet((FineGrainBlocklet) blocklet, blockletwritePath);
    }
    detailedBlocklet.setSegmentId(distributable.getSegment().getSegmentNo());
    detailedBlocklets.add(detailedBlocklet);
  }
  return detailedBlocklets;
}
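On the scan side, the persisted fine grain blocklet can be read back with the BlockletSerializer from the first example. A hedged sketch, assuming ExtendedBlocklet exposes a getter mirroring the setDataMapWriterPath call above:

// Illustrative read-back; getDataMapWriterPath() is assumed to mirror setDataMapWriterPath().
BlockletSerializer serializer = new BlockletSerializer();
for (ExtendedBlocklet detailed : detailedBlocklets) {
  String path = detailed.getDataMapWriterPath();
  if (path != null) {
    // only FG datamaps persist a blocklet file, so the path can be null otherwise
    FineGrainBlocklet restored = serializer.deserializeBlocklet(path);
    // restored carries the page ids and row ids to scan within this blocklet
  }
}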