use of org.apache.carbondata.core.index.dev.fgindex.FineGrainBlocklet in project carbondata by apache.
the class BlockletSerializer method deserializeBlocklet.
/**
 * Reads the serialized data from the given file path and deserializes it into a blocklet.
 * @param writePath path of the file the blocklet was serialized to
 * @return the deserialized FineGrainBlocklet
 * @throws IOException
 */
public FineGrainBlocklet deserializeBlocklet(String writePath) throws IOException {
  DataInputStream inputStream = FileFactory.getDataInputStream(writePath);
  FineGrainBlocklet blocklet = new FineGrainBlocklet();
  try {
    blocklet.readFields(inputStream);
  } finally {
    inputStream.close();
  }
  return blocklet;
}
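A minimal round-trip sketch of how this pairs with serializeBlocklet; it is not CarbonData source, the file path, blocklet id and row ids are hypothetical, and standard java.util/java.io imports are assumed. Only the BlockletSerializer, FineGrainBlocklet and FileFactory calls shown in these snippets are relied on.
// Hedged sketch: write a FineGrainBlocklet to disk and read it back.
// The paths and page contents are hypothetical placeholders.
public FineGrainBlocklet roundTrip() throws IOException {
  FineGrainBlocklet.Page page = new FineGrainBlocklet.Page();
  page.setPageId(0);
  page.setRowId(new int[] { 1, 5, 9 });
  FineGrainBlocklet written =
      new FineGrainBlocklet("/tmp/part-0-0.carbondata", "0", Collections.singletonList(page));
  BlockletSerializer serializer = new BlockletSerializer();
  FileFactory.mkdirs("/tmp/fg_index");
  String writePath = "/tmp/fg_index/" + System.nanoTime();
  serializer.serializeBlocklet(written, writePath);
  // read it back with the method shown above
  return serializer.deserializeBlocklet(writePath);
}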
use of org.apache.carbondata.core.index.dev.fgindex.FineGrainBlocklet in project carbondata by apache.
the class TableIndex method prune.
/**
 * Prunes the given indexes using the distributable split and the filter expression.
 * Invoked on a remote machine after the index work has been distributed.
 *
 * @param indices indexes to prune
 * @param distributable distributable split identifying the segment to prune
 * @param filterExp filter expression resolver
 * @param partitions partition specs used to restrict pruning
 * @return pruned blocklets with extended details
 */
public List<ExtendedBlocklet> prune(List<Index> indices, IndexInputSplit distributable,
    FilterResolverIntf filterExp, List<PartitionSpec> partitions) throws IOException {
  List<ExtendedBlocklet> detailedBlocklets = new ArrayList<>();
  List<Blocklet> blocklets = new ArrayList<>();
  Set<Path> partitionsToPrune = getPartitionLocations(partitions);
  SegmentProperties segmentProperties =
      segmentPropertiesFetcher.getSegmentProperties(distributable.getSegment(), partitionsToPrune);
  FilterExecutor filterExecutor = FilterUtil.getFilterExecutorTree(
      filterExp, segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties), false);
  for (Index index : indices) {
    blocklets.addAll(index.prune(filterExp, segmentProperties, filterExecutor, table));
  }
  BlockletSerializer serializer = new BlockletSerializer();
  String writePath = identifier.getTablePath() + CarbonCommonConstants.FILE_SEPARATOR
      + indexSchema.getIndexName();
  if (indexFactory.getIndexLevel() == IndexLevel.FG) {
    FileFactory.mkdirs(writePath);
  }
  for (Blocklet blocklet : blocklets) {
    ExtendedBlocklet detailedBlocklet =
        blockletDetailsFetcher.getExtendedBlocklet(blocklet, distributable.getSegment());
    if (indexFactory.getIndexLevel() == IndexLevel.FG) {
      String blockletWritePath =
          writePath + CarbonCommonConstants.FILE_SEPARATOR + System.nanoTime();
      detailedBlocklet.setIndexWriterPath(blockletWritePath);
      serializer.serializeBlocklet((FineGrainBlocklet) blocklet, blockletWritePath);
    }
    detailedBlocklet.setSegment(distributable.getSegment());
    detailedBlocklets.add(detailedBlocklet);
  }
  return detailedBlocklets;
}
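A hedged sketch of how a caller could read back the fine-grain detail that this method writes for FG indexes. getIndexWriterPath() is assumed here as the accessor paired with setIndexWriterPath() above; everything else reuses the BlockletSerializer calls shown earlier.
// Hedged sketch (not CarbonData source): read back the serialized FG blocklets.
// getIndexWriterPath() is an assumed getter matching setIndexWriterPath() used above.
BlockletSerializer serializer = new BlockletSerializer();
for (ExtendedBlocklet detailedBlocklet : detailedBlocklets) {
  String fgPath = detailedBlocklet.getIndexWriterPath();
  if (fgPath != null) {
    FineGrainBlocklet fineGrain = serializer.deserializeBlocklet(fgPath);
    // fineGrain now carries the page and row id detail for this blocklet
  }
}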
use of org.apache.carbondata.core.index.dev.fgindex.FineGrainBlocklet in project carbondata by apache.
the class LuceneFineGrainIndex method prune.
/**
 * Prunes the index with the filter expression and returns the list of
 * blocklets that may contain matching rows.
 */
@Override
public List<FineGrainBlocklet> prune(FilterResolverIntf filterExp,
    SegmentProperties segmentProperties, FilterExecutor filterExecutor, CarbonTable carbonTable)
    throws IOException {
  // convert the filter expression into a lucene query string
  List<String> fields = new ArrayList<String>();
  // only for testing: query all data
  String strQuery = getQueryString(filterExp.getFilterExpression());
  int maxDocs;
  try {
    maxDocs = getMaxDoc(filterExp.getFilterExpression());
  } catch (NumberFormatException e) {
    maxDocs = Integer.MAX_VALUE;
  }
  if (null == strQuery) {
    return null;
  }
  String[] sFields = new String[fields.size()];
  fields.toArray(sFields);
  // get analyzer
  if (analyzer == null) {
    analyzer = new StandardAnalyzer();
  }
  // use MultiFieldQueryParser to parse the query
  QueryParser queryParser = new MultiFieldQueryParser(sFields, analyzer);
  queryParser.setAllowLeadingWildcard(true);
  Query query;
  try {
    query = queryParser.parse(strQuery);
  } catch (ParseException e) {
    String errorMessage = String.format(
        "failed to filter block with query %s, detail is %s", strQuery, e.getMessage());
    LOGGER.error(errorMessage, e);
    return null;
  }
  // temporary structure that also deduplicates row hits
  // Map<BlockletId, Map<PageId, List<RowId>>>
  Map<String, Map<Integer, List<Short>>> mapBlocks = new HashMap<>();
  long luceneSearchStartTime = System.currentTimeMillis();
  for (Map.Entry<String, IndexSearcher> searcherEntry : indexSearcherMap.entrySet()) {
    IndexSearcher indexSearcher = searcherEntry.getValue();
    // take the minimum of the documents available in the reader and the limit set by the user
    maxDocs = Math.min(maxDocs, indexSearcher.getIndexReader().maxDoc());
    // execute index search
    TopDocs result = null;
    // number of documents to be queried in one search; always the minimum of
    // SEARCH_LIMIT and the documents still to be fetched
    int numberOfDocumentsToBeQueried = 0;
    // counter for the total number of documents already queried
    int documentHitCounter = 0;
    try {
      numberOfDocumentsToBeQueried = Math.min(maxDocs, SEARCH_LIMIT);
      result = indexSearcher.search(query, numberOfDocumentsToBeQueried);
      documentHitCounter += numberOfDocumentsToBeQueried;
    } catch (IOException e) {
      String errorMessage =
          String.format("failed to search lucene data, detail is %s", e.getMessage());
      LOGGER.error(errorMessage, e);
      throw new IOException(errorMessage, e);
    }
    ByteBuffer intBuffer = ByteBuffer.allocate(4);
    // last scoreDoc of a result, to be passed to the searchAfter API
    ScoreDoc lastScoreDoc = null;
    while (true) {
      for (ScoreDoc scoreDoc : result.scoreDocs) {
        // get a document
        Document doc = indexSearcher.doc(scoreDoc.doc);
        // get all fields
        List<IndexableField> fieldsInDoc = doc.getFields();
        if (writeCacheSize > 0) {
          // fill row ids into the map; each value combines multiple rows
          fillMapForCombineRows(intBuffer, mapBlocks, fieldsInDoc, searcherEntry.getKey());
        } else {
          // fill row ids into the map
          fillMap(intBuffer, mapBlocks, fieldsInDoc, searcherEntry.getKey());
        }
        lastScoreDoc = scoreDoc;
      }
      // result holds the total number of hits, so keep querying the left-over documents
      int remainingHits = result.totalHits - documentHitCounter;
      // break the loop once the counter reaches maxDocs or no hits remain
      if (remainingHits <= 0 || documentHitCounter >= maxDocs) {
        break;
      }
      numberOfDocumentsToBeQueried = Math.min(remainingHits, SEARCH_LIMIT);
      result = indexSearcher.searchAfter(lastScoreDoc, query, numberOfDocumentsToBeQueried);
      documentHitCounter += numberOfDocumentsToBeQueried;
    }
  }
  LOGGER.info("Time taken for lucene search: "
      + (System.currentTimeMillis() - luceneSearchStartTime) + " ms");
  // result blocklets
  List<FineGrainBlocklet> blocklets = new ArrayList<>();
  // Map<BlockletId, Map<PageId, List<RowId>>>
  for (Map.Entry<String, Map<Integer, List<Short>>> mapBlocklet : mapBlocks.entrySet()) {
    String blockletId = mapBlocklet.getKey();
    Map<Integer, List<Short>> mapPageIds = mapBlocklet.getValue();
    List<FineGrainBlocklet.Page> pages = new ArrayList<FineGrainBlocklet.Page>();
    // for each page in this blocklet: Map<PageId, List<RowId>>
    for (Map.Entry<Integer, List<Short>> mapPageId : mapPageIds.entrySet()) {
      // construct the row id array
      int[] rowIds = new int[mapPageId.getValue().size()];
      int i = 0;
      // for each row id in this page
      for (Short rowid : mapPageId.getValue()) {
        rowIds[i++] = rowid;
      }
      // construct one page
      FineGrainBlocklet.Page page = new FineGrainBlocklet.Page();
      page.setPageId(mapPageId.getKey());
      page.setRowId(rowIds);
      // add this page to the list of pages
      pages.add(page);
    }
    // add a FineGrainBlocklet
    blocklets.add(new FineGrainBlocklet(filePath, blockletId, pages));
  }
  return blocklets;
}
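The paging above can be reduced to a small standalone sketch of the Lucene search/searchAfter pattern. This is an illustration, not CarbonData code: indexSearcher, query, maxDocs and limit are assumed to be in scope, and TopDocs.totalHits is treated as a plain count, as in the snippet above (older Lucene versions).
// Hedged sketch of the paging pattern used above: fetch at most `limit` hits per call
// and continue with searchAfter from the last ScoreDoc until maxDocs or all hits are consumed.
TopDocs page = indexSearcher.search(query, Math.min(maxDocs, limit));
int fetched = Math.min(maxDocs, limit);
ScoreDoc last = null;
while (true) {
  for (ScoreDoc scoreDoc : page.scoreDocs) {
    Document doc = indexSearcher.doc(scoreDoc.doc);
    // consume doc here
    last = scoreDoc;
  }
  int remaining = page.totalHits - fetched;
  if (remaining <= 0 || fetched >= maxDocs) {
    break;
  }
  int next = Math.min(remaining, limit);
  page = indexSearcher.searchAfter(last, query, next);
  fetched += next;
}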