Use of com.linkedin.pinot.core.common.BlockSingleValIterator in the Pinot project by LinkedIn.
Class Projection, method run.
/**
 * Builds a selection result table holding one row per filtered doc id.
 *
 * <p>For every column in {@code _columnList} the dictionary ids of the filtered documents are
 * read and added to the table: a single int for columns not listed in {@code _mvColumns}, and
 * an {@code Integer[]} of the document's ids for multi-value columns. The populated table is then
 * handed to {@code transformFromIdToValues} together with {@code _dictionaryMap} and
 * {@code _addCountStar}.
 *
 * @return the result of {@code transformFromIdToValues} applied to the populated table
 */
public ResultTable run() {
  ResultTable table = new ResultTable(_columnList, _filteredDocIds.size());
  table.setResultType(ResultTable.ResultType.Selection);

  for (Pair columnEntry : _columnList) {
    String columnName = (String) columnEntry.getFirst();
    int rowIndex = 0;

    if (_mvColumns.contains(columnName)) {
      // Multi-value column: each document yields a variable number of dictionary ids.
      BlockMultiValIterator mvIterator = (BlockMultiValIterator)
          _indexSegment.getDataSource(columnName).getNextBlock().getBlockValueSet().iterator();
      for (Integer filteredDocId : _filteredDocIds) {
        mvIterator.skipTo(filteredDocId);
        // The per-column buffer is reused across documents, so only the filled prefix is copied.
        int[] buffer = _mvColumnArrayMap.get(columnName);
        int valueCount = mvIterator.nextIntVal(buffer);
        table.add(rowIndex, ArrayUtils.toObject(Arrays.copyOf(buffer, valueCount)));
        rowIndex++;
      }
      continue;
    }

    // Single-value column: exactly one dictionary id per document.
    BlockSingleValIterator svIterator = (BlockSingleValIterator)
        _indexSegment.getDataSource(columnName).getNextBlock().getBlockValueSet().iterator();
    for (Integer filteredDocId : _filteredDocIds) {
      svIterator.skipTo(filteredDocId);
      table.add(rowIndex, svIterator.nextIntVal());
      rowIndex++;
    }
  }

  return transformFromIdToValues(table, _dictionaryMap, _addCountStar);
}
Use of com.linkedin.pinot.core.common.BlockSingleValIterator in the Pinot project by LinkedIn.
Class IndexSegmentImpl, method iterator.
/**
 * Returns an iterator over the rows of the documents in {@code [startDocId, endDocId)}.
 *
 * <p>Each row maps every single-value column to the dictionary value of that document. Multi-value
 * columns are not populated yet (see the TODO below), so they are absent from the returned rows.
 *
 * @param startDocId first doc id to return (inclusive)
 * @param endDocId doc id at which iteration stops (exclusive)
 * @return a read-only iterator; {@code remove()} is unsupported
 */
public Iterator<GenericRow> iterator(final int startDocId, final int endDocId) {
  final Map<String, BlockSingleValIterator> singleValIteratorMap = new HashMap<>();
  final Map<String, BlockMultiValIterator> multiValIteratorMap = new HashMap<>();
  for (String column : getColumnNames()) {
    DataSource dataSource = getDataSource(column);
    BlockValIterator iterator = dataSource.getNextBlock().getBlockValueSet().iterator();
    if (dataSource.getDataSourceMetadata().isSingleValue()) {
      singleValIteratorMap.put(column, (BlockSingleValIterator) iterator);
    } else {
      multiValIteratorMap.put(column, (BlockMultiValIterator) iterator);
    }
  }
  return new Iterator<GenericRow>() {
    int docId = startDocId;

    @Override
    public boolean hasNext() {
      return docId < endDocId;
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }

    @Override
    public GenericRow next() {
      if (!hasNext()) {
        // Iterator contract: signal exhaustion instead of reading past endDocId.
        throw new java.util.NoSuchElementException(
            "No more documents: docId " + docId + " >= endDocId " + endDocId);
      }
      Map<String, Object> map = new HashMap<>();
      for (Map.Entry<String, BlockSingleValIterator> entry : singleValIteratorMap.entrySet()) {
        String column = entry.getKey();
        BlockSingleValIterator valIterator = entry.getValue();
        // Position the value iterator on the current document; without this the values read
        // would start at the iterator's initial position rather than at startDocId.
        valIterator.skipTo(docId);
        int dictId = valIterator.nextIntVal();
        Dictionary dictionary = getDictionaryFor(column);
        map.put(column, dictionary.get(dictId));
      }
      for (String column : multiValIteratorMap.keySet()) {
        // TODO: handle multi value
      }
      GenericRow genericRow = new GenericRow();
      genericRow.init(map);
      docId++;
      return genericRow;
    }
  };
}
Use of com.linkedin.pinot.core.common.BlockSingleValIterator in the Pinot project by LinkedIn.
Class SegmentDumpTool, method doMain.
/**
 * Parses the command-line arguments, loads the segment at {@code segmentPath} and prints its
 * contents to standard output.
 *
 * <p>The output is a tab-separated header line ("Doc" followed by the column names), then one
 * tab-separated line per document containing the doc id and the dictionary-decoded value of each
 * column. When {@code columnNames} is not set, all schema columns are dumped in sorted order. If
 * {@code dumpStarTree} is set, the star tree read from the segment directory is printed as well.
 *
 * <p>NOTE(review): every column's value iterator is cast to {@code BlockSingleValIterator}, so
 * this presumably only works for single-value columns - confirm before using on multi-value data.
 *
 * @param args command-line arguments, bound to this object's fields by {@code CmdLineParser}
 * @throws Exception if argument parsing, segment loading or star tree loading fails
 */
public void doMain(String[] args) throws Exception {
  CmdLineParser parser = new CmdLineParser(this);
  parser.parseArgument(args);
  File segmentDir = new File(segmentPath);
  SegmentMetadata metadata = new SegmentMetadataImpl(segmentDir);
  // All columns by default
  if (columnNames == null) {
    columnNames = new ArrayList<String>(metadata.getSchema().getColumnNames());
    Collections.sort(columnNames);
  }
  IndexSegment indexSegment = Loaders.IndexSegment.load(segmentDir, ReadMode.mmap);

  // One dictionary and one value iterator per dumped column.
  Map<String, Dictionary> dictionaries = new HashMap<String, Dictionary>();
  Map<String, BlockSingleValIterator> iterators = new HashMap<String, BlockSingleValIterator>();
  for (String columnName : columnNames) {
    DataSource dataSource = indexSegment.getDataSource(columnName);
    dataSource.open();
    Block block = dataSource.nextBlock();
    BlockValSet blockValSet = block.getBlockValueSet();
    BlockSingleValIterator itr = (BlockSingleValIterator) blockValSet.iterator();
    iterators.put(columnName, itr);
    dictionaries.put(columnName, dataSource.getDictionary());
  }

  // Header line.
  System.out.print("Doc\t");
  for (String columnName : columnNames) {
    System.out.print(columnName);
    System.out.print("\t");
  }
  System.out.println();

  // One line per document; the iterators advance in lock-step, one value per document.
  int totalDocs = indexSegment.getSegmentMetadata().getTotalDocs();
  for (int docId = 0; docId < totalDocs; docId++) {
    System.out.print(docId);
    System.out.print("\t");
    for (String columnName : columnNames) {
      int dictId = iterators.get(columnName).nextIntVal();
      Object value = dictionaries.get(columnName).get(dictId);
      System.out.print(value);
      System.out.print("\t");
    }
    System.out.println();
  }

  if (dumpStarTree) {
    System.out.println();
    File starTreeFile = new File(segmentDir, V1Constants.STAR_TREE_INDEX_FILE);
    StarTreeInterf tree = StarTreeSerDe.fromFile(starTreeFile, ReadMode.mmap);
    tree.printTree();
  }
}
Use of com.linkedin.pinot.core.common.BlockSingleValIterator in the Pinot project by LinkedIn.
Class BaseSumStarTreeIndexTest, method computeSum.
/**
 * Sums the given metrics per group by scanning the documents produced by a doc-id iterator.
 *
 * <p>For every doc id returned by {@code docIdIterator} (until {@code Constants.EOF}), the group
 * key is built by appending the string value of each group-by column followed by {@code "_"},
 * and the double value of each metric is added to that group's running sums.
 *
 * @param segment segment whose data sources supply the dictionaries and value iterators
 * @param docIdIterator source of the doc ids to scan
 * @param metricNames metric columns to sum; each result array is indexed in this order
 * @param groupByColumns columns whose values form the group key, in this order
 * @return map from group key to the per-metric sums for that group
 */
private Map<String, double[]> computeSum(IndexSegment segment, BlockDocIdIterator docIdIterator, List<String> metricNames, List<String> groupByColumns) {
  final int metricCount = metricNames.size();
  final int groupByCount = groupByColumns.size();

  Dictionary[] metricDicts = new Dictionary[metricCount];
  BlockSingleValIterator[] metricIters = new BlockSingleValIterator[metricCount];
  Dictionary[] groupDicts = new Dictionary[groupByCount];
  BlockSingleValIterator[] groupIters = new BlockSingleValIterator[groupByCount];

  // Resolve the dictionary and value iterator of every metric column.
  for (int m = 0; m < metricCount; m++) {
    DataSource metricSource = segment.getDataSource(metricNames.get(m));
    metricDicts[m] = metricSource.getDictionary();
    metricIters[m] = (BlockSingleValIterator) metricSource.getNextBlock().getBlockValueSet().iterator();
  }

  // Same for every group-by column.
  for (int g = 0; g < groupByCount; g++) {
    DataSource groupSource = segment.getDataSource(groupByColumns.get(g));
    groupDicts[g] = groupSource.getDictionary();
    groupIters[g] = (BlockSingleValIterator) groupSource.getNextBlock().getBlockValueSet().iterator();
  }

  Map<String, double[]> sumsByGroup = new HashMap<String, double[]>();
  for (int docId = docIdIterator.next(); docId != Constants.EOF; docId = docIdIterator.next()) {
    // Group key: "<value>_" for each group-by column, concatenated in column order.
    StringBuilder keyBuilder = new StringBuilder();
    for (int g = 0; g < groupByCount; g++) {
      BlockSingleValIterator groupIter = groupIters[g];
      groupIter.skipTo(docId);
      keyBuilder.append(groupDicts[g].getStringValue(groupIter.nextIntVal())).append("_");
    }
    String groupKey = keyBuilder.toString();

    double[] groupSums = sumsByGroup.get(groupKey);
    if (groupSums == null) {
      groupSums = new double[metricCount];
      sumsByGroup.put(groupKey, groupSums);
    }

    for (int m = 0; m < metricCount; m++) {
      BlockSingleValIterator metricIter = metricIters[m];
      metricIter.skipTo(docId);
      groupSums[m] += metricDicts[m].getDoubleValue(metricIter.nextIntVal());
    }
  }
  return sumsByGroup;
}
Use of com.linkedin.pinot.core.common.BlockSingleValIterator in the Pinot project by LinkedIn.
Class RealtimeSegmentTest, method testMetricPredicateWithoutInvIdx.
/**
 * Runs an equality predicate on the "count" column through a scan-based filter over
 * {@code segmentWithoutInvIdx}, then checks that every matching document decodes to
 * 890662862 and that exactly 100000 documents match.
 */
@Test
public void testMetricPredicateWithoutInvIdx() throws Exception {
  DataSource filterSource = segmentWithoutInvIdx.getDataSource("count");

  List<String> equalsValues = new ArrayList<String>();
  equalsValues.add("890662862");
  Predicate countEquals = new EqPredicate("count", equalsValues);

  ScanBasedFilterOperator filterOperator =
      new ScanBasedFilterOperator(countEquals, filterSource, 0, segmentWithoutInvIdx.getRawDocumentCount() - 1);
  BlockDocIdIterator matchingDocIds = filterOperator.nextBlock().getBlockDocIdSet().iterator();

  // A second data source for the same column is used to read the values back.
  DataSource valueSource = segmentWithoutInvIdx.getDataSource("count");
  BlockSingleValIterator valueIterator =
      (BlockSingleValIterator) valueSource.nextBlock().getBlockValueSet().iterator();

  int matchCount = 0;
  for (int docId = matchingDocIds.next(); docId != Constants.EOF; docId = matchingDocIds.next()) {
    valueIterator.skipTo(docId);
    Assert.assertEquals(filterSource.getDictionary().get(valueIterator.nextIntVal()), 890662862);
    matchCount++;
  }
  Assert.assertEquals(matchCount, 100000);
}
Aggregations