Use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.
Class StringDimensionIndexer, method makeDimensionSelector.
/**
 * Creates a {@link DimensionSelector} that reads this dimension's encoded values out of the
 * entry currently held by {@code currEntry}.
 *
 * @param spec      dimension spec; its extraction function (if any) is applied in {@code lookupName}
 * @param currEntry holder whose key is re-read on every {@code getRow()} / {@code matches()} call
 * @param desc      descriptor supplying the position of this dimension inside the key's dims array
 * @return a selector that also acts as its own {@link IdLookup} when no extraction function is set
 */
@Override
public DimensionSelector makeDimensionSelector(final DimensionSpec spec, final IncrementalIndexStorageAdapter.EntryHolder currEntry, final IncrementalIndex.DimensionDesc desc) {
  final ExtractionFn extractionFn = spec.getExtractionFn();
  final int dimIndex = desc.getIndex();
  // Cardinality is captured once, at selector creation; ids at or above it are filtered out of rows.
  final int maxId = getCardinality();

  class IndexerDimensionSelector implements DimensionSelector, IdLookup {
    /**
     * Returns the ids of the current entry for this dimension, dropping ids that are not below
     * {@code maxId}. A missing or empty value is reported as the id of {@code null} when that id
     * is known and below {@code maxId}.
     */
    @Override
    public IndexedInts getRow() {
      final Object[] keyDims = currEntry.getKey().getDims();
      final int[] encoded = dimIndex < keyDims.length ? (int[]) keyDims[dimIndex] : null;

      int[] ids = null;
      int count = 0;
      if (encoded == null || encoded.length == 0) {
        final int nullId = getEncodedValue(null, false);
        if (nullId >= 0) {
          final boolean nullIdVisible = nullId < maxId;
          // Choose to use ArrayBasedIndexedInts later, instead of EmptyIndexedInts, for monomorphism
          ids = nullIdVisible ? new int[] { nullId } : IntArrays.EMPTY_ARRAY;
          count = nullIdVisible ? 1 : 0;
        }
        // When null has no id, ids stays null and count stays 0.
      } else {
        ids = new int[encoded.length];
        for (int i = 0; i < encoded.length; i++) {
          final int candidate = encoded[i];
          if (candidate < maxId) {
            ids[count++] = candidate;
          }
        }
      }
      return ArrayBasedIndexedInts.of(ids, count);
    }

    /**
     * Matches entries containing {@code value}. Without an extraction function the comparison is
     * done on encoded ids; with one, it is delegated to the predicate-based matcher.
     */
    @Override
    public ValueMatcher makeValueMatcher(final String value) {
      if (extractionFn != null) {
        // Employ precomputed BitSet optimization
        return makeValueMatcher(Predicates.equalTo(value));
      }

      final int valueId = lookupId(value);
      if (valueId < 0 && value != null) {
        // A non-null value without an id cannot occur in any entry.
        return BooleanValueMatcher.of(false);
      }

      final boolean matchNull = value == null;
      return new ValueMatcher() {
        @Override
        public boolean matches() {
          final Object[] keyDims = currEntry.getKey().getDims();
          if (dimIndex >= keyDims.length) {
            return matchNull;
          }
          final int[] encoded = (int[]) keyDims[dimIndex];
          if (encoded == null || encoded.length == 0) {
            return matchNull;
          }
          for (int i = 0; i < encoded.length; i++) {
            if (encoded[i] == valueId) {
              return true;
            }
          }
          return false;
        }
      };
    }

    /**
     * Matches entries having at least one id contained in the id set computed up front from
     * {@code predicate}; missing or empty values match iff the predicate accepts {@code null}.
     */
    @Override
    public ValueMatcher makeValueMatcher(final Predicate<String> predicate) {
      final BitSet matchingIds = DimensionSelectorUtils.makePredicateMatchingSet(this, predicate);
      final boolean nullMatches = predicate.apply(null);
      return new ValueMatcher() {
        @Override
        public boolean matches() {
          final Object[] keyDims = currEntry.getKey().getDims();
          if (dimIndex >= keyDims.length) {
            return nullMatches;
          }
          final int[] encoded = (int[]) keyDims[dimIndex];
          if (encoded == null || encoded.length == 0) {
            return nullMatches;
          }
          for (int i = 0; i < encoded.length; i++) {
            if (matchingIds.get(encoded[i])) {
              return true;
            }
          }
          return false;
        }
      };
    }

    @Override
    public int getValueCardinality() {
      return maxId;
    }

    /** Resolves an id to its value, passing it through the extraction function when one is set. */
    @Override
    public String lookupName(int id) {
      final String rawValue = getActualValue(id, false);
      if (extractionFn == null) {
        return rawValue;
      }
      return extractionFn.apply(rawValue);
    }

    @Override
    public boolean nameLookupPossibleInAdvance() {
      return true;
    }

    /** Id lookup is only offered when no extraction function is set. */
    @Nullable
    @Override
    public IdLookup idLookup() {
      if (extractionFn != null) {
        return null;
      }
      return this;
    }

    @Override
    public int lookupId(String name) {
      if (extractionFn != null) {
        throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function");
      }
      return getEncodedValue(name, false);
    }

    @Override
    public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
      inspector.visit("currEntry", currEntry);
    }
  }

  return new IndexerDimensionSelector();
}
Use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.
Class IncrementalIndexStorageAdapter, method makeCursors.
/**
 * Builds a sequence of cursors over the incremental index, one cursor per granularity bucket
 * of the requested interval after it has been clipped to the interval actually covered by data.
 *
 * @param filter         filter handed to {@code makeFilterMatcher}; entries it rejects are skipped
 * @param interval       requested time interval
 * @param virtualColumns virtual columns consulted first by every selector factory method
 * @param gran           granularity used to split the clipped interval into buckets
 * @param descending     whether buckets (and the entries inside them) are visited in reverse order
 * @return an empty sequence when the index is empty or the interval does not overlap the data,
 *         otherwise one cursor per bucket
 */
@Override
public Sequence<Cursor> makeCursors(final Filter filter, final Interval interval, final VirtualColumns virtualColumns, final Granularity gran, final boolean descending) {
if (index.isEmpty()) {
return Sequences.empty();
}
Interval actualIntervalTmp = interval;
// Data spans from the minimum timestamp to the end of the bucket holding the maximum timestamp.
final Interval dataInterval = new Interval(getMinTime().getMillis(), gran.bucketEnd(getMaxTime()).getMillis());
if (!actualIntervalTmp.overlaps(dataInterval)) {
return Sequences.empty();
}
// Clip the requested interval on both sides so it never extends past the data interval.
if (actualIntervalTmp.getStart().isBefore(dataInterval.getStart())) {
actualIntervalTmp = actualIntervalTmp.withStart(dataInterval.getStart());
}
if (actualIntervalTmp.getEnd().isAfter(dataInterval.getEnd())) {
actualIntervalTmp = actualIntervalTmp.withEnd(dataInterval.getEnd());
}
final Interval actualInterval = actualIntervalTmp;
Iterable<Interval> iterable = gran.getIterable(actualInterval);
if (descending) {
iterable = Lists.reverse(ImmutableList.copyOf(iterable));
}
return Sequences.map(Sequences.simple(iterable), new Function<Interval, Cursor>() {
// Single holder shared by every cursor this function creates; all selectors below read from it.
EntryHolder currEntry = new EntryHolder();
@Override
public Cursor apply(@Nullable final Interval interval) {
// NOTE(review): interval is annotated @Nullable but is dereferenced without a null check.
final long timeStart = Math.max(interval.getStartMillis(), actualInterval.getStartMillis());
return new Cursor() {
// NOTE(review): the matcher is built from this cursor while it is still being initialized;
// presumably makeFilterMatcher obtains its selectors from it - confirm.
private final ValueMatcher filterMatcher = makeFilterMatcher(filter, this);
private Iterator<Map.Entry<IncrementalIndex.TimeAndDims, Integer>> baseIter;
private Iterable<Map.Entry<IncrementalIndex.TimeAndDims, Integer>> cursorIterable;
private boolean emptyRange;
final DateTime time;
// -1 means reset() has not run yet; afterwards it counts the entries reset() skipped
// before reaching the first one accepted by the filter.
int numAdvanced = -1;
boolean done;
{
// Upper bound is the earlier of the clipped interval end and the start of the next bucket.
cursorIterable = index.getFacts().timeRangeIterable(descending, timeStart, Math.min(actualInterval.getEndMillis(), gran.increment(interval.getStart()).getMillis()));
emptyRange = !cursorIterable.iterator().hasNext();
time = gran.toDateTime(interval.getStartMillis());
reset();
}
@Override
public DateTime getTime() {
return time;
}
/**
 * Moves to the next entry accepted by the filter, or marks the cursor done when the
 * iterator is exhausted. Calls {@code BaseQuery.checkInterrupted()} before each entry.
 */
@Override
public void advance() {
if (!baseIter.hasNext()) {
done = true;
return;
}
while (baseIter.hasNext()) {
BaseQuery.checkInterrupted();
currEntry.set(baseIter.next());
if (filterMatcher.matches()) {
return;
}
}
// Reached only when the loop ran out of entries; the last entry read is re-tested.
if (!filterMatcher.matches()) {
done = true;
}
}
/**
 * Same traversal as {@link #advance()}, but instead of calling
 * {@code BaseQuery.checkInterrupted()} it polls the thread's interrupt flag and simply
 * returns (without setting {@code done}) when the flag is set.
 */
@Override
public void advanceUninterruptibly() {
if (!baseIter.hasNext()) {
done = true;
return;
}
while (baseIter.hasNext()) {
if (Thread.currentThread().isInterrupted()) {
return;
}
currEntry.set(baseIter.next());
if (filterMatcher.matches()) {
return;
}
}
if (!filterMatcher.matches()) {
done = true;
}
}
/** Calls {@link #advance()} up to {@code offset} times, stopping early once done. */
@Override
public void advanceTo(int offset) {
int count = 0;
while (count < offset && !isDone()) {
advance();
count++;
}
}
@Override
public boolean isDone() {
return done;
}
@Override
public boolean isDoneOrInterrupted() {
return isDone() || Thread.currentThread().isInterrupted();
}
/**
 * Re-creates the iterator and positions the cursor on the first entry accepted by the
 * filter. On the first call {@code numAdvanced} is initialized to 0; on later calls the
 * previously counted {@code numAdvanced} leading entries are skipped without re-testing.
 */
@Override
public void reset() {
baseIter = cursorIterable.iterator();
if (numAdvanced == -1) {
numAdvanced = 0;
} else {
Iterators.advance(baseIter, numAdvanced);
}
BaseQuery.checkInterrupted();
boolean foundMatched = false;
while (baseIter.hasNext()) {
currEntry.set(baseIter.next());
if (filterMatcher.matches()) {
foundMatched = true;
break;
}
numAdvanced++;
}
// Done only if nothing matched and the range was empty or the iterator is exhausted.
done = !foundMatched && (emptyRange || !baseIter.hasNext());
}
/** Virtual columns take precedence; otherwise the spec decorates the base selector. */
@Override
public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) {
if (virtualColumns.exists(dimensionSpec.getDimension())) {
return virtualColumns.makeDimensionSelector(dimensionSpec, this);
}
return dimensionSpec.decorate(makeDimensionSelectorUndecorated(dimensionSpec));
}
/**
 * Picks the base selector: a time-based selector for the time column, a wrapping selector
 * for LONG/FLOAT non-dimension columns, a null selector for unknown or other-typed
 * non-dimension columns, and the indexer's own selector for real dimensions.
 */
private DimensionSelector makeDimensionSelectorUndecorated(DimensionSpec dimensionSpec) {
final String dimension = dimensionSpec.getDimension();
final ExtractionFn extractionFn = dimensionSpec.getExtractionFn();
if (dimension.equals(Column.TIME_COLUMN_NAME)) {
DimensionSelector selector = new SingleScanTimeDimSelector(makeLongColumnSelector(dimension), extractionFn, descending);
return selector;
}
final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(dimensionSpec.getDimension());
if (dimensionDesc == null) {
// not a dimension, column may be a metric
ColumnCapabilities capabilities = getColumnCapabilities(dimension);
if (capabilities == null) {
return NullDimensionSelector.instance();
}
if (capabilities.getType() == ValueType.LONG) {
return new LongWrappingDimensionSelector(makeLongColumnSelector(dimension), extractionFn);
}
if (capabilities.getType() == ValueType.FLOAT) {
return new FloatWrappingDimensionSelector(makeFloatColumnSelector(dimension), extractionFn);
}
// if we can't wrap the base column, just return a column of all nulls
return NullDimensionSelector.instance();
} else {
final DimensionIndexer indexer = dimensionDesc.getIndexer();
return indexer.makeDimensionSelector(dimensionSpec, currEntry, dimensionDesc);
}
}
/**
 * Resolution order: virtual column, dimension (via its indexer), metric; a column that is
 * none of these yields the zero float selector.
 */
@Override
public FloatColumnSelector makeFloatColumnSelector(String columnName) {
if (virtualColumns.exists(columnName)) {
return virtualColumns.makeFloatColumnSelector(columnName, this);
}
final Integer dimIndex = index.getDimensionIndex(columnName);
if (dimIndex != null) {
final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(columnName);
final DimensionIndexer indexer = dimensionDesc.getIndexer();
return indexer.makeFloatColumnSelector(currEntry, dimensionDesc);
}
final Integer metricIndexInt = index.getMetricIndex(columnName);
if (metricIndexInt == null) {
return ZeroFloatColumnSelector.instance();
}
// Unbox once here so get() does not unbox on every call.
final int metricIndex = metricIndexInt;
return new FloatColumnSelector() {
@Override
public float get() {
return index.getMetricFloatValue(currEntry.getValue(), metricIndex);
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
inspector.visit("index", index);
}
};
}
/**
 * Resolution order: virtual column, time column (the current entry's timestamp), dimension
 * (via its indexer), metric; a column that is none of these yields the zero long selector.
 */
@Override
public LongColumnSelector makeLongColumnSelector(String columnName) {
if (virtualColumns.exists(columnName)) {
return virtualColumns.makeLongColumnSelector(columnName, this);
}
if (columnName.equals(Column.TIME_COLUMN_NAME)) {
class TimeLongColumnSelector implements LongColumnSelector {
@Override
public long get() {
return currEntry.getKey().getTimestamp();
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
}
}
return new TimeLongColumnSelector();
}
final Integer dimIndex = index.getDimensionIndex(columnName);
if (dimIndex != null) {
final IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(columnName);
final DimensionIndexer indexer = dimensionDesc.getIndexer();
return indexer.makeLongColumnSelector(currEntry, dimensionDesc);
}
final Integer metricIndexInt = index.getMetricIndex(columnName);
if (metricIndexInt == null) {
return ZeroLongColumnSelector.instance();
}
final int metricIndex = metricIndexInt;
return new LongColumnSelector() {
@Override
public long get() {
return index.getMetricLongValue(currEntry.getValue(), metricIndex);
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
inspector.visit("index", index);
}
};
}
/**
 * Resolution order: virtual column, time column, metric, dimension. Returns {@code null}
 * when the column is none of these.
 */
@Override
public ObjectColumnSelector makeObjectColumnSelector(String column) {
if (virtualColumns.exists(column)) {
return virtualColumns.makeObjectColumnSelector(column, this);
}
if (column.equals(Column.TIME_COLUMN_NAME)) {
return new ObjectColumnSelector<Long>() {
@Override
public Class classOfObject() {
return Long.TYPE;
}
@Override
public Long get() {
return currEntry.getKey().getTimestamp();
}
};
}
final Integer metricIndexInt = index.getMetricIndex(column);
if (metricIndexInt != null) {
final int metricIndex = metricIndexInt;
final Class classOfObject = index.getMetricClass(column);
return new ObjectColumnSelector() {
@Override
public Class classOfObject() {
return classOfObject;
}
@Override
public Object get() {
return index.getMetricObjectValue(currEntry.getValue(), metricIndex);
}
};
}
IncrementalIndex.DimensionDesc dimensionDesc = index.getDimension(column);
if (dimensionDesc == null) {
return null;
} else {
final int dimensionIndex = dimensionDesc.getIndex();
final DimensionIndexer indexer = dimensionDesc.getIndexer();
return new ObjectColumnSelector<Object>() {
@Override
public Class classOfObject() {
return Object.class;
}
@Override
public Object get() {
IncrementalIndex.TimeAndDims key = currEntry.getKey();
if (key == null) {
return null;
}
Object[] dims = key.getDims();
// The key's dims array may be shorter than this dimension's index; report null then.
if (dimensionIndex >= dims.length) {
return null;
}
return indexer.convertUnsortedEncodedKeyComponentToActualArrayOrList(dims[dimensionIndex], DimensionIndexer.ARRAY);
}
};
}
}
/** Capabilities of a virtual column if one exists by that name, else those known to the index. */
@Nullable
@Override
public ColumnCapabilities getColumnCapabilities(String columnName) {
if (virtualColumns.exists(columnName)) {
return virtualColumns.getColumnCapabilities(columnName);
}
return index.getCapabilities(columnName);
}
};
}
});
}
Use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.
Class FilteredAggregatorTest, method testAggregateWithExtractionFns.
/**
 * Runs the filtered aggregator against each dimension filter type, every one configured with
 * the same JavaScript extraction function (which appends "AARDVARK" to the raw value) and
 * targeting the extracted value "aAARDVARK".
 */
@Test
public void testAggregateWithExtractionFns() {
  final float[] values = { 0.15f, 0.27f };
  final ExtractionFn extractionFn = new JavaScriptExtractionFn(
      "function(str) { return str + 'AARDVARK'; }",
      false,
      JavaScriptConfig.getEnabledInstance()
  );

  // Selector filter.
  final FilteredAggregatorFactory selectorFiltered = new FilteredAggregatorFactory(
      new DoubleSumAggregatorFactory("billy", "value"),
      new SelectorDimFilter("dim", "aAARDVARK", extractionFn)
  );
  validateFilteredAggs(selectorFiltered, values, new TestFloatColumnSelector(values));

  // In filter.
  final FilteredAggregatorFactory inFiltered = new FilteredAggregatorFactory(
      new DoubleSumAggregatorFactory("billy", "value"),
      new InDimFilter("dim", Arrays.asList("NOT-aAARDVARK", "FOOBAR", "aAARDVARK"), extractionFn)
  );
  validateFilteredAggs(inFiltered, values, new TestFloatColumnSelector(values));

  // Bound filter whose lower and upper bounds are both the target value.
  final FilteredAggregatorFactory boundFiltered = new FilteredAggregatorFactory(
      new DoubleSumAggregatorFactory("billy", "value"),
      new BoundDimFilter("dim", "aAARDVARK", "aAARDVARK", false, false, true, extractionFn, StringComparators.ALPHANUMERIC)
  );
  validateFilteredAggs(boundFiltered, values, new TestFloatColumnSelector(values));

  // Regex filter.
  final FilteredAggregatorFactory regexFiltered = new FilteredAggregatorFactory(
      new DoubleSumAggregatorFactory("billy", "value"),
      new RegexDimFilter("dim", "aAARDVARK", extractionFn)
  );
  validateFilteredAggs(regexFiltered, values, new TestFloatColumnSelector(values));

  // Search-query filter.
  final FilteredAggregatorFactory searchFiltered = new FilteredAggregatorFactory(
      new DoubleSumAggregatorFactory("billy", "value"),
      new SearchQueryDimFilter("dim", new ContainsSearchQuerySpec("aAARDVARK", true), extractionFn)
  );
  validateFilteredAggs(searchFiltered, values, new TestFloatColumnSelector(values));

  // JavaScript filter.
  final FilteredAggregatorFactory javaScriptFiltered = new FilteredAggregatorFactory(
      new DoubleSumAggregatorFactory("billy", "value"),
      new JavaScriptDimFilter(
          "dim",
          "function(x) { return(x === 'aAARDVARK') }",
          extractionFn,
          JavaScriptConfig.getEnabledInstance()
      )
  );
  validateFilteredAggs(javaScriptFiltered, values, new TestFloatColumnSelector(values));
}
Use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.
Class FilteredAggregatorTest, method makeColumnSelector.
/**
 * Builds a stub {@link ColumnSelectorFactory} for the filtered-aggregator tests.
 *
 * <p>It exposes exactly two columns: the string dimension {@code "dim"} (dictionary
 * {@code 0 -> "a"}, {@code 1 -> "b"}) and the float column {@code "value"}, which is backed by
 * the given selector. Requests for any other dimension or float column, and all long/object
 * selector requests, throw {@link UnsupportedOperationException}.
 *
 * @param selector float selector served for {@code "value"}; its current index also decides
 *                 which value {@code "dim"} reports for the current row
 * @return the stub factory
 */
private ColumnSelectorFactory makeColumnSelector(final TestFloatColumnSelector selector) {
  return new ColumnSelectorFactory() {
    @Override
    public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) {
      final String dimensionName = dimensionSpec.getDimension();
      if (dimensionName.equals("dim")) {
        // The spec decorates the raw selector below.
        return dimensionSpec.decorate(new DimensionSelector() {
          @Override
          public IndexedInts getRow() {
            // Every third row (index % 3 == 2) reports id 1 ("b"); all other rows report id 0 ("a").
            if (selector.getIndex() % 3 == 2) {
              return ArrayBasedIndexedInts.of(new int[] { 1 });
            } else {
              return ArrayBasedIndexedInts.of(new int[] { 0 });
            }
          }
          @Override
          public ValueMatcher makeValueMatcher(String value) {
            return DimensionSelectorUtils.makeValueMatcherGeneric(this, value);
          }
          @Override
          public ValueMatcher makeValueMatcher(Predicate<String> predicate) {
            return DimensionSelectorUtils.makeValueMatcherGeneric(this, predicate);
          }
          @Override
          public int getValueCardinality() {
            return 2;
          }
          @Override
          public String lookupName(int id) {
            switch(id) {
              case 0:
                return "a";
              case 1:
                return "b";
              default:
                throw new IllegalArgumentException();
            }
          }
          @Override
          public boolean nameLookupPossibleInAdvance() {
            return true;
          }
          @Nullable
          @Override
          public IdLookup idLookup() {
            // Inverse of lookupName; names outside the two-entry dictionary are rejected.
            return new IdLookup() {
              @Override
              public int lookupId(String name) {
                switch(name) {
                  case "a":
                    return 0;
                  case "b":
                    return 1;
                  default:
                    throw new IllegalArgumentException();
                }
              }
            };
          }
          @Override
          public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
          }
        });
      } else {
        throw new UnsupportedOperationException();
      }
    }
    @Override
    public LongColumnSelector makeLongColumnSelector(String columnName) {
      throw new UnsupportedOperationException();
    }
    @Override
    public FloatColumnSelector makeFloatColumnSelector(String columnName) {
      if (columnName.equals("value")) {
        return selector;
      } else {
        throw new UnsupportedOperationException();
      }
    }
    @Override
    public ObjectColumnSelector makeObjectColumnSelector(String columnName) {
      throw new UnsupportedOperationException();
    }
    @Override
    public ColumnCapabilities getColumnCapabilities(String columnName) {
      // "value" is a plain float column; every other name is described as a
      // dictionary-encoded string column with bitmap indexes.
      final ColumnCapabilitiesImpl caps = new ColumnCapabilitiesImpl();
      if (columnName.equals("value")) {
        caps.setType(ValueType.FLOAT);
        caps.setDictionaryEncoded(false);
        caps.setHasBitmapIndexes(false);
      } else {
        caps.setType(ValueType.STRING);
        caps.setDictionaryEncoded(true);
        caps.setHasBitmapIndexes(true);
      }
      return caps;
    }
  };
}
Use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.
Class GroupByQueryRunnerTest, method testGroupByCardinalityAggWithExtractionFn.
/**
 * Groups by market per day and runs a cardinality aggregator over the quality dimension after a
 * JavaScript extraction function has mapped every value to the constant "hello". Every group is
 * expected to report the same cardinality estimate.
 */
@Test
public void testGroupByCardinalityAggWithExtractionFn() {
  final ExtractionFn constantHello = new JavaScriptExtractionFn(
      "function(str) { return 'hello' }",
      false,
      JavaScriptConfig.getEnabledInstance()
  );

  final CardinalityAggregatorFactory numVals = new CardinalityAggregatorFactory(
      "numVals",
      ImmutableList.<DimensionSpec>of(
          new ExtractionDimensionSpec(
              QueryRunnerTestHelper.qualityDimension,
              QueryRunnerTestHelper.qualityDimension,
              constantHello
          )
      ),
      false
  );

  final GroupByQuery query = GroupByQuery
      .builder()
      .setDataSource(QueryRunnerTestHelper.dataSource)
      .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
      .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("market", "alias")))
      .setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, numVals))
      .setGranularity(QueryRunnerTestHelper.dayGran)
      .build();

  // The estimate every group is expected to report for "numVals".
  final double singleValueEstimate = 1.0002442201269182d;
  final List<Row> expectedResults = Arrays.asList(
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "spot", "rows", 9L, "numVals", singleValueEstimate),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "total_market", "rows", 2L, "numVals", singleValueEstimate),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "upfront", "rows", 2L, "numVals", singleValueEstimate),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "spot", "rows", 9L, "numVals", singleValueEstimate),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "total_market", "rows", 2L, "numVals", singleValueEstimate),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "upfront", "rows", 2L, "numVals", singleValueEstimate)
  );

  final Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
  TestHelper.assertExpectedObjects(expectedResults, results, "");
}
Aggregations