Use of org.apache.druid.query.extraction.ExtractionFn in project druid by druid-io.
Example from the class TimeFilteringTest, method testTimeFilterWithTimeFormatExtractionFn.
@Test
public void testTimeFilterWithTimeFormatExtractionFn() {
ExtractionFn exfn = new TimeFormatExtractionFn("EEEE", DateTimes.inferTzFromString("America/New_York"), "en", null, false);
assertFilterMatches(new SelectorDimFilter(ColumnHolder.TIME_COLUMN_NAME, "Wednesday", exfn), ImmutableList.of("0", "1", "2", "3", "4", "5"));
}
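For reference, a minimal sketch of the extraction function in isolation (hedged: constructor arguments as used above, expected output assuming Joda-style formatting). Epoch millis 0 through 5 all fall on the evening of Wednesday, 1969-12-31, in America/New_York, which is why every row matches:
ExtractionFn dayOfWeek = new TimeFormatExtractionFn(
"EEEE", // Joda DateTimeFormat pattern: full day-of-week name
DateTimes.inferTzFromString("America/New_York"), // evaluate timestamps in this zone
"en", // locale for day names
null, // no granularity bucketing
false // asMillis = false: treat input as a timestamp
);
String day = dayOfWeek.apply(0L); // "Wednesday" -- 1970-01-01T00:00Z is 19:00 the previous day in New York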
Use of org.apache.druid.query.extraction.ExtractionFn in project druid by druid-io.
Example from the class TimeFilteringTest, method testIntervalFilterOnStringDimension.
@Test
public void testIntervalFilterOnStringDimension() {
assertFilterMatches(
new IntervalDimFilter("dim0", Collections.singletonList(Intervals.of("1970-01-01T00:00:00.001Z/1970-01-01T00:00:00.005Z")), null),
ImmutableList.of("1", "2", "3", "4")
);
assertFilterMatches(
new IntervalDimFilter(
"dim0",
Arrays.asList(
Intervals.of("1970-01-01T00:00:00.000Z/1970-01-01T00:00:00.003Z"),
Intervals.of("1970-01-01T00:00:00.004Z/1970-01-01T00:00:00.006Z")
),
null
),
ImmutableList.of("0", "1", "2", "4", "5")
);
assertFilterMatches(
new IntervalDimFilter(
"dim0",
Arrays.asList(
Intervals.of("1970-01-01T00:00:00.000Z/1970-01-01T00:00:00.001Z"),
Intervals.of("1970-01-01T00:00:00.003Z/1970-01-01T00:00:00.006Z"),
Intervals.of("1970-01-01T00:00:00.002Z/1970-01-01T00:00:00.005Z")
),
null
),
ImmutableList.of("0", "2", "3", "4", "5")
);
assertFilterMatches(
new IntervalDimFilter("dim1", Collections.singletonList(Intervals.of("1970-01-01T00:00:00.002Z/1970-01-01T00:00:00.011Z")), null),
ImmutableList.of("1", "2")
);
// increment timestamp by 2 hours
String timeBoosterJsFn = "function(x) { return(Number(x) + 7200000) }";
ExtractionFn exFn = new JavaScriptExtractionFn(timeBoosterJsFn, true, JavaScriptConfig.getEnabledInstance());
assertFilterMatchesSkipVectorize(new IntervalDimFilter("dim0", Collections.singletonList(Intervals.of("1970-01-01T02:00:00.001Z/1970-01-01T02:00:00.005Z")), exFn), ImmutableList.of("1", "2", "3", "4"));
}
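A quick sketch of the time booster on a single value (hedged: assumes JavaScriptExtractionFn's usual string coercion of the script's numeric result). Each dim0 value is shifted forward two hours before the interval test, so the filter's intervals are shifted by the same amount:
ExtractionFn timeBooster = new JavaScriptExtractionFn(
"function(x) { return(Number(x) + 7200000) }", // +2h in milliseconds
true, // injective: the shift preserves value uniqueness
JavaScriptConfig.getEnabledInstance()
);
String shifted = timeBooster.apply("3"); // "7200003", which lies inside 02:00:00.001Z/02:00:00.005Z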
Use of org.apache.druid.query.extraction.ExtractionFn in project druid by druid-io.
Example from the class StringDimensionIndexer, method makeDimensionSelector.
@Override
public DimensionSelector makeDimensionSelector(final DimensionSpec spec, final IncrementalIndexRowHolder currEntry, final IncrementalIndex.DimensionDesc desc) {
final ExtractionFn extractionFn = spec.getExtractionFn();
final int dimIndex = desc.getIndex();
// maxId is used in concert with getLastRowIndex() in IncrementalIndex to ensure that callers do not encounter
// rows that contain IDs over the initially-reported cardinality. The main idea is that IncrementalIndex establishes
// a watermark at the time a cursor is created, and doesn't allow the cursor to walk past that watermark.
//
// Additionally, this selector explicitly blocks knowledge of IDs past maxId that may occur from other causes
// (for example: nulls getting generated for empty arrays, or calls to lookupId).
final int maxId = getCardinality();
class IndexerDimensionSelector implements DimensionSelector, IdLookup {
private final ArrayBasedIndexedInts indexedInts = new ArrayBasedIndexedInts();
@MonotonicNonNull
private int[] nullIdIntArray;
@Override
public IndexedInts getRow() {
final Object[] dims = currEntry.get().getDims();
int[] indices;
if (dimIndex < dims.length) {
indices = (int[]) dims[dimIndex];
} else {
indices = null;
}
int[] row = null;
int rowSize = 0;
// This usually happens when currEntry's rowIndex is smaller than the rowIndex of the row in which this dimension first appeared.
if (indices == null || indices.length == 0) {
if (hasMultipleValues) {
row = IntArrays.EMPTY_ARRAY;
rowSize = 0;
} else {
final int nullId = getEncodedValue(null, false);
if (nullId >= 0 && nullId < maxId) {
// null was added to the dictionary before this selector was created; return its ID.
if (nullIdIntArray == null) {
nullIdIntArray = new int[] { nullId };
}
row = nullIdIntArray;
rowSize = 1;
} else {
// null doesn't exist in the dictionary; return an empty array.
// Use ArrayBasedIndexedInts (via setValues below) rather than a special "empty" IndexedInts implementation, to keep the call site monomorphic.
row = IntArrays.EMPTY_ARRAY;
rowSize = 0;
}
}
} else {
row = indices;
rowSize = indices.length;
}
indexedInts.setValues(row, rowSize);
return indexedInts;
}
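// Fast path: with no extraction function, the target value is resolved to a dictionary ID once up front, and each row is matched by comparing integer IDs instead of strings.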
@Override
public ValueMatcher makeValueMatcher(final String value) {
if (extractionFn == null) {
final int valueId = lookupId(value);
if (valueId >= 0 || value == null) {
return new ValueMatcher() {
@Override
public boolean matches() {
Object[] dims = currEntry.get().getDims();
if (dimIndex >= dims.length) {
return value == null;
}
int[] dimsInt = (int[]) dims[dimIndex];
if (dimsInt == null || dimsInt.length == 0) {
return value == null;
}
for (int id : dimsInt) {
if (id == valueId) {
return true;
}
}
return false;
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
// nothing to inspect
}
};
} else {
return BooleanValueMatcher.of(false);
}
} else {
// Employ caching BitSet optimization
return makeValueMatcher(Predicates.equalTo(value));
}
}
@Override
public ValueMatcher makeValueMatcher(final Predicate<String> predicate) {
final BitSet checkedIds = new BitSet(maxId);
final BitSet matchingIds = new BitSet(maxId);
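// checkedIds records which dictionary IDs the predicate has already been applied to; matchingIds caches the positive results, so each distinct ID is evaluated at most once.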
final boolean matchNull = predicate.apply(null);
// Lazy matcher; only check an id if matches() is called.
return new ValueMatcher() {
@Override
public boolean matches() {
Object[] dims = currEntry.get().getDims();
if (dimIndex >= dims.length) {
return matchNull;
}
int[] dimsInt = (int[]) dims[dimIndex];
if (dimsInt == null || dimsInt.length == 0) {
return matchNull;
}
for (int id : dimsInt) {
if (checkedIds.get(id)) {
if (matchingIds.get(id)) {
return true;
}
} else {
final boolean matches = predicate.apply(lookupName(id));
checkedIds.set(id);
if (matches) {
matchingIds.set(id);
return true;
}
}
}
return false;
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
// nothing to inspect
}
};
}
@Override
public int getValueCardinality() {
return maxId;
}
@Override
public String lookupName(int id) {
if (id >= maxId) {
// Sanity check; IDs beyond maxId should not be known to callers. (See comment above.)
throw new ISE("id[%d] >= maxId[%d]", id, maxId);
}
final String strValue = getActualValue(id, false);
return extractionFn == null ? strValue : extractionFn.apply(strValue);
}
@Override
public boolean nameLookupPossibleInAdvance() {
return dictionaryEncodesAllValues();
}
@Nullable
@Override
public IdLookup idLookup() {
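// An extraction function transforms values on the way out, so the dictionary's name-to-ID mapping no longer applies and reverse lookup must be disabled.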
return extractionFn == null ? this : null;
}
@Override
public int lookupId(String name) {
if (extractionFn != null) {
throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function");
}
final int id = getEncodedValue(name, false);
if (id < maxId) {
return id;
} else {
// name was added to the dictionary after this selector was created; treat it as absent.
return DimensionDictionary.ABSENT_VALUE_ID;
}
}
@SuppressWarnings("deprecation")
@Nullable
@Override
public Object getObject() {
IncrementalIndexRow key = currEntry.get();
if (key == null) {
return null;
}
Object[] dims = key.getDims();
if (dimIndex >= dims.length) {
return null;
}
return convertUnsortedEncodedKeyComponentToActualList((int[]) dims[dimIndex]);
}
@SuppressWarnings("deprecation")
@Override
public Class classOfObject() {
return Object.class;
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
// nothing to inspect
}
}
return new IndexerDimensionSelector();
}
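To show how the selector is typically consumed (a hedged usage sketch; selector stands for the IndexerDimensionSelector returned above, positioned on a row by its cursor):
IndexedInts row = selector.getRow();
for (int i = 0; i < row.size(); i++) {
String value = selector.lookupName(row.get(i)); // extractionFn, if present, is applied here
// ... use value ...
}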
Use of org.apache.druid.query.extraction.ExtractionFn in project druid by druid-io.
Example from the class GroupByQueryQueryToolChest, method makePostComputeManipulatorFn.
@Override
public Function<ResultRow, ResultRow> makePostComputeManipulatorFn(final GroupByQuery query, final MetricManipulationFn fn) {
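// extractionsToRewrite flags dimensions whose extraction can be deferred until after merging; see the sketch following this method.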
final BitSet optimizedDims = extractionsToRewrite(query);
final Function<ResultRow, ResultRow> preCompute = makePreComputeManipulatorFn(query, fn);
if (optimizedDims.isEmpty()) {
return preCompute;
}
// If we have optimizations that can be done at this level, we apply them here
final List<DimensionSpec> dimensions = query.getDimensions();
final List<ExtractionFn> extractionFns = new ArrayList<>(dimensions.size());
for (int i = 0; i < dimensions.size(); i++) {
final DimensionSpec dimensionSpec = dimensions.get(i);
final ExtractionFn extractionFnToAdd;
if (optimizedDims.get(i)) {
extractionFnToAdd = dimensionSpec.getExtractionFn();
} else {
extractionFnToAdd = null;
}
extractionFns.add(extractionFnToAdd);
}
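// ResultRow is positional: an optional timestamp, then dimensions, then aggregators; getResultRowDimensionStart() gives the offset of the first dimension value.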
final int dimensionStart = query.getResultRowDimensionStart();
return row -> {
// preCompute.apply(row) will either return the original row, or create a copy.
ResultRow newRow = preCompute.apply(row);
// noinspection ObjectEquality (if preCompute made a copy, no need to make another copy)
if (newRow == row) {
newRow = row.copy();
}
for (int i = optimizedDims.nextSetBit(0); i >= 0; i = optimizedDims.nextSetBit(i + 1)) {
newRow.set(dimensionStart + i, extractionFns.get(i).apply(newRow.get(dimensionStart + i)));
}
return newRow;
};
}
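For reference, a hedged sketch of what extractionsToRewrite looks like, reconstructed from how it is used above rather than copied from the source: only one-to-one extraction functions can be deferred past merging, because merging on raw values then produces exactly the same groups as merging on extracted values:
private static BitSet extractionsToRewrite(GroupByQuery query) {
final BitSet bitSet = new BitSet();
final List<DimensionSpec> dimensions = query.getDimensions();
for (int i = 0; i < dimensions.size(); i++) {
final ExtractionFn fn = dimensions.get(i).getExtractionFn();
// Only ONE_TO_ONE functions are safe to run after merging without changing group keys.
if (fn != null && ExtractionFn.ExtractionType.ONE_TO_ONE.equals(fn.getExtractionType())) {
bitSet.set(i);
}
}
return bitSet;
}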
Use of org.apache.druid.query.extraction.ExtractionFn in project druid by druid-io.
Example from the class GroupByQueryRunnerTest, method testGroupByNestedOuterExtractionFnOnFloatInner.
@Test
public void testGroupByNestedOuterExtractionFnOnFloatInner() {
// Cannot vectorize due to extraction dimension spec.
cannotVectorize();
if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
expectedException.expect(UnsupportedOperationException.class);
expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING.");
}
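// Identity function: values pass through unchanged, but the extraction code path (including its vectorization restrictions) is still exercised.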
String jsFn = "function(obj) { return obj; }";
ExtractionFn jsExtractionFn = new JavaScriptExtractionFn(jsFn, false, JavaScriptConfig.getEnabledInstance());
GroupByQuery subquery = makeQueryBuilder()
.setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
.setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
.setDimensions(
new DefaultDimensionSpec("quality", "alias"),
new ExtractionDimensionSpec("qualityFloat", "qf_inner", ColumnType.FLOAT, jsExtractionFn)
)
.setDimFilter(new SelectorDimFilter("quality", "technology", null))
.setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
.setGranularity(QueryRunnerTestHelper.DAY_GRAN)
.build();
GroupByQuery outerQuery = makeQueryBuilder()
.setDataSource(subquery)
.setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
.setDimensions(
new DefaultDimensionSpec("alias", "alias"),
new ExtractionDimensionSpec("qf_inner", "qf_outer", ColumnType.FLOAT, jsExtractionFn)
)
.setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
.setGranularity(QueryRunnerTestHelper.ALL_GRAN)
.build();
List<ResultRow> expectedResults = Collections.singletonList(makeRow(outerQuery, "2011-04-01", "alias", "technology", "qf_outer", 17000.0f, "rows", 2L));
Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, outerQuery);
TestHelper.assertExpectedObjects(expectedResults, results, "extraction-fn");
}