Search in sources :

Example 11 with NumberType

use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.

the class FetchDataTypesVisitor method genericVisit.

/**
 * Use the MetadataHelper to fetch the {@code Set<Type<?>>} for the field referenced by a query term and record it in the supplied multimap. Handle the
 * case of spoofing the NumberType for fields which are numeric but not indexed.
 * <p>
 * A field that already has an entry in the multimap is left untouched. When the lookup yields no types (or fails), default types are substituted: a
 * {@code NumberType} when the term's literal is a {@code Number}, otherwise both {@code LcNoDiacriticsType} and {@code NoOpType}.
 *
 * @param node
 *            the query term to inspect; nodes for which no identifier/operator/literal triple can be extracted are ignored
 * @param data
 *            the {@code HashMultimap<String, Type<?>>} of field name to data types accumulated so far
 * @return the same multimap instance that was passed in as {@code data}, possibly with entries added for the term's field
 */
private Object genericVisit(JexlNode node, Object data) {
    HashMultimap<String, Type<?>> dataTypes = (HashMultimap<String, Type<?>>) data;
    JexlASTHelper.IdentifierOpLiteral op = JexlASTHelper.getIdentifierOpLiteral(node);
    if (op == null) {
        return dataTypes;
    }
    final String fieldName = op.deconstructIdentifier();
    if (dataTypes.containsKey(fieldName)) {
        // types for this field were already resolved by an earlier term
        return dataTypes;
    }

    Set<Type<?>> dataTypesForField = Collections.emptySet();
    try {
        if (useCache) {
            Tuple2<String, Set<String>> cacheKey = new Tuple2<>(fieldName, datatypeFilter);
            Set<Type<?>> types = typeCache.getIfPresent(cacheKey);
            if (null == types) {
                dataTypesForField = this.helper.getDatatypesForField(fieldName, datatypeFilter);
                typeCache.put(cacheKey, dataTypesForField);
            } else {
                dataTypesForField = types;
            }
        } else {
            dataTypesForField = this.helper.getDatatypesForField(fieldName, datatypeFilter);
        }
    } catch (InstantiationException | TableNotFoundException | IllegalAccessException e) {
        // Best effort: keep going with the empty set so the default types below are applied,
        // but record which field failed and keep the stack trace.
        log.error("Unable to fetch data types for field " + fieldName, e);
    }

    if (!dataTypesForField.isEmpty()) {
        dataTypes.putAll(fieldName, dataTypesForField);
    } else if (op.getLiteralValue() instanceof Number) {
        // This is a hack to get around the following case:
        // 1) A user enters a query with a numeric term on a field
        // that isn't indexed: e.g. AGE < 4
        // 2) To get proper comparisons during evaluation, the
        // NumberType needs to be set on the AGE, otherwise
        // lexicographic comparisons will occur.
        // This causes a problem though, because the
        // RangeBuildingVisitor thinks that AGE is indexed, it is not,
        // so it incorrectly fails queries where "AGE < 4" is
        // intersected with an indexed field.
        // If this is unindexed (no normalizers for it) and the
        // literal is a Number
        dataTypes.put(fieldName, new NumberType());
        if (log.isTraceEnabled()) {
            log.trace("Unindexed numeric field, adding NumberType for " + fieldName);
        }
    } else {
        // add LcNoDiacritics and NoOpType to ensure that we
        // query against both forms of the string
        dataTypes.put(fieldName, new LcNoDiacriticsType());
        dataTypes.put(fieldName, new NoOpType());
        if (log.isTraceEnabled()) {
            log.trace("Unindexed field, adding LcNoDiacriticsType and NoOpType for " + fieldName);
        }
    }
    return dataTypes;
}
Also used : Set(java.util.Set) NoOpType(datawave.data.type.NoOpType) HashMultimap(com.google.common.collect.HashMultimap) JexlASTHelper(datawave.query.jexl.JexlASTHelper) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) NoOpType(datawave.data.type.NoOpType) NumberType(datawave.data.type.NumberType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) Type(datawave.data.type.Type) NumberType(datawave.data.type.NumberType) Tuple2(datawave.query.util.Tuple2)

Example 12 with NumberType

use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.

the class ExpandMultiNormalizedTermsTest method testMixedCaseWithNumber.

/**
 * Expands a query that combines mixed-case terms on a field with two text types (NAME) and a quoted numeric term on a NumberType field (NUM), and
 * checks the result against the expected expanded query string.
 */
@Test
public void testMixedCaseWithNumber() throws ParseException {
    // field -> data types used for expansion
    Multimap<String, Type<?>> fieldTypes = HashMultimap.create();
    fieldTypes.put("NUM", new NumberType());
    fieldTypes.putAll("NAME", Sets.newHashSet(new LcNoDiacriticsType(), new NoOpType()));

    // register every field with the helper as indexed, index-only and term-frequency
    helper.setIndexedFields(fieldTypes.keySet());
    helper.setIndexOnlyFields(fieldTypes.keySet());
    helper.addTermFrequencyFields(fieldTypes.keySet());
    config.setQueryFieldsDatatypes(fieldTypes);

    final String query = "(NAME == 'Alice' || NAME == 'BOB') && NUM < '1'";
    final String expanded = "((NAME == 'Alice' || NAME == 'alice') || (NAME == 'bob' || NAME == 'BOB')) && NUM < '+aE1'";
    expandTerms(query, expanded);
}
Also used : LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) IpAddressType(datawave.data.type.IpAddressType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) NoOpType(datawave.data.type.NoOpType) NumberType(datawave.data.type.NumberType) LcType(datawave.data.type.LcType) Type(datawave.data.type.Type) NumberType(datawave.data.type.NumberType) NoOpType(datawave.data.type.NoOpType) Test(org.junit.Test)

Example 13 with NumberType

use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.

the class ExpandMultiNormalizedTermsTest method testNumber.

/**
 * Checks expansion of a numeric literal for a field typed only as NumberType (NUM) and for a field typed as both LcNoDiacriticsType and NumberType
 * (MULTI).
 */
@Test
public void testNumber() throws ParseException {
    // field -> data types used for expansion
    Multimap<String, Type<?>> fieldTypes = HashMultimap.create();
    fieldTypes.putAll("MULTI", Sets.newHashSet(new LcNoDiacriticsType(), new NumberType()));
    fieldTypes.put("NUM", new NumberType());

    // register every field with the helper as indexed, index-only and term-frequency
    helper.setIndexedFields(fieldTypes.keySet());
    helper.setIndexOnlyFields(fieldTypes.keySet());
    helper.addTermFrequencyFields(fieldTypes.keySet());
    config.setQueryFieldsDatatypes(fieldTypes);

    // single-typed field: only the normalized numeric form is expected
    final String singleTypeQuery = "NUM == '1'";
    final String singleTypeExpanded = "NUM == '+aE1'";
    expandTerms(singleTypeQuery, singleTypeExpanded);

    // multi-typed field: both the original and the normalized numeric form are expected
    final String multiTypeQuery = "MULTI == '1'";
    final String multiTypeExpanded = "(MULTI == '1' || MULTI == '+aE1')";
    expandTerms(multiTypeQuery, multiTypeExpanded);
}
Also used : LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) IpAddressType(datawave.data.type.IpAddressType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) NoOpType(datawave.data.type.NoOpType) NumberType(datawave.data.type.NumberType) LcType(datawave.data.type.LcType) Type(datawave.data.type.Type) NumberType(datawave.data.type.NumberType) Test(org.junit.Test)

Example 14 with NumberType

use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.

the class ExpandMultiNormalizedTermsTest method testFilterFunctionNormalizationWithIndexedNumeric.

/**
 * Checks that a filter:includeRegex function over a NumberType field comes back from expansion unchanged.
 */
@Test
public void testFilterFunctionNormalizationWithIndexedNumeric() throws ParseException {
    Multimap<String, Type<?>> fieldTypes = HashMultimap.create();
    fieldTypes.put("NUM", new NumberType());
    config.setQueryFieldsDatatypes(fieldTypes);

    // the expected output is the input itself
    final String query = "filter:includeRegex(NUM, '1')";
    expandTerms(query, query);
}
Also used : IpAddressType(datawave.data.type.IpAddressType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) NoOpType(datawave.data.type.NoOpType) NumberType(datawave.data.type.NumberType) LcType(datawave.data.type.LcType) Type(datawave.data.type.Type) NumberType(datawave.data.type.NumberType) Test(org.junit.Test)

Example 15 with NumberType

use of datawave.data.type.NumberType in project datawave by NationalSecurityAgency.

the class GroupingTransform method flush.

/**
 * Turns every grouping held in the counting map into a document (the grouped attributes plus a "COUNT" attribute), optionally flattens the pending
 * documents, and then hands back the first pending document.
 *
 * @return the first pending document paired with a key (the most recent saved key when flattening and keys are available, otherwise the document's
 *         own metadata), or {@code null} when no documents are pending
 */
@Override
public Entry<Key, Document> flush() {
    if (!countingMap.isEmpty()) {
        log.trace("flush will use the countingMap: {}", countingMap);
        for (Collection<GroupingTypeAttribute<?>> grouping : countingMap.keySet()) {
            log.trace("from countingMap, got entry: {}", grouping);

            final ColumnVisibility visibility;
            try {
                visibility = toColumnVisibility(fieldVisibilities.get(grouping));
            } catch (Exception e) {
                throw new IllegalStateException("Unable to merge column visibilities: " + fieldVisibilities.get(grouping), e);
            }

            // a key saved during getListKeyCounts is required to build the document
            Assert.notEmpty(keys, "no available keys for grouping results");
            // the last (most recent) key lets a new iterator know where to start
            Key mostRecentKey = keys.get(keys.size() - 1);

            Document groupDocument = new Document(mostRecentKey, true);
            groupDocument.setColumnVisibility(visibility);
            for (GroupingTypeAttribute<?> attribute : grouping) {
                groupDocument.put(getFieldName(attribute), attribute);
            }

            // attach the count for this grouping as a numeric attribute
            NumberType countType = new NumberType();
            countType.setDelegate(new BigDecimal(countingMap.get(grouping)));
            TypeAttribute<BigDecimal> countAttribute = new TypeAttribute<>(countType, new Key("count"), true);
            groupDocument.put("COUNT", countAttribute);

            documents.add(groupDocument);
        }
        if (flatten) {
            // flatten to just one document on the tserver.
            flatten(documents);
        }
    }

    if (documents.isEmpty()) {
        return null;
    }

    log.trace("{} will flush first of {} documents: {}", this.hashCode(), documents.size(), documents);
    Document next = documents.pop();
    // when flattening, use the last (most recent) key so a new iterator will know where to start
    Key resultKey = (keys.size() > 0 && flatten) ? keys.get(keys.size() - 1) : next.getMetadata();
    Entry<Key, Document> result = Maps.immutableEntry(resultKey, next);
    log.trace("flushing out {}", result);
    countingMap.clear();
    return result;
}
Also used : NumberType(datawave.data.type.NumberType) TypeAttribute(datawave.query.attributes.TypeAttribute) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Document(datawave.query.attributes.Document) Key(org.apache.accumulo.core.data.Key) BigDecimal(java.math.BigDecimal)

Aggregations

NumberType (datawave.data.type.NumberType)28 LcNoDiacriticsType (datawave.data.type.LcNoDiacriticsType)21 Test (org.junit.Test)21 Type (datawave.data.type.Type)20 NoOpType (datawave.data.type.NoOpType)18 ScannerFactory (datawave.query.tables.ScannerFactory)13 MockMetadataHelper (datawave.query.util.MockMetadataHelper)13 ASTJexlScript (org.apache.commons.jexl2.parser.ASTJexlScript)13 RangeFactoryForTests.makeTestRange (datawave.common.test.utils.query.RangeFactoryForTests.makeTestRange)9 QueryPlan (datawave.query.planner.QueryPlan)9 Range (org.apache.accumulo.core.data.Range)9 LcType (datawave.data.type.LcType)8 IpAddressType (datawave.data.type.IpAddressType)4 ShardQueryConfiguration (datawave.query.config.ShardQueryConfiguration)3 Key (org.apache.accumulo.core.data.Key)3 ColumnVisibility (org.apache.accumulo.core.security.ColumnVisibility)3 ImmutableMap (com.google.common.collect.ImmutableMap)2 MarkingFunctions (datawave.marking.MarkingFunctions)2 ValueTuple (datawave.query.attributes.ValueTuple)2 QueryIterator (datawave.query.iterator.QueryIterator)2