Search in sources :

Example 21 with LcNoDiacriticsType

use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.

the class ExpandMultiNormalizedTermsTest method testBoundedUnNormalizedBoundsCase.

/**
 * A bounded range on a field whose only type is {@link LcNoDiacriticsType} is expected to come out of term expansion exactly as it went in: the expected
 * query string is identical to the input query string.
 */
@Test
public void testBoundedUnNormalizedBoundsCase() throws ParseException {
    // NEW is the only field, and it carries a single text type
    Multimap<String, Type<?>> typesByField = HashMultimap.create();
    typesByField.put("NEW", new LcNoDiacriticsType());

    helper.setIndexedFields(typesByField.keySet());
    config.setQueryFieldsDatatypes(typesByField);

    // input and expected output are deliberately the same query
    String query = "NEW == 'boo' && ((_Bounded_ = true) && (NEW > '1' && NEW < '10'))";
    String unchanged = "NEW == 'boo' && ((_Bounded_ = true) && (NEW > '1' && NEW < '10'))";
    expandTerms(query, unchanged);
}
Also used : LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) IpAddressType(datawave.data.type.IpAddressType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) NoOpType(datawave.data.type.NoOpType) NumberType(datawave.data.type.NumberType) LcType(datawave.data.type.LcType) Type(datawave.data.type.Type) Test(org.junit.Test)

Example 22 with LcNoDiacriticsType

use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.

the class FetchDataTypesVisitorTest method setup.

/**
 * One-time fixture setup: gives the shared query config a date range from the epoch to "now" and registers the normalizers for the FOO* fields on the
 * shared metadata helper.
 */
@BeforeClass
public static void setup() {
    // Query window: epoch start through the moment this fixture is built
    Date epoch = new Date(0);
    config.setBeginDate(epoch);
    Date now = new Date();
    config.setEndDate(now);

    // Normalizers known to the metadata helper, one entry per field
    helper.addNormalizers("FOO", Sets.newHashSet(new LcNoDiacriticsType()));
    helper.addNormalizers("FOO2", Sets.newHashSet(new LcType()));
    helper.addNormalizers("FOO3", Sets.newHashSet(new NumberType()));
    // FOO4 is the only field with more than one normalizer
    helper.addNormalizers("FOO4", Sets.newHashSet(new LcType(), new LcNoDiacriticsType()));
}
Also used : LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) NumberType(datawave.data.type.NumberType) LcType(datawave.data.type.LcType) Date(java.util.Date) BeforeClass(org.junit.BeforeClass)

Example 23 with LcNoDiacriticsType

use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.

the class FetchDataTypesVisitor method genericVisit.

/**
 * Use the MetadataHelper to fetch the Set&lt;Type&gt;'s for each field specified in a query term. Handle the case of spoofing the NumberType for fields
 * which are numeric but not indexed.
 * <p>
 * If the field is already present in the accumulated multimap, nothing is looked up. If the lookup yields no types (or fails with an exception, which is
 * logged), fallback types are added: {@link NumberType} when the term's literal is a {@link Number}, otherwise {@link LcNoDiacriticsType} and
 * {@link NoOpType}.
 *
 * @param node
 *            the query term whose field should be resolved
 * @param data
 *            the accumulated field-to-types mapping; cast to {@code HashMultimap<String, Type<?>>} and updated in place
 * @return the same multimap that was passed in as {@code data}
 */
private Object genericVisit(JexlNode node, Object data) {
    HashMultimap<String, Type<?>> dataTypes = (HashMultimap<String, Type<?>>) data;
    JexlASTHelper.IdentifierOpLiteral op = JexlASTHelper.getIdentifierOpLiteral(node);
    if (op == null) {
        // the helper could not extract an identifier/operator/literal from this node; nothing to record
        return dataTypes;
    }
    final String fieldName = op.deconstructIdentifier();
    if (dataTypes.containsKey(fieldName)) {
        // types for this field were already recorded by an earlier term
        return dataTypes;
    }

    Set<Type<?>> dataTypesForField = Collections.emptySet();
    try {
        if (useCache) {
            Tuple2<String, Set<String>> cacheKey = new Tuple2<>(fieldName, datatypeFilter);
            Set<Type<?>> types = typeCache.getIfPresent(cacheKey);
            if (null == types) {
                dataTypesForField = this.helper.getDatatypesForField(fieldName, datatypeFilter);
                typeCache.put(cacheKey, dataTypesForField);
            } else {
                dataTypesForField = types;
            }
        } else {
            dataTypesForField = this.helper.getDatatypesForField(fieldName, datatypeFilter);
        }
    } catch (InstantiationException | TableNotFoundException | IllegalAccessException e) {
        // Best effort: keep going with the empty set so the fallback types below are applied,
        // but log the field and the full stack trace so the failure can be diagnosed.
        log.error("Unable to fetch data types for field " + fieldName, e);
    }

    if (!dataTypesForField.isEmpty()) {
        dataTypes.putAll(fieldName, dataTypesForField);
    } else if (op.getLiteralValue() instanceof Number) {
        // This is a hack to get around the following case:
        // 1) A user enters a query with a numeric term on a field
        // that isn't indexed: e.g. AGE < 4
        // 2) To get proper comparisons during evaluation, the
        // NumberType needs to be
        // set on the AGE, otherwise lexicographic comparisons will
        // occur.
        // This causes a problem though, because the
        // RangeBuildingVisitor thinks that
        // AGE is indexed, it is not, so it incorrectly fails
        // queries where "AGE < 4" is
        // intersected with an indexed field.
        // If this is unindexed (no normalizers for it) and the
        // literal is a Number
        dataTypes.put(fieldName, new NumberType());
        if (log.isTraceEnabled()) {
            log.trace("Unindexed numeric field, adding NumberType for " + fieldName);
        }
    } else {
        // add LcNoDiacritics and NoOpType to ensure that we
        // query against both forms of the string
        dataTypes.put(fieldName, new LcNoDiacriticsType());
        dataTypes.put(fieldName, new NoOpType());
        if (log.isTraceEnabled()) {
            log.trace("Unindexed field, adding LcNoDiacriticsType and NoOpType for " + fieldName);
        }
    }
    return dataTypes;
}
Also used : Set(java.util.Set) NoOpType(datawave.data.type.NoOpType) HashMultimap(com.google.common.collect.HashMultimap) JexlASTHelper(datawave.query.jexl.JexlASTHelper) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) NoOpType(datawave.data.type.NoOpType) NumberType(datawave.data.type.NumberType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) Type(datawave.data.type.Type) NumberType(datawave.data.type.NumberType) Tuple2(datawave.query.util.Tuple2)

Example 24 with LcNoDiacriticsType

use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.

the class QueryModelVisitorTest method testAppliedModelWithNullNoFail.

/**
 * Applies a query model that maps FOO1 to BAR1 and OTHER to 9_2, then fetches data types for the resulting script, which contains a null comparison.
 * Neither translated field has normalizers registered on the helper (only FOO1 does), so the test expects four type entries in total, each either
 * {@link LcNoDiacriticsType} or {@link NoOpType}.
 */
@Test
public void testAppliedModelWithNullNoFail() throws ParseException {
    model.addTermToModel("FOO1", "BAR1");
    model.addTermToModel("OTHER", "9_2");
    String original = "FOO1 == 'baz' and OTHER == null";
    ASTJexlScript groomed = JexlASTHelper.InvertNodeVisitor.invertSwappedNodes(JexlASTHelper.parseJexlQuery(original));
    String expected = "BAR1 == 'baz' and $9_2 == null";
    ASTJexlScript actualScript = assertResult(JexlStringBuildingVisitor.buildQuery(groomed), expected);

    MockMetadataHelper helper = new MockMetadataHelper();
    helper.addNormalizers("FOO1", Sets.newHashSet(new LcNoDiacriticsType()));
    Multimap<String, String> maps = ArrayListMultimap.create();
    maps.put("9_2", "datatype1");
    helper.addFieldsToDatatypes(maps);

    Multimap<String, Type<?>> types = FetchDataTypesVisitor.fetchDataTypes(helper, Collections.singleton("datatype1"), actualScript);
    // expected value goes first so a failure reports "expected:<4> but was:<n>"
    assertEquals(4, types.size());
    assertTrue(types.values().stream().allMatch((o) -> o instanceof LcNoDiacriticsType || o instanceof NoOpType));
}
Also used : LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) ArrayListMultimap(com.google.common.collect.ArrayListMultimap) ASTEQNode(org.apache.commons.jexl2.parser.ASTEQNode) JexlNodeAssert(datawave.test.JexlNodeAssert) JexlASTHelper(datawave.query.jexl.JexlASTHelper) MockMetadataHelper(datawave.query.util.MockMetadataHelper) QueryModel(datawave.query.model.QueryModel) ParseException(org.apache.commons.jexl2.parser.ParseException) Multimap(com.google.common.collect.Multimap) NoExpansion(datawave.query.language.functions.jexl.NoExpansion) NoOpType(datawave.data.type.NoOpType) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) Assert.fail(org.junit.Assert.fail) Before(org.junit.Before) ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) Set(java.util.Set) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Sets(com.google.common.collect.Sets) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) List(java.util.List) Type(datawave.data.type.Type) Assert(org.junit.Assert) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) ASTReference(org.apache.commons.jexl2.parser.ASTReference) MockMetadataHelper(datawave.query.util.MockMetadataHelper) NoOpType(datawave.data.type.NoOpType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) Type(datawave.data.type.Type) ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) NoOpType(datawave.data.type.NoOpType) Test(org.junit.Test)

Example 25 with LcNoDiacriticsType

use of datawave.data.type.LcNoDiacriticsType in project datawave by NationalSecurityAgency.

the class ExpandMultiNormalizedTermsTest method testMixedCaseWithNumber.

/**
 * Mixes a numeric field (NUM) with a text field (NAME) that carries both {@link LcNoDiacriticsType} and {@link NoOpType}. The expected query has each
 * NAME term expanded into two alternatives and the NUM literal rewritten to {@code '+aE1'}.
 */
@Test
public void testMixedCaseWithNumber() throws ParseException {
    Multimap<String, Type<?>> typesByField = HashMultimap.create();
    typesByField.put("NUM", new NumberType());
    typesByField.putAll("NAME", Sets.newHashSet(new LcNoDiacriticsType(), new NoOpType()));

    // both fields are registered as indexed, index-only and term-frequency fields
    helper.setIndexedFields(typesByField.keySet());
    helper.setIndexOnlyFields(typesByField.keySet());
    helper.addTermFrequencyFields(typesByField.keySet());
    config.setQueryFieldsDatatypes(typesByField);

    String query = "(NAME == 'Alice' || NAME == 'BOB') && NUM < '1'";
    String expanded = "((NAME == 'Alice' || NAME == 'alice') || (NAME == 'bob' || NAME == 'BOB')) && NUM < '+aE1'";
    expandTerms(query, expanded);
}
Also used : LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) IpAddressType(datawave.data.type.IpAddressType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) NoOpType(datawave.data.type.NoOpType) NumberType(datawave.data.type.NumberType) LcType(datawave.data.type.LcType) Type(datawave.data.type.Type) NumberType(datawave.data.type.NumberType) NoOpType(datawave.data.type.NoOpType) Test(org.junit.Test)

Aggregations

LcNoDiacriticsType (datawave.data.type.LcNoDiacriticsType)49 Type (datawave.data.type.Type)46 NumberType (datawave.data.type.NumberType)44 Test (org.junit.Test)42 NoOpType (datawave.data.type.NoOpType)41 ASTJexlScript (org.apache.commons.jexl2.parser.ASTJexlScript)36 MockMetadataHelper (datawave.query.util.MockMetadataHelper)35 ScannerFactory (datawave.query.tables.ScannerFactory)34 RangeFactoryForTests.makeTestRange (datawave.common.test.utils.query.RangeFactoryForTests.makeTestRange)25 QueryPlan (datawave.query.planner.QueryPlan)25 Range (org.apache.accumulo.core.data.Range)25 LcType (datawave.data.type.LcType)9 SimpleDateFormat (java.text.SimpleDateFormat)9 IpAddressType (datawave.data.type.IpAddressType)6 JexlASTHelper (datawave.query.jexl.JexlASTHelper)2 Date (java.util.Date)2 Set (java.util.Set)2 ASTReference (org.apache.commons.jexl2.parser.ASTReference)2 JexlNode (org.apache.commons.jexl2.parser.JexlNode)2 ParseException (org.apache.commons.jexl2.parser.ParseException)2