Search in sources :

Example 6 with NoOpType

use of datawave.data.type.NoOpType in project datawave by NationalSecurityAgency.

the class QueryModelVisitorTest method testAppliedModelWithNullNoFail.

/**
 * Applies a query model to a query containing a {@code null} comparison and then fetches the data types for the
 * resulting script, checking that the fetch completes and yields only the expected type classes.
 *
 * @throws ParseException
 *             if the original query cannot be parsed
 */
@Test
public void testAppliedModelWithNullNoFail() throws ParseException {
    model.addTermToModel("FOO1", "BAR1");
    model.addTermToModel("OTHER", "9_2");
    String original = "FOO1 == 'baz' and OTHER == null";
    ASTJexlScript groomed = JexlASTHelper.InvertNodeVisitor.invertSwappedNodes(JexlASTHelper.parseJexlQuery(original));
    // The mapped field 9_2 is expected to be rendered as $9_2 in the rebuilt query
    String expected = "BAR1 == 'baz' and $9_2 == null";
    ASTJexlScript actualScript = assertResult(JexlStringBuildingVisitor.buildQuery(groomed), expected);
    MockMetadataHelper helper = new MockMetadataHelper();
    helper.addNormalizers("FOO1", Sets.newHashSet(new LcNoDiacriticsType()));
    Multimap<String, String> maps = ArrayListMultimap.create();
    maps.put("9_2", "datatype1");
    helper.addFieldsToDatatypes(maps);
    Multimap<String, Type<?>> types = FetchDataTypesVisitor.fetchDataTypes(helper, Collections.singleton("datatype1"), actualScript);
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure messages accurate
    assertEquals(4, types.size());
    assertTrue(types.values().stream().allMatch((o) -> o instanceof LcNoDiacriticsType || o instanceof NoOpType));
}
Also used : LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) ArrayListMultimap(com.google.common.collect.ArrayListMultimap) ASTEQNode(org.apache.commons.jexl2.parser.ASTEQNode) JexlNodeAssert(datawave.test.JexlNodeAssert) JexlASTHelper(datawave.query.jexl.JexlASTHelper) MockMetadataHelper(datawave.query.util.MockMetadataHelper) QueryModel(datawave.query.model.QueryModel) ParseException(org.apache.commons.jexl2.parser.ParseException) Multimap(com.google.common.collect.Multimap) NoExpansion(datawave.query.language.functions.jexl.NoExpansion) NoOpType(datawave.data.type.NoOpType) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) Assert.fail(org.junit.Assert.fail) Before(org.junit.Before) ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) Set(java.util.Set) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Sets(com.google.common.collect.Sets) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) List(java.util.List) Type(datawave.data.type.Type) Assert(org.junit.Assert) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) ASTReference(org.apache.commons.jexl2.parser.ASTReference) MockMetadataHelper(datawave.query.util.MockMetadataHelper) NoOpType(datawave.data.type.NoOpType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) Type(datawave.data.type.Type) ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) NoOpType(datawave.data.type.NoOpType) Test(org.junit.Test)

Example 7 with NoOpType

use of datawave.data.type.NoOpType in project datawave by NationalSecurityAgency.

the class ExpandMultiNormalizedTermsTest method testMixedCaseWithNumber.

/**
 * Registers a number type for {@code NUM} and two types for {@code NAME}, then expects each {@code NAME} term to
 * expand into its original and lower-cased forms while the {@code NUM} literal is rewritten to {@code '+aE1'}.
 *
 * @throws ParseException
 *             if the query cannot be parsed
 */
@Test
public void testMixedCaseWithNumber() throws ParseException {
    // Field -> data types used for the expansion
    Multimap<String, Type<?>> typesByField = HashMultimap.create();
    typesByField.put("NUM", new NumberType());
    typesByField.putAll("NAME", Sets.newHashSet(new LcNoDiacriticsType(), new NoOpType()));

    // Every configured field is marked indexed, index-only and term-frequency
    helper.setIndexedFields(typesByField.keySet());
    helper.setIndexOnlyFields(typesByField.keySet());
    helper.addTermFrequencyFields(typesByField.keySet());
    config.setQueryFieldsDatatypes(typesByField);

    final String query = "(NAME == 'Alice' || NAME == 'BOB') && NUM < '1'";
    final String expandedQuery = "((NAME == 'Alice' || NAME == 'alice') || (NAME == 'bob' || NAME == 'BOB')) && NUM < '+aE1'";
    expandTerms(query, expandedQuery);
}
Also used : LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) IpAddressType(datawave.data.type.IpAddressType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) NoOpType(datawave.data.type.NoOpType) NumberType(datawave.data.type.NumberType) LcType(datawave.data.type.LcType) Type(datawave.data.type.Type) NumberType(datawave.data.type.NumberType) NoOpType(datawave.data.type.NoOpType) Test(org.junit.Test)

Example 8 with NoOpType

use of datawave.data.type.NoOpType in project datawave by NationalSecurityAgency.

the class ExpandMultiNormalizedTermsTest method testNoOp.

/**
 * Registers only a {@link NoOpType} for the {@code NOOP} field and expects term expansion to leave the query
 * untouched.
 *
 * @throws ParseException
 *             if the query cannot be parsed
 */
@Test
public void testNoOp() throws ParseException {
    Multimap<String, Type<?>> typesByField = HashMultimap.create();
    typesByField.put("NOOP", new NoOpType());

    // The single field is marked indexed, index-only and term-frequency
    helper.setIndexedFields(typesByField.keySet());
    helper.setIndexOnlyFields(typesByField.keySet());
    helper.addTermFrequencyFields(typesByField.keySet());
    config.setQueryFieldsDatatypes(typesByField);

    // The expected output is the input itself: no expansion should occur
    final String query = "NOOP == 'bar'";
    expandTerms(query, query);
}
Also used : IpAddressType(datawave.data.type.IpAddressType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) NoOpType(datawave.data.type.NoOpType) NumberType(datawave.data.type.NumberType) LcType(datawave.data.type.LcType) Type(datawave.data.type.Type) NoOpType(datawave.data.type.NoOpType) Test(org.junit.Test)

Example 9 with NoOpType

use of datawave.data.type.NoOpType in project datawave by NationalSecurityAgency.

the class BaseIngestHelper method setup.

/**
 * Configures this ingest helper from the supplied configuration.
 * <p>
 * After delegating to the superclass, this resets the failure-policy and type maps, sets up the virtual and composite
 * ingest helpers, and then reads properties whose keys are built from this helper's type name plus a suffix constant.
 * Those properties drive: normalized fields, delete mode, malformed character replacement, the default failed
 * normalization policy and field, the indexed and reverse indexed field lists (each either a whitelist or a
 * blacklist), per-field types and failure policies, shard exclusions and index-only fields.
 *
 * @param config
 *            the configuration to read settings from
 * @throws RuntimeException
 *             if both a blacklist and a whitelist are configured for indexed fields, or for reverse indexed fields
 * @throws IllegalArgumentException
 *             if a per-field failed normalization policy value cannot be parsed
 */
@Override
public void setup(Configuration config) {
    super.setup(config);
    // Start from clean policy and type maps on every setup call
    this.failedFieldPolicy = Maps.newHashMap();
    this.failedFieldPatternPolicy = Maps.newHashMap();
    this.typeFieldMap = HashMultimap.create();
    // A NoOpType is registered under the null key
    // NOTE(review): presumably the null key denotes the default type for fields without an explicit type - confirm
    this.typeFieldMap.put(null, new NoOpType());
    this.typePatternMap = HashMultimap.create();
    this.typeCompiledPatternMap = null;
    this.getVirtualIngest().setup(config);
    // Create the composite ingest helper only if one has not been assigned yet
    if (this.compositeIngest == null)
        this.compositeIngest = new CompositeFieldIngestHelper(this.getType());
    this.getCompositeIngest().setup(config);
    IngestConfiguration ingestConfiguration = IngestConfigurationFactory.getIngestConfiguration();
    markingsHelper = ingestConfiguration.getMarkingsHelper(config, getType());
    this.normalizedFields.addAll(config.getTrimmedStringCollection(this.getType().typeName() + NORMALIZED_FIELDS));
    // NOTE(review): presumably moves wildcard entries out of the field set into the pattern collection - verify
    this.moveToPatternMap(this.normalizedFields, this.normalizedPatterns);
    deleteMode = config.getBoolean(INGEST_MODE_DELETE, false);
    replaceMalformedUTF8 = config.getBoolean(this.getType().typeName() + REPLACE_MALFORMED_CHAR, false);
    // The current values act as defaults when the configuration does not override them
    defaultFailedFieldPolicy = FailurePolicy.valueOf(config.get(this.getType().typeName() + DEFAULT_FAILED_NORMALIZATION_POLICY, defaultFailedFieldPolicy.name()));
    failedNormalizationField = config.get(this.getType().typeName() + FAILED_NORMALIZATION_FIELD, failedNormalizationField);
    // Indexed fields: a blacklist and a whitelist are mutually exclusive
    if (config.get(this.getType().typeName() + BLACKLIST_INDEX_FIELDS) != null && config.get(this.getType().typeName() + INDEX_FIELDS) != null) {
        throw new RuntimeException("Configuration contains BlackList and Whitelist for indexed fields, " + "it specifies both.  Type: " + this.getType().typeName() + ", parameters: " + config.get(this.getType().typeName() + BLACKLIST_INDEX_FIELDS) + " and " + config.get(this.getType().typeName() + INDEX_FIELDS));
    }
    // Holds the property suffix of whichever list (blacklist or whitelist) is in effect; null when neither is set
    String configProperty = null;
    // Load the field helper, which takes precedence over the individual field configurations
    final String fieldConfigFile = config.get(this.getType().typeName() + FIELD_CONFIG_FILE);
    if (fieldConfigFile != null) {
        if (log.isDebugEnabled()) {
            log.debug("Field config file " + fieldConfigFile + " specified for: " + this.getType().typeName() + FIELD_CONFIG_FILE);
        }
        this.fieldConfigHelper = XMLFieldConfigHelper.load(fieldConfigFile, this);
    }
    // Process the indexed fields
    if (config.get(this.getType().typeName() + BLACKLIST_INDEX_FIELDS) != null) {
        if (log.isDebugEnabled()) {
            log.debug("Blacklist specified for: " + this.getType().typeName() + BLACKLIST_INDEX_FIELDS);
        }
        super.setHasIndexBlacklist(true);
        configProperty = BLACKLIST_INDEX_FIELDS;
    } else if (config.get(this.getType().typeName() + INDEX_FIELDS) != null) {
        log.debug("IndexedFields specified.");
        super.setHasIndexBlacklist(false);
        configProperty = INDEX_FIELDS;
    }
    // Load the proper list of fields to (not) index
    // When a field config helper is present, the indexed field list is not loaded here
    if (fieldConfigHelper != null) {
        log.info("Using field config helper for " + this.getType().typeName());
    } else if (null == configProperty || configProperty.isEmpty()) {
        log.warn("No index fields or blacklist fields specified, not generating index fields for " + this.getType().typeName());
    } else {
        this.indexedFields = Sets.newHashSet();
        Collection<String> indexedStrings = config.getStringCollection(this.getType().typeName() + configProperty);
        if (null != indexedStrings && !indexedStrings.isEmpty()) {
            for (String indexedString : indexedStrings) {
                this.indexedFields.add(indexedString.trim());
            }
            this.moveToPatternMap(this.indexedFields, this.indexedPatterns);
        } else {
            log.warn(this.getType().typeName() + configProperty + " not specified.");
        }
    }
    // Reverse indexed fields: a blacklist and a whitelist are mutually exclusive
    // NOTE(review): the exception message below says "indexed fields" although this check concerns reverse
    // indexed fields - looks like a copy/paste leftover; confirm before changing the message
    if (config.get(this.getType().typeName() + BLACKLIST_REVERSE_INDEX_FIELDS) != null && config.get(this.getType().typeName() + REVERSE_INDEX_FIELDS) != null) {
        throw new RuntimeException("Configuration contains BlackList and Whitelist for indexed fields, it specifies both.  Type: " + this.getType().typeName() + ", parameters: " + config.get(this.getType().typeName() + BLACKLIST_REVERSE_INDEX_FIELDS) + "  " + config.get(this.getType().typeName() + REVERSE_INDEX_FIELDS));
    }
    // Reuse the variable for the reverse index list suffix
    configProperty = null;
    // Process the reverse index fields
    if (config.get(this.getType().typeName() + BLACKLIST_REVERSE_INDEX_FIELDS) != null) {
        if (log.isDebugEnabled()) {
            log.debug("Blacklist specified for: " + this.getType().typeName() + BLACKLIST_REVERSE_INDEX_FIELDS);
        }
        this.setHasReverseIndexBlacklist(true);
        configProperty = BLACKLIST_REVERSE_INDEX_FIELDS;
    } else if (config.get(this.getType().typeName() + REVERSE_INDEX_FIELDS) != null) {
        if (log.isDebugEnabled()) {
            log.debug("Reverse Index specified.for: " + this.getType().typeName() + REVERSE_INDEX_FIELDS);
        }
        this.setHasReverseIndexBlacklist(false);
        configProperty = REVERSE_INDEX_FIELDS;
    }
    // Load the proper list of fields to (not) reverse index
    // Unlike the indexed fields above, this branch does not consult the field config helper
    if (null == configProperty || configProperty.isEmpty()) {
        log.warn("No reverse index fields or blacklist reverse index fields specified, not generating reverse index fields for " + this.getType().typeName());
    } else {
        reverseIndexedFields = Sets.newHashSet();
        Collection<String> reverseIndexedStrings = config.getStringCollection(this.getType().typeName() + configProperty);
        if (null != reverseIndexedStrings && !reverseIndexedStrings.isEmpty()) {
            for (String reverseIndexedString : reverseIndexedStrings) {
                reverseIndexedFields.add(reverseIndexedString.trim());
            }
            this.moveToPatternMap(this.reverseIndexedFields, this.reverseIndexedPatterns);
        } else {
            log.warn(this.getType().typeName() + configProperty + " not specified");
        }
    }
    // Collect the trimmed index and reverse index whitelist entries configured for every registered type,
    // not just this helper's own type
    for (Type type : TypeRegistry.getTypes()) {
        Collection<String> indexedStrings = config.getStringCollection(type.typeName() + INDEX_FIELDS);
        if (null != indexedStrings && !indexedStrings.isEmpty()) {
            for (String indexedString : indexedStrings) {
                String indexedTrimmedString = indexedString.trim();
                allIndexFields.add(indexedTrimmedString);
            }
        }
        Collection<String> reverseIndexedStrings = config.getStringCollection(type.typeName() + REVERSE_INDEX_FIELDS);
        if (null != reverseIndexedStrings && !reverseIndexedStrings.isEmpty()) {
            for (String reverseIndexedString : reverseIndexedStrings) {
                String reverseIndexedTrimmedString = reverseIndexedString.trim();
                allReverseIndexFields.add(reverseIndexedTrimmedString);
            }
        }
    }
    // Scan every configuration entry for per-field type and failure policy settings
    for (Entry<String, String> property : config) {
        // Make sure we are only processing normalizers for this type
        if (!property.getKey().startsWith(this.getType().typeName() + '.')) {
            continue;
        }
        String fieldName = null;
        String key = property.getKey();
        if (key.endsWith(DEFAULT_TYPE) || key.endsWith(FIELD_TYPE)) {
            // For a DEFAULT_TYPE key fieldName stays null; for a FIELD_TYPE key it is derived from the key,
            // and the entry is skipped when no field name can be derived
            if (key.endsWith(FIELD_TYPE)) {
                if ((fieldName = getFieldType(key, FIELD_TYPE)) == null) {
                    continue;
                }
            }
            String typeClasses = property.getValue();
            updateDatawaveTypes(fieldName, typeClasses);
        } else if (property.getKey().endsWith(FIELD_FAILED_NORMALIZATION_POLICY)) {
            if ((fieldName = getFieldName(property.getKey(), FIELD_FAILED_NORMALIZATION_POLICY)) == null) {
                continue;
            }
            FailurePolicy policy = null;
            try {
                policy = FailurePolicy.valueOf(property.getValue());
            } catch (Exception e) {
                // Log and rethrow with the original cause so a bad policy value aborts setup
                log.error("Unable to parse field normalization failure policy: " + property.getValue(), e);
                throw new IllegalArgumentException("Unable to parse field normalization failure policy: " + property.getValue(), e);
            }
            // Field names containing '*' are stored as pattern policies, all others as exact-name policies
            if (fieldName.indexOf('*') >= 0) {
                failedFieldPatternPolicy.put(fieldName, policy);
            } else {
                failedFieldPolicy.put(fieldName, policy);
            }
        }
    }
    // Support for excluding specific fields from being inserted into the
    // Shard table
    // This is useful if virtual fields are used heavily, but you don't want
    // these fields inserted
    // into the shard table. For instance, if many virtual fields are used
    // in the edge table
    // 
    // Note: this pruning occurs after all the field names are aliased or
    // normalized
    shardExclusions.clear();
    String exclusionsList = config.get(this.getType().typeName() + SHARD_FIELD_EXCLUSIONS);
    if (exclusionsList != null) {
        String[] exclusions = StringUtils.split(exclusionsList, ',');
        if (exclusions != null && exclusions.length > 0) {
            for (String exclusionFieldName : exclusions) {
                String fieldName = exclusionFieldName.trim();
                // Blank entries are silently ignored
                if (!fieldName.isEmpty()) {
                    shardExclusions.add(fieldName);
                } else {
                // TODO: Possibly add warning to indicated a potentially
                // questionable configuration file...
                }
            }
        }
    }
    // Index-only fields: a comma-separated list, trimmed, with blank entries ignored
    String indexOnlyFieldList = config.get(this.getType().typeName() + INDEX_ONLY_FIELDS);
    if (null != indexOnlyFieldList) {
        for (String s : indexOnlyFieldList.split(",")) {
            String fieldName = s.trim();
            if (!fieldName.isEmpty()) {
                this.indexOnlyFields.add(fieldName);
            } else {
            // TODO: Possibly add warning to indicated a potentially
            // questionable configuration file...
            }
        }
    }
}
Also used : NoOpType(datawave.data.type.NoOpType) NormalizationException(datawave.data.normalizer.NormalizationException) NoOpType(datawave.data.type.NoOpType) OneToManyNormalizerType(datawave.data.type.OneToManyNormalizerType) Type(datawave.ingest.data.Type) Collection(java.util.Collection) IngestConfiguration(datawave.ingest.config.IngestConfiguration)

Example 10 with NoOpType

use of datawave.data.type.NoOpType in project datawave by NationalSecurityAgency.

the class RangeStreamTest method testPrune.

/**
 * Streams query plans for a two-term OR query and checks that every plan's query tree is reduced to
 * {@code FOO == 'bag'} and that the plans together yield exactly the two expected ranges.
 *
 * @throws Exception
 *             if parsing the query or building the range stream fails
 */
@Test
public void testPrune() throws Exception {
    String query = "FOO=='bag' || FOO=='qwertylikeskeyboards'";
    ASTJexlScript parsedQuery = JexlASTHelper.parseJexlQuery(query);

    // Query window runs from the epoch up to now
    config.setBeginDate(new Date(0));
    config.setEndDate(new Date());

    // FOO has one type; BAR has two
    Multimap<String, Type<?>> typesByField = HashMultimap.create();
    typesByField.put("FOO", new LcNoDiacriticsType());
    typesByField.put("BAR", new NoOpType());
    typesByField.put("BAR", new NumberType());
    config.setQueryFieldsDatatypes(typesByField);
    config.setIndexedFields(typesByField);

    MockMetadataHelper metadataHelper = new MockMetadataHelper();
    metadataHelper.setIndexedFields(typesByField.keySet());
    metadataHelper.addFields(Lists.newArrayList("BAR"));

    // Ranges are removed from this set as they are encountered; it must end up empty
    Set<Range> remainingRanges = Sets.newHashSet(makeTestRange("20190314", "datatype1\u0000234"), makeTestRange("20190314", "datatype1\u0000345"));

    RangeStream stream = new RangeStream(config, new ScannerFactory(config.getConnector()), metadataHelper);
    for (QueryPlan plan : stream.streamPlans(parsedQuery)) {
        String planQuery = JexlStringBuildingVisitor.buildQuery(plan.getQueryTree());
        assertEquals("FOO == 'bag'", planQuery);
        for (Range planRange : plan.getRanges()) {
            String unexpectedMessage = "Tried to remove unexpected range " + planRange.toString() + " from expected ranges: " + remainingRanges.toString();
            assertTrue(unexpectedMessage, remainingRanges.remove(planRange));
        }
    }
    assertTrue("Expected ranges not found in query plan: " + remainingRanges.toString(), remainingRanges.isEmpty());
}
Also used : ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) NoOpType(datawave.data.type.NoOpType) RangeFactoryForTests.makeTestRange(datawave.common.test.utils.query.RangeFactoryForTests.makeTestRange) Range(org.apache.accumulo.core.data.Range) ScannerFactory(datawave.query.tables.ScannerFactory) QueryPlan(datawave.query.planner.QueryPlan) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) MockMetadataHelper(datawave.query.util.MockMetadataHelper) NoOpType(datawave.data.type.NoOpType) NumberType(datawave.data.type.NumberType) LcNoDiacriticsType(datawave.data.type.LcNoDiacriticsType) Type(datawave.data.type.Type) NumberType(datawave.data.type.NumberType) Test(org.junit.Test)

Aggregations

NoOpType (datawave.data.type.NoOpType)10 Type (datawave.data.type.Type)7 Test (org.junit.Test)6 LcNoDiacriticsType (datawave.data.type.LcNoDiacriticsType)5 NumberType (datawave.data.type.NumberType)4 JexlASTHelper (datawave.query.jexl.JexlASTHelper)3 ArrayListMultimap (com.google.common.collect.ArrayListMultimap)2 Multimap (com.google.common.collect.Multimap)2 DiscreteIndexType (datawave.data.type.DiscreteIndexType)2 IpAddressType (datawave.data.type.IpAddressType)2 LcType (datawave.data.type.LcType)2 OneToManyNormalizerType (datawave.data.type.OneToManyNormalizerType)2 QueryModel (datawave.query.model.QueryModel)2 MockMetadataHelper (datawave.query.util.MockMetadataHelper)2 Collection (java.util.Collection)2 Date (java.util.Date)2 Set (java.util.Set)2 ASTJexlScript (org.apache.commons.jexl2.parser.ASTJexlScript)2 Preconditions (com.google.common.base.Preconditions)1 HashMultimap (com.google.common.collect.HashMultimap)1