Use of datawave.data.type.NoOpType in project datawave by NationalSecurityAgency.
From the class QueryModelVisitorTest, method testAppliedModelWithNullNoFail:
@Test
public void testAppliedModelWithNullNoFail() throws ParseException {
    model.addTermToModel("FOO1", "BAR1");
    model.addTermToModel("OTHER", "9_2");
    String original = "FOO1 == 'baz' and OTHER == null";
    ASTJexlScript groomed = JexlASTHelper.InvertNodeVisitor.invertSwappedNodes(JexlASTHelper.parseJexlQuery(original));
    String expected = "BAR1 == 'baz' and $9_2 == null";
    ASTJexlScript actualScript = assertResult(JexlStringBuildingVisitor.buildQuery(groomed), expected);
    MockMetadataHelper helper = new MockMetadataHelper();
    helper.addNormalizers("FOO1", Sets.newHashSet(new LcNoDiacriticsType()));
    Multimap<String, String> maps = ArrayListMultimap.create();
    maps.put("9_2", "datatype1");
    helper.addFieldsToDatatypes(maps);
    Multimap<String, Type<?>> types = FetchDataTypesVisitor.fetchDataTypes(helper, Collections.singleton("datatype1"), actualScript);
    assertEquals(4, types.size());
    assertTrue(types.values().stream().allMatch((o) -> o instanceof LcNoDiacriticsType || o instanceof NoOpType));
}
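The final assertion only checks that every fetched type is either LcNoDiacriticsType or NoOpType. A minimal sketch of why those two behave differently, assuming Type#normalize(String) on datawave types works as described (LcNoDiacriticsType lower-cases and strips diacritics, NoOpType passes the value through unchanged); the class name here is just illustrative:

import datawave.data.type.LcNoDiacriticsType;
import datawave.data.type.NoOpType;

public class NormalizerSketch {
    public static void main(String[] args) {
        // Assumption: NoOpType leaves the value as-is, so unconfigured fields keep their original form.
        System.out.println(new NoOpType().normalize("BaZ"));            // BaZ
        // Assumption: LcNoDiacriticsType lower-cases and removes diacritics.
        System.out.println(new LcNoDiacriticsType().normalize("BaZ"));  // baz
    }
}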
Use of datawave.data.type.NoOpType in project datawave by NationalSecurityAgency.
From the class ExpandMultiNormalizedTermsTest, method testMixedCaseWithNumber:
@Test
public void testMixedCaseWithNumber() throws ParseException {
    Multimap<String, Type<?>> dataTypes = HashMultimap.create();
    dataTypes.put("NUM", new NumberType());
    dataTypes.putAll("NAME", Sets.newHashSet(new LcNoDiacriticsType(), new NoOpType()));
    helper.setIndexedFields(dataTypes.keySet());
    helper.setIndexOnlyFields(dataTypes.keySet());
    helper.addTermFrequencyFields(dataTypes.keySet());
    config.setQueryFieldsDatatypes(dataTypes);
    String original = "(NAME == 'Alice' || NAME == 'BOB') && NUM < '1'";
    String expected = "((NAME == 'Alice' || NAME == 'alice') || (NAME == 'bob' || NAME == 'BOB')) && NUM < '+aE1'";
    expandTerms(original, expected);
}
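The expected query shows both effects at once: the two NAME normalizers keep the original term (NoOpType) and add the case-normalized variant (LcNoDiacriticsType), while NumberType rewrites '1' into the lexicographically sortable encoding '+aE1' seen above. A small sketch of that number encoding, assuming Type#normalize(String) as before and taking the '+aE1' value from the expected string in the test:

import datawave.data.type.NumberType;

public class NumberNormalizeSketch {
    public static void main(String[] args) {
        // Per the expected query in the test above, "1" is encoded as "+aE1" so that
        // numeric comparisons still sort correctly when stored as strings in the index.
        System.out.println(new NumberType().normalize("1"));  // +aE1
    }
}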
Use of datawave.data.type.NoOpType in project datawave by NationalSecurityAgency.
From the class ExpandMultiNormalizedTermsTest, method testNoOp:
@Test
public void testNoOp() throws ParseException {
    Multimap<String, Type<?>> dataTypes = HashMultimap.create();
    dataTypes.put("NOOP", new NoOpType());
    helper.setIndexedFields(dataTypes.keySet());
    helper.setIndexOnlyFields(dataTypes.keySet());
    helper.addTermFrequencyFields(dataTypes.keySet());
    config.setQueryFieldsDatatypes(dataTypes);
    // No change expected
    String original = "NOOP == 'bar'";
    expandTerms(original, original);
}
Use of datawave.data.type.NoOpType in project datawave by NationalSecurityAgency.
From the class BaseIngestHelper, method setup:
@Override
public void setup(Configuration config) {
    super.setup(config);
    this.failedFieldPolicy = Maps.newHashMap();
    this.failedFieldPatternPolicy = Maps.newHashMap();
    this.typeFieldMap = HashMultimap.create();
    this.typeFieldMap.put(null, new NoOpType());
    this.typePatternMap = HashMultimap.create();
    this.typeCompiledPatternMap = null;
    this.getVirtualIngest().setup(config);
    if (this.compositeIngest == null)
        this.compositeIngest = new CompositeFieldIngestHelper(this.getType());
    this.getCompositeIngest().setup(config);
    IngestConfiguration ingestConfiguration = IngestConfigurationFactory.getIngestConfiguration();
    markingsHelper = ingestConfiguration.getMarkingsHelper(config, getType());
    this.normalizedFields.addAll(config.getTrimmedStringCollection(this.getType().typeName() + NORMALIZED_FIELDS));
    this.moveToPatternMap(this.normalizedFields, this.normalizedPatterns);
    deleteMode = config.getBoolean(INGEST_MODE_DELETE, false);
    replaceMalformedUTF8 = config.getBoolean(this.getType().typeName() + REPLACE_MALFORMED_CHAR, false);
    defaultFailedFieldPolicy = FailurePolicy.valueOf(config.get(this.getType().typeName() + DEFAULT_FAILED_NORMALIZATION_POLICY, defaultFailedFieldPolicy.name()));
    failedNormalizationField = config.get(this.getType().typeName() + FAILED_NORMALIZATION_FIELD, failedNormalizationField);
    // index
    if (config.get(this.getType().typeName() + BLACKLIST_INDEX_FIELDS) != null && config.get(this.getType().typeName() + INDEX_FIELDS) != null) {
        throw new RuntimeException("Configuration contains BlackList and Whitelist for indexed fields, it specifies both. Type: " + this.getType().typeName() + ", parameters: " + config.get(this.getType().typeName() + BLACKLIST_INDEX_FIELDS) + " and " + config.get(this.getType().typeName() + INDEX_FIELDS));
    }
    String configProperty = null;
    // Load the field helper, which takes precedence over the individual field configurations
    final String fieldConfigFile = config.get(this.getType().typeName() + FIELD_CONFIG_FILE);
    if (fieldConfigFile != null) {
        if (log.isDebugEnabled()) {
            log.debug("Field config file " + fieldConfigFile + " specified for: " + this.getType().typeName() + FIELD_CONFIG_FILE);
        }
        this.fieldConfigHelper = XMLFieldConfigHelper.load(fieldConfigFile, this);
    }
    // Process the indexed fields
    if (config.get(this.getType().typeName() + BLACKLIST_INDEX_FIELDS) != null) {
        if (log.isDebugEnabled()) {
            log.debug("Blacklist specified for: " + this.getType().typeName() + BLACKLIST_INDEX_FIELDS);
        }
        super.setHasIndexBlacklist(true);
        configProperty = BLACKLIST_INDEX_FIELDS;
    } else if (config.get(this.getType().typeName() + INDEX_FIELDS) != null) {
        log.debug("IndexedFields specified.");
        super.setHasIndexBlacklist(false);
        configProperty = INDEX_FIELDS;
    }
    // Load the proper list of fields to (not) index
    if (fieldConfigHelper != null) {
        log.info("Using field config helper for " + this.getType().typeName());
    } else if (null == configProperty || configProperty.isEmpty()) {
        log.warn("No index fields or blacklist fields specified, not generating index fields for " + this.getType().typeName());
    } else {
        this.indexedFields = Sets.newHashSet();
        Collection<String> indexedStrings = config.getStringCollection(this.getType().typeName() + configProperty);
        if (null != indexedStrings && !indexedStrings.isEmpty()) {
            for (String indexedString : indexedStrings) {
                this.indexedFields.add(indexedString.trim());
            }
            this.moveToPatternMap(this.indexedFields, this.indexedPatterns);
        } else {
            log.warn(this.getType().typeName() + configProperty + " not specified.");
        }
    }
    // reverse index
    if (config.get(this.getType().typeName() + BLACKLIST_REVERSE_INDEX_FIELDS) != null && config.get(this.getType().typeName() + REVERSE_INDEX_FIELDS) != null) {
        throw new RuntimeException("Configuration contains BlackList and Whitelist for reverse indexed fields, it specifies both. Type: " + this.getType().typeName() + ", parameters: " + config.get(this.getType().typeName() + BLACKLIST_REVERSE_INDEX_FIELDS) + " " + config.get(this.getType().typeName() + REVERSE_INDEX_FIELDS));
    }
    configProperty = null;
    // Process the reverse index fields
    if (config.get(this.getType().typeName() + BLACKLIST_REVERSE_INDEX_FIELDS) != null) {
        if (log.isDebugEnabled()) {
            log.debug("Blacklist specified for: " + this.getType().typeName() + BLACKLIST_REVERSE_INDEX_FIELDS);
        }
        this.setHasReverseIndexBlacklist(true);
        configProperty = BLACKLIST_REVERSE_INDEX_FIELDS;
    } else if (config.get(this.getType().typeName() + REVERSE_INDEX_FIELDS) != null) {
        if (log.isDebugEnabled()) {
            log.debug("Reverse Index specified for: " + this.getType().typeName() + REVERSE_INDEX_FIELDS);
        }
        this.setHasReverseIndexBlacklist(false);
        configProperty = REVERSE_INDEX_FIELDS;
    }
    // Load the proper list of fields to (not) reverse index
    if (null == configProperty || configProperty.isEmpty()) {
        log.warn("No reverse index fields or blacklist reverse index fields specified, not generating reverse index fields for " + this.getType().typeName());
    } else {
        reverseIndexedFields = Sets.newHashSet();
        Collection<String> reverseIndexedStrings = config.getStringCollection(this.getType().typeName() + configProperty);
        if (null != reverseIndexedStrings && !reverseIndexedStrings.isEmpty()) {
            for (String reverseIndexedString : reverseIndexedStrings) {
                reverseIndexedFields.add(reverseIndexedString.trim());
            }
            this.moveToPatternMap(this.reverseIndexedFields, this.reverseIndexedPatterns);
        } else {
            log.warn(this.getType().typeName() + configProperty + " not specified");
        }
    }
    // Gather the index and reverse index fields configured for every registered type, in case
    // somebody else is indexing a field that this type is not
    for (Type type : TypeRegistry.getTypes()) {
        Collection<String> indexedStrings = config.getStringCollection(type.typeName() + INDEX_FIELDS);
        if (null != indexedStrings && !indexedStrings.isEmpty()) {
            for (String indexedString : indexedStrings) {
                String indexedTrimmedString = indexedString.trim();
                allIndexFields.add(indexedTrimmedString);
            }
        }
        Collection<String> reverseIndexedStrings = config.getStringCollection(type.typeName() + REVERSE_INDEX_FIELDS);
        if (null != reverseIndexedStrings && !reverseIndexedStrings.isEmpty()) {
            for (String reverseIndexedString : reverseIndexedStrings) {
                String reverseIndexedTrimmedString = reverseIndexedString.trim();
                allReverseIndexFields.add(reverseIndexedTrimmedString);
            }
        }
    }
    for (Entry<String, String> property : config) {
        // Make sure we are only processing normalizers for this type
        if (!property.getKey().startsWith(this.getType().typeName() + '.')) {
            continue;
        }
        String fieldName = null;
        String key = property.getKey();
        if (key.endsWith(DEFAULT_TYPE) || key.endsWith(FIELD_TYPE)) {
            if (key.endsWith(FIELD_TYPE)) {
                if ((fieldName = getFieldType(key, FIELD_TYPE)) == null) {
                    continue;
                }
            }
            String typeClasses = property.getValue();
            updateDatawaveTypes(fieldName, typeClasses);
        } else if (property.getKey().endsWith(FIELD_FAILED_NORMALIZATION_POLICY)) {
            if ((fieldName = getFieldName(property.getKey(), FIELD_FAILED_NORMALIZATION_POLICY)) == null) {
                continue;
            }
            FailurePolicy policy = null;
            try {
                policy = FailurePolicy.valueOf(property.getValue());
            } catch (Exception e) {
                log.error("Unable to parse field normalization failure policy: " + property.getValue(), e);
                throw new IllegalArgumentException("Unable to parse field normalization failure policy: " + property.getValue(), e);
            }
            if (fieldName.indexOf('*') >= 0) {
                failedFieldPatternPolicy.put(fieldName, policy);
            } else {
                failedFieldPolicy.put(fieldName, policy);
            }
        }
    }
    // Support for excluding specific fields from being inserted into the shard table.
    // This is useful if virtual fields are used heavily but you don't want those fields
    // inserted into the shard table, for instance when many virtual fields are used in
    // the edge table.
    //
    // Note: this pruning occurs after all the field names are aliased or normalized.
    shardExclusions.clear();
    String exclusionsList = config.get(this.getType().typeName() + SHARD_FIELD_EXCLUSIONS);
    if (exclusionsList != null) {
        String[] exclusions = StringUtils.split(exclusionsList, ',');
        if (exclusions != null && exclusions.length > 0) {
            for (String exclusionFieldName : exclusions) {
                String fieldName = exclusionFieldName.trim();
                if (!fieldName.isEmpty()) {
                    shardExclusions.add(fieldName);
                } else {
                    // TODO: Possibly add a warning to indicate a potentially questionable configuration file...
                }
            }
        }
    }
    String indexOnlyFieldList = config.get(this.getType().typeName() + INDEX_ONLY_FIELDS);
    if (null != indexOnlyFieldList) {
        for (String s : indexOnlyFieldList.split(",")) {
            String fieldName = s.trim();
            if (!fieldName.isEmpty()) {
                this.indexOnlyFields.add(fieldName);
            } else {
                // TODO: Possibly add a warning to indicate a potentially questionable configuration file...
            }
        }
    }
}
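Near the top of setup(), typeFieldMap.put(null, new NoOpType()) seeds the field-to-type multimap with a default entry under the null key. The following is a hypothetical illustration of that lookup pattern, an assumption about how such a default is consulted rather than the helper's actual accessor: fields with an explicit entry return their configured types, and anything else falls back to the NoOpType stored under null.

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import datawave.data.type.NoOpType;
import datawave.data.type.NumberType;
import datawave.data.type.Type;

import java.util.Collection;

public class DefaultTypeLookupSketch {
    public static void main(String[] args) {
        Multimap<String, Type<?>> typeFieldMap = HashMultimap.create();
        // Default entry: the null key holds the type used when a field has no explicit configuration.
        typeFieldMap.put(null, new NoOpType());
        typeFieldMap.put("PRICE", new NumberType());

        System.out.println(lookup(typeFieldMap, "PRICE").iterator().next().getClass().getSimpleName());       // NumberType
        System.out.println(lookup(typeFieldMap, "SOME_FIELD").iterator().next().getClass().getSimpleName());  // NoOpType (the default)
    }

    // Hypothetical lookup: explicit entries win, otherwise fall back to the null-key default.
    static Collection<Type<?>> lookup(Multimap<String, Type<?>> typeFieldMap, String field) {
        Collection<Type<?>> types = typeFieldMap.get(field);
        return types.isEmpty() ? typeFieldMap.get(null) : types;
    }
}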
Use of datawave.data.type.NoOpType in project datawave by NationalSecurityAgency.
From the class RangeStreamTest, method testPrune:
@Test
public void testPrune() throws Exception {
    String originalQuery = "FOO=='bag' || FOO=='qwertylikeskeyboards'";
    ASTJexlScript script = JexlASTHelper.parseJexlQuery(originalQuery);
    config.setBeginDate(new Date(0));
    config.setEndDate(new Date(System.currentTimeMillis()));
    Multimap<String, Type<?>> dataTypes = HashMultimap.create();
    dataTypes.putAll("FOO", Sets.newHashSet(new LcNoDiacriticsType()));
    dataTypes.putAll("BAR", Sets.newHashSet(new NoOpType()));
    dataTypes.putAll("BAR", Sets.newHashSet(new NumberType()));
    config.setQueryFieldsDatatypes(dataTypes);
    config.setIndexedFields(dataTypes);
    MockMetadataHelper helper = new MockMetadataHelper();
    helper.setIndexedFields(dataTypes.keySet());
    helper.addFields(Lists.newArrayList("BAR"));
    Range range1 = makeTestRange("20190314", "datatype1\u0000234");
    Range range2 = makeTestRange("20190314", "datatype1\u0000345");
    Set<Range> expectedRanges = Sets.newHashSet(range1, range2);
    RangeStream rangeStream = new RangeStream(config, new ScannerFactory(config.getConnector()), helper);
    for (QueryPlan queryPlan : rangeStream.streamPlans(script)) {
        assertEquals("FOO == 'bag'", JexlStringBuildingVisitor.buildQuery(queryPlan.getQueryTree()));
        for (Range range : queryPlan.getRanges()) {
            assertTrue("Tried to remove unexpected range " + range.toString() + " from expected ranges: " + expectedRanges.toString(), expectedRanges.remove(range));
        }
    }
    assertTrue("Expected ranges not found in query plan: " + expectedRanges.toString(), expectedRanges.isEmpty());
}
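The makeTestRange helper itself is not shown in this excerpt. Below is a hypothetical equivalent, purely an assumption about its shape (a document range keyed by the shard row and the datatype\0uid column family, built with the standard Accumulo Key/Range API); both the class and method names are illustrative, not the project's code.

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.PartialKey;
import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.io.Text;

public class ShardRangeSketch {
    // Hypothetical stand-in for the test's makeTestRange(shard, cf) helper: covers exactly
    // one row / column-family pair, i.e. one document in one shard.
    static Range makeDocumentRange(String shard, String datatypeAndUid) {
        Key start = new Key(new Text(shard), new Text(datatypeAndUid));
        return new Range(start, true, start.followingKey(PartialKey.ROW_COLFAM), false);
    }

    public static void main(String[] args) {
        System.out.println(makeDocumentRange("20190314", "datatype1\u0000234"));
    }
}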