Use of datawave.query.util.TypeMetadata in project datawave by NationalSecurityAgency.
The class QueryIterator, method init.
/**
 * Initializes this iterator: validates the options, prepares type metadata, parses the query and wires up the source iterator.
 *
 * @param source
 *            the underlying iterator; it may be wrapped in tracking iterators before being stored
 * @param options
 *            the iterator options; validated first and then retained as the document options
 * @param env
 *            the iterator environment; retained and used for the deep copy of the source
 * @throws IOException
 *             if the JEXL query cannot be parsed or the evaluation function cannot be built
 * @throws IllegalArgumentException
 *             if the options fail validation
 */
@Override
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException {
if (log.isTraceEnabled()) {
log.trace("QueryIterator init()");
}
// Fail fast when the supplied options are not acceptable.
if (!validateOptions(new SourcedOptions<>(source, env, options))) {
throw new IllegalArgumentException("Could not initialize QueryIterator with " + options);
}
// We want to add in spoofed dataTypes for Aggregation/Evaluation to
// ensure proper numeric evaluation.
// Both metadata objects are copies; only the second one receives the non-indexed data types.
this.typeMetadata = new TypeMetadata(this.getTypeMetadata());
this.typeMetadataWithNonIndexed = new TypeMetadata(this.typeMetadata);
this.typeMetadataWithNonIndexed.addForAllIngestTypes(this.getNonIndexedDataTypeMap());
this.exceededOrEvaluationCache = new HashMap<>();
// Parse the query
// Any failure here, checked or unchecked, is rethrown as an IOException that carries the query text.
try {
this.script = JexlASTHelper.parseJexlQuery(this.getQuery());
this.myEvaluationFunction = new JexlEvaluation(this.getQuery(), arithmetic);
} catch (Exception e) {
throw new IOException("Could not parse the JEXL query: '" + this.getQuery() + "'", e);
}
this.documentOptions = options;
this.myEnvironment = env;
// When timing details are requested, wrap the source so it is tracked by a new query span.
if (gatherTimingDetails()) {
this.trackingSpan = new MultiThreadedQuerySpan(getStatsdClient());
this.source = new SourceTrackingIterator(trackingSpan, source);
} else {
this.source = source;
}
// -1 is passed as the max next count when there is no evaluation filter.
this.fiAggregator = new IdentityAggregator(getAllIndexOnlyFields(), getEvaluationFilter(), getEvaluationFilter() != null ? getEvaluationFilter().getMaxNextCount() : -1);
// Optional extra wrapper, applied on top of whichever source was chosen above.
if (isDebugMultithreadedSources()) {
this.source = new SourceThreadTrackingIterator(this.source);
}
// The deep copy is taken after all wrapping, so it is a copy of the wrapped source.
// NOTE(review): env is null-checked below but passed here unchecked - confirm deepCopy tolerates a null environment.
this.sourceForDeepCopies = this.source.deepCopy(this.myEnvironment);
// update ActiveQueryLog with (potentially) updated config
if (env != null) {
ActiveQueryLog.setConfig(env.getConfig());
}
// Hand the file system cache and compression codec to the static FSTManager.
DatawaveFieldIndexListIteratorJexl.FSTManager.setHdfsFileSystem(this.getFileSystemCache());
DatawaveFieldIndexListIteratorJexl.FSTManager.setHdfsFileCompressionCodec(this.getHdfsFileCompressionCodec());
pruneIvaratorCacheDirs();
}
Use of datawave.query.util.TypeMetadata in project datawave by NationalSecurityAgency.
The class QueryOptions, method getDocumentPermutations.
/**
 * Returns the document permutations configured for this query, instantiating them on first use.
 * <p>
 * Each configured class name is loaded and verified to be a {@link DocumentPermutation}. The result is cached in {@code documentPermutations}, so
 * later calls return the same list.
 *
 * @return the list of instantiated document permutations
 * @throws IllegalArgumentException
 *             if a configured class cannot be found, is not a {@link DocumentPermutation}, or cannot be constructed
 */
public List<DocumentPermutation> getDocumentPermutations() {
    if (documentPermutations == null) {
        List<DocumentPermutation> list = new ArrayList<>();
        TypeMetadata metadata = getTypeMetadata();
        for (String classname : getDocumentPermutationClasses()) {
            list.add(newDocumentPermutation(classname, metadata));
        }
        this.documentPermutations = list;
    }
    return this.documentPermutations;
}

/**
 * Instantiates a single {@link DocumentPermutation}, preferring a constructor that accepts a {@link TypeMetadata} and falling back to the no-argument
 * constructor when no such constructor exists.
 *
 * @param classname
 *            the fully qualified name of the class to instantiate
 * @param metadata
 *            the type metadata handed to the one-argument constructor, when available
 * @return the new instance
 * @throws IllegalArgumentException
 *             if the class cannot be loaded, is not a {@link DocumentPermutation}, or cannot be constructed
 */
private DocumentPermutation newDocumentPermutation(String classname, TypeMetadata metadata) {
    try {
        // asSubclass performs a checked cast, so a class of the wrong type is rejected here
        // instead of silently polluting the list and failing later at the point of use.
        Class<? extends DocumentPermutation> clazz = Class.forName(classname).asSubclass(DocumentPermutation.class);
        try {
            return clazz.getConstructor(TypeMetadata.class).newInstance(metadata);
        } catch (NoSuchMethodException e) {
            // No TypeMetadata constructor: fall back to the no-argument constructor.
            // getDeclaredConstructor().newInstance() replaces the deprecated Class.newInstance(),
            // which rethrows constructor exceptions without wrapping them.
            return clazz.getDeclaredConstructor().newInstance();
        }
    } catch (ReflectiveOperationException | ClassCastException e) {
        String message = "Unable to construct " + classname + " as a DocumentPermutation";
        log.error(message, e);
        throw new IllegalArgumentException(message, e);
    }
}
Use of datawave.query.util.TypeMetadata in project datawave by NationalSecurityAgency.
The class QueryIteratorIT, method setup.
/**
 * Prepares a fresh iterator, option map, type metadata and mocks before each test.
 *
 * @throws IOException
 *             if the temporary folder cannot be created
 */
@Before
public void setup() throws IOException {
    iterator = new QueryIterator();
    options = new HashMap<>();
    tempPath = temporaryFolder.newFolder().toPath();

    // global options: serial pipelines, with field index evaluation and term frequency lookup allowed
    options.put(SERIAL_EVALUATION_PIPELINE, "true");
    options.put(ALLOW_FIELD_INDEX_EVALUATION, "true");
    options.put(ALLOW_TERM_FREQUENCY_LOOKUP, "true");

    // indexed fields, followed by the non-indexed fields for the default datatype
    options.put(INDEXED_FIELDS, "EVENT_FIELD1,EVENT_FIELD4,EVENT_FIELD6,TF_FIELD0,TF_FIELD1,TF_FIELD2,INDEX_ONLY_FIELD1,INDEX_ONLY_FIELD2,INDEX_ONLY_FIELD3");
    options.put(NON_INDEXED_DATATYPES, DEFAULT_DATATYPE + ":EVENT_FIELD2,EVENT_FIELD3,EVENT_FIELD5");

    // query id
    options.put(QUERY_ID, "000001");

    // ivarator settings: cache directory under the temporary folder, plus the hadoop site config
    final String cacheDirUri = "file://" + tempPath.toAbsolutePath();
    final IvaratorCacheDirConfig cacheDirConfig = new IvaratorCacheDirConfig(cacheDirUri);
    options.put(IVARATOR_CACHE_DIR_CONFIG, IvaratorCacheDirConfig.toJson(cacheDirConfig));
    final URL hadoopConfigUrl = this.getClass().getResource("/testhadoop.config");
    options.put(HDFS_SITE_CONFIG_URLS, hadoopConfigUrl.toExternalForm());

    // query time range
    options.put(START_TIME, "10");
    options.put(END_TIME, "100");

    // type metadata for the indexed fields, registered in the same order as the indexed fields list
    final String lcNoDiacritics = "datawave.data.type.LcNoDiacriticsType";
    final String commaFree = CommaFreeType.class.getName();
    typeMetadata = new TypeMetadata();
    for (String field : new String[] {"EVENT_FIELD1", "EVENT_FIELD4", "EVENT_FIELD6", "TF_FIELD0"}) {
        typeMetadata.put(field, DEFAULT_DATATYPE, lcNoDiacritics);
    }
    for (String field : new String[] {"TF_FIELD1", "TF_FIELD2"}) {
        typeMetadata.put(field, DEFAULT_DATATYPE, commaFree);
    }
    for (String field : new String[] {"INDEX_ONLY_FIELD1", "INDEX_ONLY_FIELD2", "INDEX_ONLY_FIELD3"}) {
        typeMetadata.put(field, DEFAULT_DATATYPE, lcNoDiacritics);
    }

    // mocks: the environment always returns the default configuration
    environment = createMock(IteratorEnvironment.class);
    EasyMock.expect(environment.getConfig()).andReturn(DefaultConfiguration.getInstance()).anyTimes();
    filter = createMock(EventDataQueryFilter.class);
}
Use of datawave.query.util.TypeMetadata in project datawave by NationalSecurityAgency.
The class IndexOnlyFunctionIterator, method initializeFetch.
/*
 * Builds the iterator of Key/Document pairs for an index-only field by running the supplied IndexOnlyKeyToDocumentData through an Aggregation.
 *
 * Any exception raised while setting up the fetch is logged and an empty iterator is returned instead.
 *
 * @param fieldName The field to be fetched; used here only in the logged error message
 *
 * @param keyToDocumentData The index-only document data source that is seeked and then transformed into documents
 *
 * @return an iterator of Key/Document pairs, empty if the fetch could not be initialized
 */
private <E> Iterator<Entry<Key, Document>> initializeFetch(final String fieldName, final IndexOnlyKeyToDocumentData keyToDocumentData) {
    try {
        // Derive a tf-column range from the start key of the parent range
        final Key rangeStart = this.parentRange.getStartKey();
        final Text row = rangeStart.getRow();
        final Text columnFamily = new Text(TF_COLUMN_FAMILY);
        Text columnQualifier = rangeStart.getColumnFamily();
        if ((columnQualifier.getLength() == 0) && (this.documentKey != null)) {
            // The start key has no column family, so borrow it from the known document key
            columnQualifier = this.documentKey.getColumnFamily();
        }
        final ColumnVisibility visibility = new ColumnVisibility(rangeStart.getColumnVisibility());
        final long timestamp = rangeStart.getTimestamp();
        // Both ends of the range are built from the same components
        final Key rangeBegin = new Key(row, columnFamily, columnQualifier, visibility, timestamp);
        final Key rangeEnd = new Key(row, columnFamily, columnQualifier, visibility, timestamp);
        final Range tfRange = new Range(rangeBegin, rangeEnd);

        // Gather the settings needed to turn document keys into Entry<Key,Document>
        final TypeMetadata types = this.contextCreator.getTypeMetadata();
        final CompositeMetadata composites = this.contextCreator.getCompositeMetadata();
        final boolean groupingContext = this.contextCreator.isIncludeGroupingContext();
        final TimeFilter filter = this.contextCreator.getTimeFilter();
        final boolean recordId = this.contextCreator.isIncludeRecordId();
        final Aggregation toDocument = new Aggregation(filter, types, composites, groupingContext, recordId, false, null);

        // Resolve the document key for the range and initialize the seek on the document data
        final Key fetchKey = this.contextCreator.getGetDocumentKey().apply(tfRange);
        final DocumentSpecificTreeIterable seekable = new DocumentSpecificTreeIterable(fetchKey, keyToDocumentData);
        seekable.iterator();

        // Initialize the fetch
        return Iterators.transform(keyToDocumentData, toDocument);
    } catch (final Exception e) {
        final String message = "Could not perform function on index-only field '" + fieldName + "\' for range " + this.parentRange;
        LOG.error(message, e);
    }
    // Setup failed: hand back an empty iterator
    final Collection<Entry<Key, Document>> none = Collections.emptySet();
    return none.iterator();
}
Use of datawave.query.util.TypeMetadata in project datawave by NationalSecurityAgency.
The class ValueToAttributesTest, method testComposites.
/**
 * Builds composite and type metadata for several ingest types and constructs a {@code ValueToAttributes} from them. The test makes no assertions; it
 * passes as long as nothing throws.
 */
@Test
public void testComposites() {
    final String[] ingestTypes = {"test", "pilot", "work", "beep", "tw"};
    final CompositeMetadata composites = new CompositeMetadata();
    for (int i = 0; i < ingestTypes.length; i++) {
        final String ingestType = ingestTypes[i];
        // Both composite fields are registered with the same component fields
        composites.setCompositeFieldMappingByType(ingestType, "MAKE_COLOR", Arrays.asList("MAKE", "COLOR"));
        composites.setCompositeFieldMappingByType(ingestType, "COLOR_WHEELS", Arrays.asList("MAKE", "COLOR"));
    }

    final String serializedTypes = "MAKE:[beep:datawave.data.type.LcNoDiacriticsType];MAKE_COLOR:[beep:datawave.data.type.NoOpType];START_DATE:[beep:datawave.data.type.DateType];TYPE_NOEVAL:[beep:datawave.data.type.LcNoDiacriticsType];IP_ADDR:[beep:datawave.data.type.IpAddressType];WHEELS:[beep:datawave.data.type.LcNoDiacriticsType,datawave.data.type.NumberType];COLOR:[beep:datawave.data.type.LcNoDiacriticsType];COLOR_WHEELS:[beep:datawave.data.type.NoOpType];TYPE:[beep:datawave.data.type.LcNoDiacriticsType]";
    final TypeMetadata types = new TypeMetadata(serializedTypes);
    final MarkingFunctions markings = new MarkingFunctions.Default();

    final ValueToAttributes converter = new ValueToAttributes(composites, types, null, markings, true);
}
Aggregations