Search in sources :

Example 1 with TypeMetadata

use of datawave.query.util.TypeMetadata in project datawave by NationalSecurityAgency.

the class QueryIterator method init.

/**
 * Initializes this iterator from the supplied source, options and environment.
 * <p>
 * In order, this method: validates the options, builds the type metadata (with and without the non-indexed
 * datatypes), parses the JEXL query, records the options and environment, optionally wraps the source in
 * tracking iterators, creates the field-index aggregator, takes a deep copy of the source, and finally pushes
 * configuration into the static {@code ActiveQueryLog} and {@code FSTManager} holders.
 *
 * @param source
 *            the underlying iterator; it may be wrapped before being stored in {@code this.source}
 * @param options
 *            the iterator options; validated first and then stored as {@code documentOptions}
 * @param env
 *            the iterator environment; stored as {@code myEnvironment} and passed to {@code deepCopy}
 * @throws IOException
 *             if the JEXL query cannot be parsed (the original failure is attached as the cause)
 * @throws IllegalArgumentException
 *             if {@code validateOptions} rejects the supplied options
 */
@Override
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException {
    if (log.isTraceEnabled()) {
        log.trace("QueryIterator init()");
    }
    // Fail fast: nothing below runs unless the options validate.
    if (!validateOptions(new SourcedOptions<>(source, env, options))) {
        throw new IllegalArgumentException("Could not initialize QueryIterator with " + options);
    }
    // We want to add in spoofed dataTypes for Aggregation/Evaluation to
    // ensure proper numeric evaluation.
    // Two copies are kept: typeMetadata is a plain copy, while typeMetadataWithNonIndexed
    // additionally receives the non-indexed datatype map for all ingest types.
    this.typeMetadata = new TypeMetadata(this.getTypeMetadata());
    this.typeMetadataWithNonIndexed = new TypeMetadata(this.typeMetadata);
    this.typeMetadataWithNonIndexed.addForAllIngestTypes(this.getNonIndexedDataTypeMap());
    this.exceededOrEvaluationCache = new HashMap<>();
    // Parse the query
    try {
        this.script = JexlASTHelper.parseJexlQuery(this.getQuery());
        this.myEvaluationFunction = new JexlEvaluation(this.getQuery(), arithmetic);
    } catch (Exception e) {
        // Any failure here (parsing or building the evaluation function) is reported as an IOException.
        throw new IOException("Could not parse the JEXL query: '" + this.getQuery() + "'", e);
    }
    this.documentOptions = options;
    this.myEnvironment = env;
    // When timing details are requested, route the source through a tracking wrapper that reports to trackingSpan.
    if (gatherTimingDetails()) {
        this.trackingSpan = new MultiThreadedQuerySpan(getStatsdClient());
        this.source = new SourceTrackingIterator(trackingSpan, source);
    } else {
        this.source = source;
    }
    // The third argument is the evaluation filter's max next count, or -1 when there is no evaluation filter.
    this.fiAggregator = new IdentityAggregator(getAllIndexOnlyFields(), getEvaluationFilter(), getEvaluationFilter() != null ? getEvaluationFilter().getMaxNextCount() : -1);
    // Optional second wrapper, applied on top of whatever this.source is at this point.
    if (isDebugMultithreadedSources()) {
        this.source = new SourceThreadTrackingIterator(this.source);
    }
    // The deep copy is taken after all wrapping, so it is a copy of the (possibly wrapped) source.
    // NOTE(review): env is null-checked below but is passed to deepCopy here without a check - confirm deepCopy tolerates null.
    this.sourceForDeepCopies = this.source.deepCopy(this.myEnvironment);
    // update ActiveQueryLog with (potentially) updated config
    if (env != null) {
        ActiveQueryLog.setConfig(env.getConfig());
    }
    // These are static setters, so the values apply beyond this iterator instance.
    DatawaveFieldIndexListIteratorJexl.FSTManager.setHdfsFileSystem(this.getFileSystemCache());
    DatawaveFieldIndexListIteratorJexl.FSTManager.setHdfsFileCompressionCodec(this.getHdfsFileCompressionCodec());
    pruneIvaratorCacheDirs();
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) SourceTrackingIterator(datawave.query.iterator.profile.SourceTrackingIterator) JexlEvaluation(datawave.query.function.JexlEvaluation) MultiThreadedQuerySpan(datawave.query.iterator.profile.MultiThreadedQuerySpan) IdentityAggregator(datawave.query.jexl.functions.IdentityAggregator) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) ConfigException(org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) FileNotFoundException(java.io.FileNotFoundException) TabletClosedException(org.apache.accumulo.tserver.tablet.TabletClosedException) MalformedURLException(java.net.MalformedURLException)

Example 2 with TypeMetadata

use of datawave.query.util.TypeMetadata in project datawave by NationalSecurityAgency.

the class QueryOptions method getDocumentPermutations.

/**
 * Returns the document permutations configured for this query, instantiating them on first use.
 * <p>
 * Each class name from {@code getDocumentPermutationClasses()} is loaded and instantiated once; the resulting
 * list is cached in {@code documentPermutations} and returned on every later call.
 *
 * @return the cached list of document permutations
 * @throws IllegalArgumentException
 *             if a configured class cannot be loaded, is not a {@code DocumentPermutation}, or cannot be
 *             instantiated
 */
public List<DocumentPermutation> getDocumentPermutations() {
    if (documentPermutations == null) {
        List<DocumentPermutation> list = new ArrayList<>();
        TypeMetadata metadata = getTypeMetadata();
        for (String classname : getDocumentPermutationClasses()) {
            list.add(instantiateDocumentPermutation(classname, metadata));
        }
        // Only publish the list once every class has been instantiated successfully.
        this.documentPermutations = list;
    }
    return this.documentPermutations;
}

/**
 * Loads {@code classname} and creates an instance of it, preferring a public constructor that accepts a
 * {@code TypeMetadata} and falling back to the no-argument constructor when no such constructor exists.
 *
 * @param classname
 *            fully qualified name of the class to instantiate
 * @param metadata
 *            the type metadata handed to the constructor when one accepting it is available
 * @return the new instance
 * @throws IllegalArgumentException
 *             if the class cannot be loaded, is not a {@code DocumentPermutation}, or cannot be instantiated
 */
private DocumentPermutation instantiateDocumentPermutation(String classname, TypeMetadata metadata) {
    try {
        // asSubclass verifies the type up front; an unchecked cast would let an unrelated class
        // through and only fail later, when the list element is first used.
        Class<? extends DocumentPermutation> clazz = Class.forName(classname).asSubclass(DocumentPermutation.class);
        Constructor<? extends DocumentPermutation> constructor;
        try {
            constructor = clazz.getConstructor(TypeMetadata.class);
        } catch (NoSuchMethodException e) {
            // No TypeMetadata constructor: fall back to the no-argument constructor. Unlike the deprecated
            // Class.newInstance(), this wraps constructor failures in InvocationTargetException.
            return clazz.getDeclaredConstructor().newInstance();
        }
        return constructor.newInstance(metadata);
    } catch (ReflectiveOperationException | ClassCastException e) {
        String message = "Unable to construct " + classname + " as a DocumentPermutation";
        log.error(message, e);
        throw new IllegalArgumentException(message, e);
    }
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) ArrayList(java.util.ArrayList) InvocationTargetException(java.lang.reflect.InvocationTargetException) DocumentPermutation(datawave.query.function.DocumentPermutation)

Example 3 with TypeMetadata

use of datawave.query.util.TypeMetadata in project datawave by NationalSecurityAgency.

the class QueryIteratorIT method setup.

/**
 * Builds a fresh iterator, option map, type metadata and mocks before each test.
 *
 * @throws IOException
 *             if the temporary folder cannot be created
 */
@Before
public void setup() throws IOException {
    iterator = new QueryIterator();
    options = new HashMap<>();
    tempPath = temporaryFolder.newFolder().toPath();

    // global options: force serial pipelines
    options.put(SERIAL_EVALUATION_PIPELINE, "true");
    options.put(ALLOW_FIELD_INDEX_EVALUATION, "true");
    options.put(ALLOW_TERM_FREQUENCY_LOOKUP, "true");

    // indexed fields, then the unindexed fields for the default datatype
    options.put(INDEXED_FIELDS, "EVENT_FIELD1,EVENT_FIELD4,EVENT_FIELD6,TF_FIELD0,TF_FIELD1,TF_FIELD2,INDEX_ONLY_FIELD1,INDEX_ONLY_FIELD2,INDEX_ONLY_FIELD3");
    options.put(NON_INDEXED_DATATYPES, DEFAULT_DATATYPE + ":EVENT_FIELD2,EVENT_FIELD3,EVENT_FIELD5");

    // query id
    options.put(QUERY_ID, "000001");

    // ivarator settings: cache directory under the per-test temporary folder
    final String cacheDirUri = "file://" + tempPath.toAbsolutePath();
    final IvaratorCacheDirConfig cacheDirConfig = new IvaratorCacheDirConfig(cacheDirUri);
    options.put(IVARATOR_CACHE_DIR_CONFIG, IvaratorCacheDirConfig.toJson(cacheDirConfig));
    final URL hadoopConfigUrl = this.getClass().getResource("/testhadoop.config");
    options.put(HDFS_SITE_CONFIG_URLS, hadoopConfigUrl.toExternalForm());

    // query time range
    options.put(START_TIME, "10");
    options.put(END_TIME, "100");

    // these will be marked as indexed fields; entries are added in the same order as the field list above
    final String lcNoDiacritics = "datawave.data.type.LcNoDiacriticsType";
    final String commaFree = CommaFreeType.class.getName();
    typeMetadata = new TypeMetadata();
    for (String field : new String[] {"EVENT_FIELD1", "EVENT_FIELD4", "EVENT_FIELD6", "TF_FIELD0"}) {
        typeMetadata.put(field, DEFAULT_DATATYPE, lcNoDiacritics);
    }
    for (String field : new String[] {"TF_FIELD1", "TF_FIELD2"}) {
        typeMetadata.put(field, DEFAULT_DATATYPE, commaFree);
    }
    for (String field : new String[] {"INDEX_ONLY_FIELD1", "INDEX_ONLY_FIELD2", "INDEX_ONLY_FIELD3"}) {
        typeMetadata.put(field, DEFAULT_DATATYPE, lcNoDiacritics);
    }

    // mocks: the environment always answers getConfig() with the default configuration
    environment = createMock(IteratorEnvironment.class);
    EasyMock.expect(environment.getConfig()).andReturn(DefaultConfiguration.getInstance()).anyTimes();
    filter = createMock(EventDataQueryFilter.class);
}
Also used : IvaratorCacheDirConfig(datawave.query.iterator.ivarator.IvaratorCacheDirConfig) TypeMetadata(datawave.query.util.TypeMetadata) EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) IteratorEnvironment(org.apache.accumulo.core.iterators.IteratorEnvironment) URL(java.net.URL) Before(org.junit.Before)

Example 4 with TypeMetadata

use of datawave.query.util.TypeMetadata in project datawave by NationalSecurityAgency.

the class IndexOnlyFunctionIterator method initializeFetch.

/*
     * Set up the fetch for an index-only field: build a single-key range in the tf column family from the parent
     * range's start key, prime a DocumentSpecificTreeIterable over the supplied key-to-document-data source, and
     * return that source transformed through an Aggregation.
     * 
     * @param fieldName The field to be fetched (used here only in the error message)
     * 
     * @param keyToDocumentData The index-only KeyToDocumentData implementation that supplies the entries
     * 
     * @return an iterator of Key/Document pairs, or an empty iterator if any exception occurs (the exception is logged, not rethrown)
     */
private <E> Iterator<Entry<Key, Document>> initializeFetch(final String fieldName, final IndexOnlyKeyToDocumentData keyToDocumentData) {
    try {
        // Derive the pieces of the tf key from the start of the parent range
        final Key rangeStart = this.parentRange.getStartKey();
        final Text row = rangeStart.getRow();
        final Text columnFamily = new Text(TF_COLUMN_FAMILY);
        Text columnQualifier = rangeStart.getColumnFamily();
        // Fall back to the document key's column family when the start key has none
        if (columnQualifier.getLength() == 0 && this.documentKey != null) {
            columnQualifier = this.documentKey.getColumnFamily();
        }
        final ColumnVisibility visibility = new ColumnVisibility(rangeStart.getColumnVisibility());
        final long timestamp = rangeStart.getTimestamp();

        // Start and stop keys are built from identical components
        final Key rangeBegin = new Key(row, columnFamily, columnQualifier, visibility, timestamp);
        final Key rangeEnd = new Key(row, columnFamily, columnQualifier, visibility, timestamp);
        final Range tfRange = new Range(rangeBegin, rangeEnd);

        // Collect the aggregation settings from the context creator (time filter, metadata, flags)
        final TypeMetadata types = this.contextCreator.getTypeMetadata();
        final CompositeMetadata composites = this.contextCreator.getCompositeMetadata();
        final boolean withGroupingContext = this.contextCreator.isIncludeGroupingContext();
        final TimeFilter filter = this.contextCreator.getTimeFilter();
        final boolean withRecordId = this.contextCreator.isIncludeRecordId();
        final Aggregation toDocument = new Aggregation(filter, types, composites, withGroupingContext, withRecordId, false, null);

        // Resolve the document key for the tf range and initialize the seek by requesting the iterable's iterator
        final Key fetchKey = this.contextCreator.getGetDocumentKey().apply(tfRange);
        final DocumentSpecificTreeIterable iterable = new DocumentSpecificTreeIterable(fetchKey, keyToDocumentData);
        iterable.iterator();

        // Initialize the fetch
        return Iterators.transform(keyToDocumentData, toDocument);
    } catch (final Exception e) {
        final String message = "Could not perform function on index-only field '" + fieldName + "\' for range " + this.parentRange;
        LOG.error(message, e);
        return Collections.<Entry<Key, Document>>emptySet().iterator();
    }
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) TimeFilter(datawave.query.predicate.TimeFilter) Text(org.apache.hadoop.io.Text) Range(org.apache.accumulo.core.data.Range) Aggregation(datawave.query.function.Aggregation) Entry(java.util.Map.Entry) CompositeMetadata(datawave.query.composite.CompositeMetadata) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Key(org.apache.accumulo.core.data.Key)

Example 5 with TypeMetadata

use of datawave.query.util.TypeMetadata in project datawave by NationalSecurityAgency.

the class ValueToAttributesTest method testComposites.

/**
 * Constructs a {@code ValueToAttributes} from composite and type metadata. The test makes no assertions; it
 * passes as long as construction completes without throwing.
 */
@Test
public void testComposites() {
    final String[] ingestTypes = {"test", "pilot", "work", "beep", "tw"};
    final CompositeMetadata composites = new CompositeMetadata();
    for (int i = 0; i < ingestTypes.length; i++) {
        final String ingestType = ingestTypes[i];
        composites.setCompositeFieldMappingByType(ingestType, "MAKE_COLOR", Arrays.asList("MAKE", "COLOR"));
        // NOTE(review): COLOR_WHEELS is mapped to the same components as MAKE_COLOR - confirm this is intended.
        composites.setCompositeFieldMappingByType(ingestType, "COLOR_WHEELS", Arrays.asList("MAKE", "COLOR"));
    }

    // One "FIELD:[ingestType:types]" entry per line, joined into a single compile-time constant
    final String serializedTypes = "MAKE:[beep:datawave.data.type.LcNoDiacriticsType];"
                    + "MAKE_COLOR:[beep:datawave.data.type.NoOpType];"
                    + "START_DATE:[beep:datawave.data.type.DateType];"
                    + "TYPE_NOEVAL:[beep:datawave.data.type.LcNoDiacriticsType];"
                    + "IP_ADDR:[beep:datawave.data.type.IpAddressType];"
                    + "WHEELS:[beep:datawave.data.type.LcNoDiacriticsType,datawave.data.type.NumberType];"
                    + "COLOR:[beep:datawave.data.type.LcNoDiacriticsType];"
                    + "COLOR_WHEELS:[beep:datawave.data.type.NoOpType];"
                    + "TYPE:[beep:datawave.data.type.LcNoDiacriticsType]";
    final TypeMetadata types = new TypeMetadata(serializedTypes);
    final MarkingFunctions markings = new MarkingFunctions.Default();

    new ValueToAttributes(composites, types, null, markings, true);
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) CompositeMetadata(datawave.query.composite.CompositeMetadata) MarkingFunctions(datawave.marking.MarkingFunctions) Test(org.junit.Test) BaseEdgeQueryTest(datawave.query.tables.edge.BaseEdgeQueryTest)

Aggregations

TypeMetadata (datawave.query.util.TypeMetadata)18 Key (org.apache.accumulo.core.data.Key)10 AttributeFactory (datawave.query.attributes.AttributeFactory)9 Document (datawave.query.attributes.Document)9 Value (org.apache.accumulo.core.data.Value)9 EventDataQueryFilter (datawave.query.predicate.EventDataQueryFilter)8 HashSet (java.util.HashSet)8 Range (org.apache.accumulo.core.data.Range)8 Test (org.junit.Test)8 EventDataQueryFieldFilter (datawave.query.predicate.EventDataQueryFieldFilter)7 SortedMapIterator (org.apache.accumulo.core.iterators.SortedMapIterator)7 DatawaveKey (datawave.query.data.parsers.DatawaveKey)6 ArrayList (java.util.ArrayList)4 Before (org.junit.Before)4 Attribute (datawave.query.attributes.Attribute)2 CompositeMetadata (datawave.query.composite.CompositeMetadata)2 Aggregation (datawave.query.function.Aggregation)2 Entry (java.util.Map.Entry)2 Set (java.util.Set)2 BaseIteratorEnvironment (org.apache.accumulo.core.client.impl.BaseIteratorEnvironment)2