use of datawave.query.composite.CompositeMetadata in project datawave by NationalSecurityAgency.
the class IndexOnlyFunctionIterator method initializeFetch.
/**
 * Trigger the fetch by creating a stack of iterators based on a specialized, index-only, KeyToDocumentData implementation.
 * <p>
 * Any exception raised while the stack is being built is logged and swallowed, in which case an empty iterator is returned.
 *
 * @param <E>
 *            not referenced by the signature or the body
 * @param fieldName
 *            the field to be fetched; within this method it is only used in the error message logged on failure
 * @param keyToDocumentData
 *            the index-only document data source that is wrapped in a DocumentSpecificTreeIterable and then transformed into documents
 * @return an iterator of Key/Document pairs, or an empty iterator if initialization failed
 */
private <E> Iterator<Entry<Key, Document>> initializeFetch(final String fieldName, final IndexOnlyKeyToDocumentData keyToDocumentData) {
    try {
        // Build the pieces of a tf key from the start of the parent range
        final Key rangeStart = this.parentRange.getStartKey();
        final Text row = rangeStart.getRow();
        final Text columnFamily = new Text(TF_COLUMN_FAMILY);

        // Prefer the start key's column family as the partial qualifier; fall back to the document key's
        // column family when the start key has none and a document key is available
        final Text startColumnFamily = rangeStart.getColumnFamily();
        final boolean fallBackToDocumentKey = (startColumnFamily.getLength() == 0) && (this.documentKey != null);
        final Text partialQualifier = fallBackToDocumentKey ? this.documentKey.getColumnFamily() : startColumnFamily;

        final ColumnVisibility visibility = new ColumnVisibility(rangeStart.getColumnVisibility());
        final long ts = rangeStart.getTimestamp();

        // Both ends of the range are built from the same components
        final Key rangeBegin = new Key(row, columnFamily, partialQualifier, visibility, ts);
        final Key rangeEnd = new Key(row, columnFamily, partialQualifier, visibility, ts);
        final Range tfRange = new Range(rangeBegin, rangeEnd);

        // The aggregation turns document Keys into Entry<Key,Document>, removing attributes for the document
        // that do not fall within the expected time range
        final TypeMetadata types = this.contextCreator.getTypeMetadata();
        final CompositeMetadata composites = this.contextCreator.getCompositeMetadata();
        final boolean groupingContext = this.contextCreator.isIncludeGroupingContext();
        final TimeFilter filter = this.contextCreator.getTimeFilter();
        final boolean recordId = this.contextCreator.isIncludeRecordId();
        final Aggregation toDocument = new Aggregation(filter, types, composites, groupingContext, recordId, false, null);

        // Although the DocumentData is retrieved from the tf section of the shard table, the
        // IndexOnlyKeyToDocumentData reformats the entries to "look" like records from standard columns
        final Key docKey = this.contextCreator.getGetDocumentKey().apply(tfRange);
        final DocumentSpecificTreeIterable tree = new DocumentSpecificTreeIterable(docKey, keyToDocumentData);

        // Requesting the iterator initializes the seek; the returned iterator itself is not needed
        tree.iterator();

        // Initialize the fetch
        return Iterators.transform(keyToDocumentData, toDocument);
    } catch (final Exception ex) {
        final String message = "Could not perform function on index-only field '" + fieldName + "\' for range " + this.parentRange;
        LOG.error(message, ex);
    }

    // Initialization failed: hand back an iterator with nothing in it
    final Collection<Entry<Key, Document>> nothing = Collections.emptySet();
    return nothing.iterator();
}
use of datawave.query.composite.CompositeMetadata in project datawave by NationalSecurityAgency.
the class ValueToAttributesTest method testComposites.
/**
 * Builds composite metadata and type metadata for the same set of fields and constructs a ValueToAttributes from them.
 * <p>
 * The test makes no assertions; it passes as long as none of the setup or the construction throws.
 */
@Test
public void testComposites() {
    CompositeMetadata compositeMetadata = new CompositeMetadata();
    for (String ingestType : new String[] { "test", "pilot", "work", "beep", "tw" }) {
        compositeMetadata.setCompositeFieldMappingByType(ingestType, "MAKE_COLOR", Arrays.asList("MAKE", "COLOR"));
        // COLOR_WHEELS is built from COLOR and WHEELS, mirroring how MAKE_COLOR is built from MAKE and COLOR
        compositeMetadata.setCompositeFieldMappingByType(ingestType, "COLOR_WHEELS", Arrays.asList("COLOR", "WHEELS"));
    }
    // Type metadata for the component fields and both composite fields, all under the "beep" ingest type
    TypeMetadata typeMetadata = new TypeMetadata("MAKE:[beep:datawave.data.type.LcNoDiacriticsType];MAKE_COLOR:[beep:datawave.data.type.NoOpType];START_DATE:[beep:datawave.data.type.DateType];TYPE_NOEVAL:[beep:datawave.data.type.LcNoDiacriticsType];IP_ADDR:[beep:datawave.data.type.IpAddressType];WHEELS:[beep:datawave.data.type.LcNoDiacriticsType,datawave.data.type.NumberType];COLOR:[beep:datawave.data.type.LcNoDiacriticsType];COLOR_WHEELS:[beep:datawave.data.type.NoOpType];TYPE:[beep:datawave.data.type.LcNoDiacriticsType]");
    MarkingFunctions markingFunctions = new MarkingFunctions.Default();
    // The instance is not inspected further; constructing it is the whole check
    ValueToAttributes valueToAttributes = new ValueToAttributes(compositeMetadata, typeMetadata, null, markingFunctions, true);
}
use of datawave.query.composite.CompositeMetadata in project datawave by NationalSecurityAgency.
the class DefaultQueryPlanner method loadQueryIterator.
/**
 * Submits a task to {@code builderThread} that builds the {@link IteratorSetting} for the query iterator and populates its options from the
 * query configuration and the metadata helper.
 * <p>
 * The setting is created at the configured base iterator priority plus 40, under the name "query", using the class returned by
 * {@code getQueryIteratorClass()}.
 *
 * @param metadataHelper
 *            source of the index-only, composite, indexed and content field names and of the composite metadata
 * @param config
 *            the query configuration from which the option values are read
 * @param settings
 *            the query settings; not referenced by this implementation
 * @param queryString
 *            the query string; not referenced by this implementation
 * @param isFullTable
 *            whether the query is a full table scan; written to the FULL_TABLE_SCAN_ONLY option
 * @return a future for the fully configured iterator setting
 * @throws DatawaveQueryException
 *             declared by the signature; in the visible code it is only thrown inside the submitted task when a metadata lookup fails with
 *             a TableNotFoundException, so it presumably surfaces when the returned future is resolved
 */
protected Future<IteratorSetting> loadQueryIterator(final MetadataHelper metadataHelper, final ShardQueryConfiguration config, final Query settings, final String queryString, final Boolean isFullTable) throws DatawaveQueryException {
    return builderThread.submit(() -> {
        // VersioningIterator is typically set at 20 on the table
        IteratorSetting cfg = new IteratorSetting(config.getBaseIteratorPriority() + 40, "query", getQueryIteratorClass());
        addOption(cfg, Constants.RETURN_TYPE, config.getReturnType().toString(), false);
        addOption(cfg, QueryOptions.FULL_TABLE_SCAN_ONLY, Boolean.toString(isFullTable), false);
        if (sourceLimit > 0) {
            addOption(cfg, QueryOptions.LIMIT_SOURCES, Long.toString(sourceLimit), false);
        }
        if (config.getSendTimingToStatsd()) {
            addOption(cfg, QueryOptions.STATSD_HOST_COLON_PORT, config.getStatsdHost() + ':' + Integer.toString(config.getStatsdPort()), false);
            addOption(cfg, QueryOptions.STATSD_MAX_QUEUE_SIZE, Integer.toString(config.getStatsdMaxQueueSize()), false);
        }

        // Optional HDFS / ZooKeeper / ivarator cache locations are only passed along when configured
        if (config.getHdfsSiteConfigURLs() != null) {
            addOption(cfg, QueryOptions.HDFS_SITE_CONFIG_URLS, config.getHdfsSiteConfigURLs(), false);
        }
        if (config.getHdfsFileCompressionCodec() != null) {
            addOption(cfg, QueryOptions.HDFS_FILE_COMPRESSION_CODEC, config.getHdfsFileCompressionCodec(), false);
        }
        if (config.getZookeeperConfig() != null) {
            addOption(cfg, QueryOptions.ZOOKEEPER_CONFIG, config.getZookeeperConfig(), false);
        }
        if (config.getIvaratorCacheDirConfigs() != null && !config.getIvaratorCacheDirConfigs().isEmpty()) {
            addOption(cfg, QueryOptions.IVARATOR_CACHE_DIR_CONFIG, IvaratorCacheDirConfig.toJson(getShuffledIvaratoCacheDirConfigs(config)), false);
        }

        // Options that are always written
        addOption(cfg, QueryOptions.IVARATOR_CACHE_BUFFER_SIZE, Integer.toString(config.getIvaratorCacheBufferSize()), false);
        addOption(cfg, QueryOptions.IVARATOR_SCAN_PERSIST_THRESHOLD, Long.toString(config.getIvaratorCacheScanPersistThreshold()), false);
        addOption(cfg, QueryOptions.IVARATOR_SCAN_TIMEOUT, Long.toString(config.getIvaratorCacheScanTimeout()), false);
        // Written unconditionally with the configured value, so no separate "only when true" write is needed
        addOption(cfg, QueryOptions.COLLECT_TIMING_DETAILS, Boolean.toString(config.getCollectTimingDetails()), false);
        addOption(cfg, QueryOptions.MAX_INDEX_RANGE_SPLIT, Integer.toString(config.getMaxFieldIndexRangeSplit()), false);
        addOption(cfg, QueryOptions.MAX_IVARATOR_OPEN_FILES, Integer.toString(config.getIvaratorMaxOpenFiles()), false);
        addOption(cfg, QueryOptions.MAX_IVARATOR_RESULTS, Long.toString(config.getMaxIvaratorResults()), false);
        addOption(cfg, QueryOptions.IVARATOR_NUM_RETRIES, Integer.toString(config.getIvaratorNumRetries()), false);
        addOption(cfg, QueryOptions.IVARATOR_PERSIST_VERIFY, Boolean.toString(config.isIvaratorPersistVerify()), false);
        addOption(cfg, QueryOptions.IVARATOR_PERSIST_VERIFY_COUNT, Integer.toString(config.getIvaratorPersistVerifyCount()), false);
        addOption(cfg, QueryOptions.MAX_EVALUATION_PIPELINES, Integer.toString(config.getMaxEvaluationPipelines()), false);
        addOption(cfg, QueryOptions.MAX_PIPELINE_CACHED_RESULTS, Integer.toString(config.getMaxPipelineCachedResults()), false);
        addOption(cfg, QueryOptions.MAX_IVARATOR_SOURCES, Integer.toString(config.getMaxIvaratorSources()), false);

        // Long.MAX_VALUE and non-positive values are not passed along as a yield threshold
        if (config.getYieldThresholdMs() != Long.MAX_VALUE && config.getYieldThresholdMs() > 0) {
            addOption(cfg, QueryOptions.YIELD_THRESHOLD_MS, Long.toString(config.getYieldThresholdMs()), false);
        }
        addOption(cfg, QueryOptions.SORTED_UIDS, Boolean.toString(config.isSortedUIDs()), false);
        configureTypeMappings(config, cfg, metadataHelper, compressMappings);
        configureAdditionalOptions(config, cfg);

        // Field lists restricted to the configured datatype filter
        try {
            addOption(cfg, QueryOptions.INDEX_ONLY_FIELDS, QueryOptions.buildFieldStringFromSet(metadataHelper.getIndexOnlyFields(config.getDatatypeFilter())), true);
            addOption(cfg, QueryOptions.COMPOSITE_FIELDS, QueryOptions.buildFieldStringFromSet(metadataHelper.getCompositeToFieldMap(config.getDatatypeFilter()).keySet()), true);
            addOption(cfg, QueryOptions.INDEXED_FIELDS, QueryOptions.buildFieldStringFromSet(metadataHelper.getIndexedFields(config.getDatatypeFilter())), true);
        } catch (TableNotFoundException e) {
            QueryException qe = new QueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_RETRIEVAL_ERROR, e);
            throw new DatawaveQueryException(qe);
        }

        // Composite metadata, filtered to the query's fields, is shipped Base64-encoded and only when non-empty
        try {
            CompositeMetadata compositeMetadata = metadataHelper.getCompositeMetadata().filter(config.getQueryFieldsDatatypes().keySet());
            if (compositeMetadata != null && !compositeMetadata.isEmpty()) {
                addOption(cfg, QueryOptions.COMPOSITE_METADATA, java.util.Base64.getEncoder().encodeToString(CompositeMetadata.toBytes(compositeMetadata)), false);
            }
        } catch (TableNotFoundException e) {
            QueryException qe = new QueryException(DatawaveErrorCode.COMPOSITE_METADATA_CONFIG_ERROR, e);
            throw new DatawaveQueryException(qe);
        }

        String datatypeFilter = config.getDatatypeFilterAsString();
        addOption(cfg, QueryOptions.DATATYPE_FILTER, datatypeFilter, false);
        try {
            addOption(cfg, QueryOptions.CONTENT_EXPANSION_FIELDS, Joiner.on(',').join(metadataHelper.getContentFields(config.getDatatypeFilter())), false);
        } catch (TableNotFoundException e) {
            QueryException qe = new QueryException(DatawaveErrorCode.CONTENT_FIELDS_RETRIEVAL_ERROR, e);
            throw new DatawaveQueryException(qe);
        }

        // The remaining flags are only written when enabled / configured
        if (config.isDebugMultithreadedSources()) {
            addOption(cfg, QueryOptions.DEBUG_MULTITHREADED_SOURCES, Boolean.toString(config.isDebugMultithreadedSources()), false);
        }
        if (config.isLimitFieldsPreQueryEvaluation()) {
            addOption(cfg, QueryOptions.LIMIT_FIELDS_PRE_QUERY_EVALUATION, Boolean.toString(config.isLimitFieldsPreQueryEvaluation()), false);
        }
        if (config.getLimitFieldsField() != null) {
            addOption(cfg, QueryOptions.LIMIT_FIELDS_FIELD, config.getLimitFieldsField(), false);
        }
        return cfg;
    });
}
Aggregations