Use of datawave.query.jexl.functions.IdentityAggregator in project datawave by NationalSecurityAgency.
The class QueryIterator, method init.
/**
 * Initializes this iterator from the supplied source, options and environment.
 * <p>
 * In order, this method: validates the options (wrapped in a {@code SourcedOptions}), builds the type metadata
 * (plain and with the non-indexed data types added), parses the query into a script and creates the
 * {@code JexlEvaluation}, records the options and environment, optionally wraps the source for timing and
 * thread tracking, creates the field index aggregator, deep-copies the source, refreshes the
 * {@code ActiveQueryLog} configuration, configures the {@code FSTManager} and finally calls
 * {@code pruneIvaratorCacheDirs()}.
 *
 * @param source
 *            the underlying iterator to read from
 * @param options
 *            the iterator options; kept as the document options once validated
 * @param env
 *            the iterator environment; may be {@code null}, in which case the ActiveQueryLog config is not updated
 * @throws IOException
 *             if parsing the query or constructing the evaluation throws any exception (the cause is preserved)
 * @throws IllegalArgumentException
 *             if the options fail validation
 */
@Override
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException {
    if (log.isTraceEnabled()) {
        log.trace("QueryIterator init()");
    }

    // Refuse to go any further with options that do not validate.
    final boolean optionsAccepted = validateOptions(new SourcedOptions<>(source, env, options));
    if (!optionsAccepted) {
        throw new IllegalArgumentException("Could not initialize QueryIterator with " + options);
    }

    // We want to add in spoofed dataTypes for Aggregation/Evaluation to
    // ensure proper numeric evaluation.
    this.typeMetadata = new TypeMetadata(getTypeMetadata());
    this.typeMetadataWithNonIndexed = new TypeMetadata(this.typeMetadata);
    this.typeMetadataWithNonIndexed.addForAllIngestTypes(getNonIndexedDataTypeMap());

    this.exceededOrEvaluationCache = new HashMap<>();

    // Parse the query; every failure here is reported as an IOException carrying the query text.
    try {
        this.script = JexlASTHelper.parseJexlQuery(getQuery());
        this.myEvaluationFunction = new JexlEvaluation(getQuery(), arithmetic);
    } catch (Exception e) {
        throw new IOException("Could not parse the JEXL query: '" + getQuery() + "'", e);
    }

    this.documentOptions = options;
    this.myEnvironment = env;

    // Use the raw source unless timing details were requested, in which case it is wrapped with a tracking span.
    if (!gatherTimingDetails()) {
        this.source = source;
    } else {
        this.trackingSpan = new MultiThreadedQuerySpan(getStatsdClient());
        this.source = new SourceTrackingIterator(trackingSpan, source);
    }

    // The max next count is only available when an evaluation filter exists; -1 is passed otherwise.
    this.fiAggregator = new IdentityAggregator(
                    getAllIndexOnlyFields(),
                    getEvaluationFilter(),
                    getEvaluationFilter() != null ? getEvaluationFilter().getMaxNextCount() : -1);

    if (isDebugMultithreadedSources()) {
        this.source = new SourceThreadTrackingIterator(this.source);
    }

    // Taken after all wrapping so that the copy includes the tracking wrappers applied above.
    this.sourceForDeepCopies = this.source.deepCopy(this.myEnvironment);

    // update ActiveQueryLog with (potentially) updated config
    if (null != env) {
        ActiveQueryLog.setConfig(env.getConfig());
    }

    DatawaveFieldIndexListIteratorJexl.FSTManager.setHdfsFileSystem(getFileSystemCache());
    DatawaveFieldIndexListIteratorJexl.FSTManager.setHdfsFileCompressionCodec(getHdfsFileCompressionCodec());

    pruneIvaratorCacheDirs();
}
Use of datawave.query.jexl.functions.IdentityAggregator in project datawave by NationalSecurityAgency.
The class FieldIndexOnlyQueryIterator, method init.
/**
 * Initializes this iterator from the supplied source, options and environment.
 * <p>
 * Validates the options, parses and flattens the query into a script, records the options and environment,
 * optionally wraps the source in a {@code SourceTrackingIterator} when timing details are collected, creates
 * an {@code IdentityAggregator} with {@code null} arguments, and deep-copies the source.
 *
 * @param source
 *            the underlying iterator to read from
 * @param options
 *            the iterator options; kept as the document options once validated
 * @param env
 *            the iterator environment, passed through to {@code deepCopy}
 * @throws IOException
 *             if the query cannot be parsed (the {@code ParseException} is preserved as the cause)
 * @throws IllegalArgumentException
 *             if the options fail validation
 */
@Override
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException {
    if (log.isTraceEnabled()) {
        log.trace("QueryIterator init()");
    }

    // Refuse to go any further with options that do not validate.
    final boolean optionsAccepted = validateOptions(options);
    if (!optionsAccepted) {
        throw new IllegalArgumentException("Could not initialize QueryIterator with " + options);
    }

    // Parse & flatten the query
    try {
        this.script = JexlASTHelper.parseAndFlattenJexlQuery(getQuery());
    } catch (ParseException e) {
        throw new IOException("Could not parse the JEXL query: '" + getQuery() + "'", e);
    }

    this.documentOptions = options;
    this.myEnvironment = env;

    // Use the raw source unless timing details are collected, in which case it is wrapped with a tracking span.
    if (!collectTimingDetails) {
        this.source = source;
    } else {
        this.trackingSpan = new QuerySpan(getStatsdClient());
        this.source = new SourceTrackingIterator(trackingSpan, source);
    }

    this.fiAggregator = new IdentityAggregator(null, null);

    // Taken after the optional wrapping so that the copy includes the tracking wrapper.
    this.sourceForDeepCopies = this.source.deepCopy(this.myEnvironment);
}
Use of datawave.query.jexl.functions.IdentityAggregator in project datawave by NationalSecurityAgency.
The class QueryOptions, method validateOptions.
/**
 * Reads the iterator option map and populates the configuration of this instance from it.
 * <p>
 * Most options are optional and only overwrite the current value when their key is present. Validation fails
 * (returns {@code false}, after logging an error) when:
 * <ul>
 * <li>no {@code QUERY} is supplied while evaluation is not disabled,</li>
 * <li>both {@code PROJECTION_FIELDS} and {@code BLACKLISTED_FIELDS} are supplied,</li>
 * <li>{@code INDEX_ONLY_FIELDS} is missing and this is not a full-table-scan-only query,</li>
 * <li>{@code START_TIME} or {@code END_TIME} is missing, or the end time precedes the start time.</li>
 * </ul>
 * Note that a {@code false} return may leave this instance partially configured, since fields are assigned as
 * the options are read.
 *
 * @param options
 *            the iterator options, keyed by option name; must not be {@code null}
 * @return {@code true} if all required options were present and consistent, {@code false} otherwise
 * @throws NumberFormatException
 *             if a numeric option that is parsed without a local fallback (for example the start/end time or the
 *             ivarator settings) is malformed
 * @throws IllegalArgumentException
 *             if the return type option does not name a {@code DocumentSerialization.ReturnType} constant
 * @throws RuntimeException
 *             wrapping an {@code IOException} raised while decoding the non-indexed data types or a batched
 *             query range
 */
@Override
public boolean validateOptions(Map<String, String> options) {
    if (log.isTraceEnabled()) {
        log.trace("Options: " + options);
    }

    this.options = options;

    // ---- evaluation switches and limits ----

    // whether Jexl evaluation is disabled
    if (options.containsKey(DISABLE_EVALUATION)) {
        this.disableEvaluation = Boolean.parseBoolean(options.get(DISABLE_EVALUATION));
    }

    if (options.containsKey(DISABLE_FIELD_INDEX_EVAL)) {
        this.disableFiEval = Boolean.parseBoolean(options.get(DISABLE_FIELD_INDEX_EVAL));
    }

    if (options.containsKey(LIMIT_OVERRIDE)) {
        this.limitOverride = Boolean.parseBoolean(options.get(LIMIT_OVERRIDE));
    }

    if (options.containsKey(LIMIT_SOURCES)) {
        try {
            this.sourceLimit = Long.parseLong(options.get(LIMIT_SOURCES));
        } catch (NumberFormatException nfe) {
            // a malformed limit falls back to -1 instead of failing validation
            this.sourceLimit = -1;
        }
    }

    if (options.containsKey(DISABLE_DOCUMENTS_WITHOUT_EVENTS)) {
        this.disableIndexOnlyDocuments = Boolean.parseBoolean(options.get(DISABLE_DOCUMENTS_WITHOUT_EVENTS));
    }

    // ---- query identity ----

    // A query is mandatory unless evaluation has been disabled.
    if (options.containsKey(QUERY)) {
        this.query = options.get(QUERY);
    } else if (!this.disableEvaluation) {
        log.error("If a query is not specified, evaluation must be disabled.");
        return false;
    }

    if (options.containsKey(QUERY_ID)) {
        this.queryId = options.get(QUERY_ID);
    }

    if (options.containsKey(SCAN_ID)) {
        this.scanId = options.get(SCAN_ID);
    }

    // ---- metadata ----

    // Read before the non-indexed data types below, which are decompressed when this flag is set.
    if (options.containsKey(QUERY_MAPPING_COMPRESS)) {
        compressedMappings = Boolean.parseBoolean(options.get(QUERY_MAPPING_COMPRESS));
    }

    this.validateTypeMetadata(options);

    if (options.containsKey(COMPOSITE_METADATA)) {
        String compositeMetadataString = options.get(COMPOSITE_METADATA);
        if (compositeMetadataString != null && !compositeMetadataString.isEmpty()) {
            // the composite metadata is passed as Base64-encoded bytes
            this.compositeMetadata = CompositeMetadata.fromBytes(java.util.Base64.getDecoder().decode(compositeMetadataString));
        }

        if (log.isTraceEnabled()) {
            log.trace("Using compositeMetadata: " + this.compositeMetadata);
        }
    }

    if (options.containsKey(COMPOSITE_SEEK_THRESHOLD)) {
        try {
            this.compositeSeekThreshold = Integer.parseInt(options.get(COMPOSITE_SEEK_THRESHOLD));
        } catch (NumberFormatException nfe) {
            // a malformed threshold falls back to 10 instead of failing validation
            this.compositeSeekThreshold = 10;
        }
    }

    // ---- response shaping ----

    // Currently writable, kryo or toString
    if (options.containsKey(Constants.RETURN_TYPE)) {
        setReturnType(DocumentSerialization.ReturnType.valueOf(options.get(Constants.RETURN_TYPE)));
    }

    // Boolean: should each attribute maintain a ColumnVisibility.
    if (options.containsKey(REDUCED_RESPONSE)) {
        setReducedResponse(Boolean.parseBoolean(options.get(REDUCED_RESPONSE)));
    }

    if (options.containsKey(FULL_TABLE_SCAN_ONLY)) {
        setFullTableScanOnly(Boolean.parseBoolean(options.get(FULL_TABLE_SCAN_ONLY)));
    }

    if (options.containsKey(TRACK_SIZES) && options.get(TRACK_SIZES) != null) {
        setTrackSizes(Boolean.parseBoolean(options.get(TRACK_SIZES)));
    }

    // ---- projection: whitelist and blacklist are mutually exclusive ----

    if (options.containsKey(PROJECTION_FIELDS)) {
        this.projectResults = true;
        this.useWhiteListedFields = true;

        String fieldList = options.get(PROJECTION_FIELDS);
        if (fieldList != null && EVERYTHING.equals(fieldList)) {
            this.whiteListedFields = UniversalSet.instance();
        } else if (fieldList != null && !fieldList.trim().equals("")) {
            this.whiteListedFields = new HashSet<>();
            Collections.addAll(this.whiteListedFields, StringUtils.split(fieldList, Constants.PARAM_VALUE_SEP));
        }

        // NOTE(review): when the field list is null or blank, whiteListedFields keeps whatever value it had
        // before this call; this add presumably relies on the field being initialized elsewhere - confirm.
        if (options.containsKey(HIT_LIST) && Boolean.parseBoolean(options.get(HIT_LIST))) {
            this.whiteListedFields.add(JexlEvaluation.HIT_TERM_FIELD);
        }
    }

    if (options.containsKey(BLACKLISTED_FIELDS)) {
        // projectResults can only be true here if the whitelist branch above ran (or it was already set)
        if (this.projectResults) {
            log.error("QueryOptions.PROJECTION_FIELDS and QueryOptions.BLACKLISTED_FIELDS are mutually exclusive");
            return false;
        }

        this.projectResults = true;
        this.useBlackListedFields = true;

        String fieldList = options.get(BLACKLISTED_FIELDS);
        if (fieldList != null && !fieldList.trim().equals("")) {
            this.blackListedFields = new HashSet<>();
            Collections.addAll(this.blackListedFields, StringUtils.split(fieldList, Constants.PARAM_VALUE_SEP));
        }
    }

    // ---- unconditional defaults ----

    this.equality = new PrefixEquality(PartialKey.ROW_COLFAM);
    this.evaluationFilter = null;
    this.getDocumentKey = GetStartKey.instance();
    this.mustUseFieldIndex = false;

    // ---- document content flags ----

    if (options.containsKey(FILTER_MASKED_VALUES)) {
        this.filterMaskedValues = Boolean.parseBoolean(options.get(FILTER_MASKED_VALUES));
    }

    if (options.containsKey(INCLUDE_DATATYPE)) {
        this.includeDatatype = Boolean.parseBoolean(options.get(INCLUDE_DATATYPE));
        if (this.includeDatatype) {
            this.datatypeKey = options.getOrDefault(DATATYPE_FIELDNAME, DEFAULT_DATATYPE_FIELDNAME);
        }
    }

    if (options.containsKey(INCLUDE_RECORD_ID)) {
        this.includeRecordId = Boolean.parseBoolean(options.get(INCLUDE_RECORD_ID));
    }

    // ---- timing and statsd ----

    if (options.containsKey(COLLECT_TIMING_DETAILS)) {
        this.collectTimingDetails = Boolean.parseBoolean(options.get(COLLECT_TIMING_DETAILS));
    }

    if (options.containsKey(STATSD_HOST_COLON_PORT)) {
        this.statsdHostAndPort = options.get(STATSD_HOST_COLON_PORT);
    }

    if (options.containsKey(STATSD_MAX_QUEUE_SIZE)) {
        this.statsdMaxQueueSize = Integer.parseInt(options.get(STATSD_MAX_QUEUE_SIZE));
    }

    if (options.containsKey(INCLUDE_HIERARCHY_FIELDS)) {
        this.includeHierarchyFields = Boolean.parseBoolean(options.get(INCLUDE_HIERARCHY_FIELDS));
    }

    // ---- datatype filters: identity functions unless a non-empty filter list is given ----

    if (options.containsKey(DATATYPE_FILTER)) {
        String filterCsv = options.get(DATATYPE_FILTER);
        if (filterCsv != null && !filterCsv.isEmpty()) {
            HashSet<String> set = Sets.newHashSet(StringUtils.split(filterCsv, ','));
            Iterable<Text> tformed = Iterables.transform(set, new StringToText());

            if (options.containsKey(SeekingQueryPlanner.MAX_KEYS_BEFORE_DATATYPE_SEEK)) {
                this.fieldIndexKeyDataTypeFilter = new FieldIndexKeyDataTypeFilter(tformed,
                                Integer.parseInt(options.get(SeekingQueryPlanner.MAX_KEYS_BEFORE_DATATYPE_SEEK)));
            } else {
                this.fieldIndexKeyDataTypeFilter = new FieldIndexKeyDataTypeFilter(tformed);
            }

            this.eventEntryKeyDataTypeFilter = new EventKeyDataTypeFilter(tformed);
        } else {
            this.fieldIndexKeyDataTypeFilter = KeyIdentity.Function;
            this.eventEntryKeyDataTypeFilter = KeyIdentity.Function;
        }
    } else {
        this.fieldIndexKeyDataTypeFilter = KeyIdentity.Function;
        this.eventEntryKeyDataTypeFilter = KeyIdentity.Function;
    }

    // ---- index fields ----

    // Index-only fields are mandatory unless this is a full-table-scan-only query.
    if (options.containsKey(INDEX_ONLY_FIELDS)) {
        this.indexOnlyFields = buildFieldSetFromString(options.get(INDEX_ONLY_FIELDS));
    } else if (!this.fullTableScanOnly) {
        log.error("A list of index only fields must be provided when running an optimized query");
        return false;
    }

    if (options.containsKey(INDEXED_FIELDS)) {
        this.indexedFields = buildFieldSetFromString(options.get(INDEXED_FIELDS));
    }

    // The max next count is only available when an evaluation filter exists; -1 is passed otherwise.
    this.fiAggregator = new IdentityAggregator(getNonEventFields(), getEvaluationFilter(),
                    getEvaluationFilter() != null ? getEvaluationFilter().getMaxNextCount() : -1);

    if (options.containsKey(IGNORE_COLUMN_FAMILIES)) {
        this.ignoreColumnFamilies = buildIgnoredColumnFamilies(options.get(IGNORE_COLUMN_FAMILIES));
    }

    // ---- time range: both bounds are mandatory and must be ordered ----

    if (options.containsKey(START_TIME)) {
        this.startTime = Long.parseLong(options.get(START_TIME));
    } else {
        log.error("Must pass a value for " + START_TIME);
        return false;
    }

    if (options.containsKey(END_TIME)) {
        this.endTime = Long.parseLong(options.get(END_TIME));
    } else {
        log.error("Must pass a value for " + END_TIME);
        return false;
    }

    if (this.endTime < this.startTime) {
        log.error("The startTime was greater than the endTime: " + this.startTime + " > " + this.endTime);
        return false;
    }

    this.timeFilter = new TimeFilter(startTime, endTime);

    // ---- grouping, limiting and uniqueness ----

    if (options.containsKey(INCLUDE_GROUPING_CONTEXT)) {
        this.setIncludeGroupingContext(Boolean.parseBoolean(options.get(INCLUDE_GROUPING_CONTEXT)));
    }

    if (options.containsKey(DOCUMENT_PERMUTATION_CLASSES)) {
        this.setDocumentPermutationClasses(options.get(DOCUMENT_PERMUTATION_CLASSES));
    }

    if (options.containsKey(LIMIT_FIELDS)) {
        // comma-separated list of key=value groups; groups without both a key and a value are skipped
        String limitFields = options.get(LIMIT_FIELDS);
        for (String paramGroup : Splitter.on(',').omitEmptyStrings().trimResults().split(limitFields)) {
            String[] keyAndValue = Iterables.toArray(Splitter.on('=').omitEmptyStrings().trimResults().split(paramGroup), String.class);
            if (keyAndValue != null && keyAndValue.length > 1) {
                this.getLimitFieldsMap().put(keyAndValue[0], Integer.parseInt(keyAndValue[1]));
            }
        }
    }

    if (options.containsKey(LIMIT_FIELDS_PRE_QUERY_EVALUATION)) {
        this.setLimitFieldsPreQueryEvaluation(Boolean.parseBoolean(options.get(LIMIT_FIELDS_PRE_QUERY_EVALUATION)));
    }

    if (options.containsKey(LIMIT_FIELDS_FIELD)) {
        this.setLimitFieldsField(options.get(LIMIT_FIELDS_FIELD));
    }

    if (options.containsKey(GROUP_FIELDS)) {
        String groupFields = options.get(GROUP_FIELDS);
        for (String param : Splitter.on(',').omitEmptyStrings().trimResults().split(groupFields)) {
            this.getGroupFields().add(param);
        }
    }

    if (options.containsKey(GROUP_FIELDS_BATCH_SIZE)) {
        String groupFieldsBatchSize = options.get(GROUP_FIELDS_BATCH_SIZE);
        int batchSize = Integer.parseInt(groupFieldsBatchSize);
        this.setGroupFieldsBatchSize(batchSize);
    }

    if (options.containsKey(UNIQUE_FIELDS)) {
        this.setUniqueFields(UniqueFields.from(options.get(UNIQUE_FIELDS)));
    }

    // ---- hit list and date index time travel ----

    if (options.containsKey(HIT_LIST)) {
        log.debug("Adding hitList to QueryOptions? " + options.get(HIT_LIST));
        if (Boolean.parseBoolean(options.get(HIT_LIST))) {
            this.setArithmetic(new HitListArithmetic());
        }
    } else {
        log.debug("options does not include key 'hit.list'");
    }

    if (options.containsKey(DATE_INDEX_TIME_TRAVEL)) {
        log.debug("Adding dateIndexTimeTravel to QueryOptions? " + options.get(DATE_INDEX_TIME_TRAVEL));
        boolean dateIndexTimeTravel = Boolean.parseBoolean(options.get(DATE_INDEX_TIME_TRAVEL));
        if (dateIndexTimeTravel) {
            this.setDateIndexTimeTravel(dateIndexTimeTravel);
        }
    }

    // ---- post processing and non-indexed data types ----

    if (options.containsKey(POSTPROCESSING_CLASSES)) {
        this.postProcessingFunctions = options.get(POSTPROCESSING_CLASSES);
        // test parsing of the functions
        getPostProcessingChain(new WrappingIterator<>());
    }

    if (options.containsKey(NON_INDEXED_DATATYPES)) {
        try {
            String nonIndexedDataTypes = options.get(NON_INDEXED_DATATYPES);
            if (compressedMappings) {
                nonIndexedDataTypes = decompressOption(nonIndexedDataTypes, QueryOptions.UTF8);
            }

            this.setNonIndexedDataTypeMap(buildFieldDataTypeMap(nonIndexedDataTypes));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    if (options.containsKey(CONTAINS_INDEX_ONLY_TERMS)) {
        this.setContainsIndexOnlyTerms(Boolean.parseBoolean(options.get(CONTAINS_INDEX_ONLY_TERMS)));
    }

    if (options.containsKey(ALLOW_FIELD_INDEX_EVALUATION)) {
        this.setAllowFieldIndexEvaluation(Boolean.parseBoolean(options.get(ALLOW_FIELD_INDEX_EVALUATION)));
    }

    if (options.containsKey(ALLOW_TERM_FREQUENCY_LOOKUP)) {
        this.setAllowTermFrequencyLookup(Boolean.parseBoolean(options.get(ALLOW_TERM_FREQUENCY_LOOKUP)));
    }

    // ---- HDFS, zookeeper and ivarator settings ----

    if (options.containsKey(HDFS_SITE_CONFIG_URLS)) {
        this.setHdfsSiteConfigURLs(options.get(HDFS_SITE_CONFIG_URLS));
    }

    if (options.containsKey(HDFS_FILE_COMPRESSION_CODEC)) {
        this.setHdfsFileCompressionCodec(options.get(HDFS_FILE_COMPRESSION_CODEC));
    }

    if (options.containsKey(ZOOKEEPER_CONFIG)) {
        this.setZookeeperConfig(options.get(ZOOKEEPER_CONFIG));
    }

    if (options.containsKey(IVARATOR_CACHE_DIR_CONFIG)) {
        try {
            this.setIvaratorCacheDirConfigs(IvaratorCacheDirConfig.fromJson(options.get(IVARATOR_CACHE_DIR_CONFIG)));
        } catch (JsonProcessingException e) {
            // an unparsable config is logged and skipped; validation continues
            log.warn("Unable to parse ivaratorCacheDirConfig.", e);
        }
    }

    if (options.containsKey(IVARATOR_CACHE_BUFFER_SIZE)) {
        this.setIvaratorCacheBufferSize(Integer.parseInt(options.get(IVARATOR_CACHE_BUFFER_SIZE)));
    }

    if (options.containsKey(IVARATOR_SCAN_PERSIST_THRESHOLD)) {
        this.setIvaratorCacheScanPersistThreshold(Long.parseLong(options.get(IVARATOR_SCAN_PERSIST_THRESHOLD)));
    }

    if (options.containsKey(IVARATOR_SCAN_TIMEOUT)) {
        this.setIvaratorCacheScanTimeout(Long.parseLong(options.get(IVARATOR_SCAN_TIMEOUT)));
    }

    if (options.containsKey(MAX_INDEX_RANGE_SPLIT)) {
        this.setMaxIndexRangeSplit(Integer.parseInt(options.get(MAX_INDEX_RANGE_SPLIT)));
    }

    if (options.containsKey(MAX_IVARATOR_OPEN_FILES)) {
        this.setIvaratorMaxOpenFiles(Integer.parseInt(options.get(MAX_IVARATOR_OPEN_FILES)));
    }

    if (options.containsKey(IVARATOR_NUM_RETRIES)) {
        this.setIvaratorNumRetries(Integer.parseInt(options.get(IVARATOR_NUM_RETRIES)));
    }

    if (options.containsKey(IVARATOR_PERSIST_VERIFY)) {
        // one flag drives both verification switches; the element count is carried over
        boolean verify = Boolean.parseBoolean(options.get(IVARATOR_PERSIST_VERIFY));
        FileSortedSet.PersistOptions persistOptions = getIvaratorPersistOptions();
        this.setIvaratorPersistOptions(new FileSortedSet.PersistOptions(verify, verify, persistOptions.getNumElementsToVerify()));
    }

    if (options.containsKey(IVARATOR_PERSIST_VERIFY_COUNT)) {
        // only the element count changes; the verification switches are carried over
        int numElements = Integer.parseInt(options.get(IVARATOR_PERSIST_VERIFY_COUNT));
        FileSortedSet.PersistOptions persistOptions = getIvaratorPersistOptions();
        this.setIvaratorPersistOptions(new FileSortedSet.PersistOptions(persistOptions.isVerifySize(), persistOptions.isVerifyElements(), numElements));
    }

    if (options.containsKey(MAX_IVARATOR_SOURCES)) {
        this.setMaxIvaratorSources(Integer.parseInt(options.get(MAX_IVARATOR_SOURCES)));
    }

    if (options.containsKey(MAX_IVARATOR_RESULTS)) {
        this.setMaxIvaratorResults(Long.parseLong(options.get(MAX_IVARATOR_RESULTS)));
    }

    // ---- yielding, compression and pipelines ----

    if (options.containsKey(YIELD_THRESHOLD_MS)) {
        this.setYieldThresholdMs(Long.parseLong(options.get(YIELD_THRESHOLD_MS)));
    }

    if (options.containsKey(COMPRESS_SERVER_SIDE_RESULTS)) {
        this.setCompressResults(Boolean.parseBoolean(options.get(COMPRESS_SERVER_SIDE_RESULTS)));
    }

    if (options.containsKey(MAX_EVALUATION_PIPELINES)) {
        this.setMaxEvaluationPipelines(Integer.parseInt(options.get(MAX_EVALUATION_PIPELINES)));
    }

    if (options.containsKey(SERIAL_EVALUATION_PIPELINE)) {
        this.setSerialEvaluationPipeline(Boolean.parseBoolean(options.get(SERIAL_EVALUATION_PIPELINE)));
    }

    if (options.containsKey(MAX_PIPELINE_CACHED_RESULTS)) {
        this.setMaxPipelineCachedResults(Integer.parseInt(options.get(MAX_PIPELINE_CACHED_RESULTS)));
    }

    // ---- term frequencies ----

    if (options.containsKey(TERM_FREQUENCIES_REQUIRED)) {
        this.setTermFrequenciesRequired(Boolean.parseBoolean(options.get(TERM_FREQUENCIES_REQUIRED)));
    }

    this.setTermFrequencyFields(parseTermFrequencyFields(options));
    this.setContentExpansionFields(parseContentExpansionFields(options));

    // ---- batched queries ----

    if (options.containsKey(BATCHED_QUERY)) {
        this.batchedQueries = Integer.parseInt(options.get(BATCHED_QUERY));

        if (this.batchedQueries > 0) {
            // combining is only meant to be used when threading is enabled
            if (maxEvaluationPipelines == 1) {
                maxEvaluationPipelines = 2;
            }

            batchStack = Queues.newArrayDeque();
            for (int i = 0; i < batchedQueries; i++) {
                String rangeValue = options.get(BATCHED_QUERY_RANGE_PREFIX + i);
                String queryValue = options.get(BATCHED_QUERY_PREFIX + i);

                // a batch entry is only queued when both its range and its query are present
                if (null != rangeValue && null != queryValue) {
                    try {
                        Range decodedRange = ColumnRangeIterator.decodeRange(rangeValue);
                        if (log.isTraceEnabled()) {
                            log.trace("Adding batch " + decodedRange + " " + queryValue);
                        }
                        batchStack.offer(Maps.immutableEntry(decodedRange, queryValue));
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }
            }
        }
    }

    // ---- miscellaneous ----

    // NOTE(review): this option is also handled earlier in this method via the setter (only when true);
    // the direct assignment here additionally covers the false case. Left as is.
    if (options.containsKey(DATE_INDEX_TIME_TRAVEL)) {
        this.dateIndexTimeTravel = Boolean.parseBoolean(options.get(DATE_INDEX_TIME_TRAVEL));
    }

    if (options.containsKey(SORTED_UIDS)) {
        this.sortedUIDs = Boolean.parseBoolean(options.get(SORTED_UIDS));
    }

    if (options.containsKey(DEBUG_MULTITHREADED_SOURCES)) {
        this.debugMultithreadedSources = Boolean.parseBoolean(options.get(DEBUG_MULTITHREADED_SOURCES));
    }

    // Take the name from the options; passing the current field value back to its own setter
    // would silently ignore the configured name.
    if (options.containsKey(ACTIVE_QUERY_LOG_NAME)) {
        setActiveQueryLogName(options.get(ACTIVE_QUERY_LOG_NAME));
    }

    return true;
}
Aggregations