Search in sources :

Example 1 with AbstractQueryTermContainer

use of org.vitrivr.cineast.core.data.query.containers.AbstractQueryTermContainer in project cineast by vitrivr.

the class CineastQueryService method getSimilar.

// TODO This has enormous code duplication with the TemporalQueryMessageHandler
/**
 * Executes every staged query contained in the given temporal query and streams the results to the observer.
 *
 * <p>The containers of the temporal query are processed one after another. Within a container the stages run
 * sequentially, while every query term of a stage is retrieved on its own thread. The segment ids found in a
 * stage become the relevant-segment filter of the next stage. Results of the last stage are sent as soon as they
 * are available (together with segment/object descriptors and metadata that were not sent before); results of
 * earlier stages are cached and only sent, filtered by the relevant ids of the stage config, once all stages ran.
 *
 * <p>NOTE(review): {@code relevantSegments}, the per-stage cache maps, {@code sentSegmentIds}/{@code sentObjectIds}
 * and {@code responseObserver} are used from several query-term threads without synchronization. Confirm whether
 * stages with more than one term need additional synchronization.
 *
 * @param query the temporal query; the config of its first entry is used as the config of the whole query
 * @param responseObserver receives the query results and is completed once all containers have been processed
 */
@Override
public void getSimilar(CineastGrpc.TemporalQuery query, StreamObserver<CineastGrpc.QueryResult> responseObserver) {
    StopWatch watch = StopWatch.createStarted();
    MediaSegmentReader mediaSegmentReader = new MediaSegmentReader(Config.sharedConfig().getDatabase().getSelectorSupplier().get());
    MediaObjectReader mediaObjectReader = new MediaObjectReader(Config.sharedConfig().getDatabase().getSelectorSupplier().get());
    MediaSegmentMetadataReader segmentMetadataReader = new MediaSegmentMetadataReader(Config.sharedConfig().getDatabase().getSelectorSupplier().get());
    MediaObjectMetadataReader objectMetadataReader = new MediaObjectMetadataReader(Config.sharedConfig().getDatabase().getSelectorSupplier().get());
    /* Ids for which descriptors and metadata have already been sent, so that nothing is sent twice. */
    Set<String> sentSegmentIds = new HashSet<>(), sentObjectIds = new HashSet<>();
    CineastGrpc.QueryConfig config = query.getQueryList().get(0).getConfig();
    ReadableQueryConfig rqconf = QueryContainerUtil.queryConfig(config);
    QueryConfig qconf = new QueryConfig(rqconf);
    /* Prepare QueryConfig (so as to obtain a QueryId). */
    final String uuid = qconf.getQueryId().toString();
    /* Fall back to the globally configured limits when the query does not specify its own. */
    final int max = qconf.getMaxResults().orElse(Config.sharedConfig().getRetriever().getMaxResults());
    qconf.setMaxResults(max);
    final int resultsPerModule = qconf.getRawResultsPerModule() == -1 ? Config.sharedConfig().getRetriever().getMaxResultsPerModule() : qconf.getResultsPerModule();
    qconf.setResultsPerModule(resultsPerModule);
    /* We iterate over all components independently, because they have a temporal context.*/
    for (int containerIdx = 0; containerIdx < query.getQueryCount(); containerIdx++) {
        List<QueryStage> stages = QueryContainerUtil.query(query.getQueryList().get(containerIdx));
        /* We make a new stagedQueryConfig per stage because the relevant segments will differ for each stage. This also resets the filter (relevant ids in the config)*/
        QueryConfig stageQConf = QueryConfig.clone(qconf);
        /* For the first stage, there will be no relevant segments when querying. This is ok because the retrieval engine handles this appropriately */
        HashSet<String> relevantSegments = new HashSet<>();
        /* Store for each queryterm per category all results to be sent at a later time */
        List<Map<String, List<StringDoublePair>>> cache = new ArrayList<>();
        /* For the terms of a stage, ordering matters. The assumption is that each term is used as a filter for its successor */
        for (int stageIndex = 0; stageIndex < stages.size(); stageIndex++) {
            /* Initialize stage with this hashmap */
            cache.add(stageIndex, new HashMap<>());
            QueryStage stage = stages.get(stageIndex);
            List<Thread> qtThreads = new ArrayList<>();
            /* We now iterate over all QueryTerms for this stage, simply adding their results to the list of relevant segments for the next querystage.
             * The list is only updated once we've iterated over all terms
             */
            for (int i = 0; i < stage.getQueryTerms().size(); i++) {
                QueryTerm qt = stage.getQueryTerms().get(i);
                final int finalContainerIdx = containerIdx;
                final int finalStageIndex = stageIndex;
                Thread qtRetrievalThread = new Thread(() -> {
                    /* Prepare QueryTerm and perform sanity-checks */
                    if (qt == null) {
                        /* In rare instances, it is possible to have null as query stage. If this happens to you, please report this to the developers so we can try to fix it. */
                        LOGGER.warn("QueryTerm was null for stage {}", stage);
                        return;
                    }
                    AbstractQueryTermContainer qc = qt.getContainer();
                    if (qc == null) {
                        LOGGER.warn("Likely an empty query, as it could not be converted to a query container. Ignoring it");
                        return;
                    }
                    /* For each category of a specific queryterm, we actually go and retrieve. Be aware that we do not change the relevant ids after this call */
                    for (String category : qt.getCategories()) {
                        /* Merge partial results with score-map */
                        List<SegmentScoreElement> scores = continuousRetrievalLogic.retrieve(qc, category, stageQConf);
                        /* Transform raw results into list of StringDoublePairs (segmentId -> score): keep positive scores only, sort, and cut off at max */
                        final List<StringDoublePair> results = scores.stream().map(elem -> new StringDoublePair(elem.getSegmentId(), elem.getScore())).filter(p -> p.value > 0d).sorted(StringDoublePair.COMPARATOR).limit(max).collect(Collectors.toList());
                        if (results.isEmpty()) {
                            LOGGER.warn("No results found for category {} and qt {} in stage with id {}. Full compoment: {}", category, qt, finalContainerIdx, stage);
                        }
                        if (cache.get(finalStageIndex).containsKey(category)) {
                            LOGGER.error("Category {} was used twice in stage {}. This erases the results of the previous category... ", category, finalStageIndex);
                        }
                        cache.get(finalStageIndex).put(category, results);
                        results.forEach(res -> relevantSegments.add(res.key));
                        LOGGER.trace("Category {} at stage {} executed @ {} ms", category, finalStageIndex, watch.getTime(TimeUnit.MILLISECONDS));
                        /* If this is the last stage, we can send relevant results per category back to the UI.
                         * Otherwise, we cannot since we might send results to the UI which would be filtered at a later stage
                         */
                        if (finalStageIndex == stages.size() - 1) {
                            /* Finalize and submit per-container results */
                            responseObserver.onNext(QueryContainerUtil.queryResult(QueryContainerUtil.similarityQueryResult(qt.getQueryConfig().getQueryId().toString(), category, results)));
                            /* Only look up and send segments that have not been sent before. */
                            List<String> segmentIds = results.stream().map(x -> x.key).filter(x -> !sentSegmentIds.contains(x)).collect(Collectors.toList());
                            if (segmentIds.isEmpty()) {
                                continue;
                            }
                            Map<String, MediaSegmentDescriptor> segments = mediaSegmentReader.lookUpSegments(segmentIds);
                            responseObserver.onNext(QueryContainerUtil.queryResult(CineastGrpc.MediaSegmentQueryResult.newBuilder().addAllSegments(segments.values().stream().map(MediaSegmentUtil::fromMediaSegmentDescriptor).collect(Collectors.toList())).build()));
                            List<MediaSegmentMetadataDescriptor> segmentMetaData = segmentMetadataReader.lookupMultimediaMetadata(segmentIds);
                            responseObserver.onNext(QueryContainerUtil.queryResult(CineastGrpc.MediaSegmentMetaDataQueryResult.newBuilder().addAllSegmentMetaData(segmentMetaData.stream().map(QueryContainerUtil::mediaSegmentMetaData).collect(Collectors.toList())).build()));
                            sentSegmentIds.addAll(segmentIds);
                            /* Same de-duplication for the objects the new segments belong to. */
                            List<String> objectIds = segments.values().stream().map(MediaSegmentDescriptor::getObjectId).filter(x -> !sentObjectIds.contains(x)).collect(Collectors.toList());
                            if (objectIds.isEmpty()) {
                                continue;
                            }
                            Map<String, MediaObjectDescriptor> objects = mediaObjectReader.lookUpObjects(objectIds);
                            responseObserver.onNext(QueryContainerUtil.queryResult(CineastGrpc.MediaObjectQueryResult.newBuilder().addAllObjects(objects.values().stream().map(MediaObjectUtil::fromMediaObjectDescriptor).collect(Collectors.toList())).build()));
                            List<MediaObjectMetadataDescriptor> objectMetaData = objectMetadataReader.lookupMultimediaMetadata(objectIds);
                            responseObserver.onNext(QueryContainerUtil.queryResult(CineastGrpc.MediaObjectMetaDataQueryResult.newBuilder().addAllObjectMetaData(objectMetaData.stream().map(QueryContainerUtil::mediaObjectMetaData).collect(Collectors.toList())).build()));
                            sentObjectIds.addAll(objectIds);
                        }
                    }
                    /* We're done for this querycontainer */
                });
                // TODO Better name
                /* Guard against a null term here as well: the null check inside the thread must be reachable, so naming the thread may not dereference qt. */
                qtRetrievalThread.setName("qt-stage" + stageIndex + "-" + (qt == null ? null : qt.getCategories()));
                qtThreads.add(qtRetrievalThread);
                qtRetrievalThread.start();
            }
            /* Wait for all terms of this stage before the relevant segments are handed to the next stage. */
            for (Thread thread : qtThreads) {
                try {
                    thread.join();
                } catch (InterruptedException e) {
                    /* The interrupt flag is deliberately not restored here: doing so would make every remaining join() fail immediately and the stage would continue with partial results. */
                    LOGGER.error("Interrupted while waiting for query term threads of stage {}", stageIndex, e);
                }
            }
            /* After we are done with a stage, we add all relevant segments to the config for the next stage. */
            if (relevantSegments.size() == 0) {
                LOGGER.warn("No relevant segments anymore, aborting staged querying");
                /* Clear relevant segments (there are none) */
                stageQConf.setRelevantSegmentIds(relevantSegments);
                break;
            }
            stageQConf.setRelevantSegmentIds(relevantSegments);
            relevantSegments.clear();
        }
        /* At this point, we have iterated over all stages. Now, we need to go back for all stages and send the results for the relevant ids. */
        for (int stageIndex = 0; stageIndex < stages.size() - 1; stageIndex++) {
            cache.get(stageIndex).forEach((category, results) -> {
                results.removeIf(pair -> !stageQConf.getRelevantSegmentIds().contains(pair.key));
                // TODO This assumes that all queries in a temporalquery have the same uuid
                responseObserver.onNext(QueryContainerUtil.queryResult(QueryContainerUtil.similarityQueryResult(uuid, category, results)));
            });
        }
        /* There should be no carry-over from this block since temporal queries are executed independently */
    }
    /* At this point, all StagedQueries have been executed for this TemporalQuery.
     * Since results have always been sent for the final stage or, when appropriate, in intermediate steps, there's nothing left to do.
     */
    responseObserver.onCompleted();
    /* Release all four readers opened at the top of the method. */
    mediaSegmentReader.close();
    mediaObjectReader.close();
    segmentMetadataReader.close();
    objectMetadataReader.close();
    watch.stop();
    LOGGER.debug("Query executed in {} ms", watch.getTime(TimeUnit.MILLISECONDS));
}
Also used : MediaObjectUtil(org.vitrivr.cineast.api.grpc.util.MediaObjectUtil) AbstractQueryTermContainer(org.vitrivr.cineast.core.data.query.containers.AbstractQueryTermContainer) HashMap(java.util.HashMap) MediaSegmentUtil(org.vitrivr.cineast.api.grpc.util.MediaSegmentUtil) QueryStage(org.vitrivr.cineast.api.grpc.data.QueryStage) MediaObjectMetadataDescriptor(org.vitrivr.cineast.core.data.entities.MediaObjectMetadataDescriptor) MediaObjectDescriptor(org.vitrivr.cineast.core.data.entities.MediaObjectDescriptor) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StreamObserver(io.grpc.stub.StreamObserver) MediaObjectReader(org.vitrivr.cineast.core.db.dao.reader.MediaObjectReader) QueryContainerUtil(org.vitrivr.cineast.api.grpc.util.QueryContainerUtil) Map(java.util.Map) MediaSegmentDescriptor(org.vitrivr.cineast.core.data.entities.MediaSegmentDescriptor) ContinuousRetrievalLogic(org.vitrivr.cineast.standalone.util.ContinuousRetrievalLogic) MediaSegmentReader(org.vitrivr.cineast.core.db.dao.reader.MediaSegmentReader) MediaObjectMetadataReader(org.vitrivr.cineast.core.db.dao.reader.MediaObjectMetadataReader) QueryConfig(org.vitrivr.cineast.core.config.QueryConfig) Set(java.util.Set) StopWatch(org.apache.commons.lang3.time.StopWatch) ReadableQueryConfig(org.vitrivr.cineast.core.config.ReadableQueryConfig) StringDoublePair(org.vitrivr.cineast.core.data.StringDoublePair) Collectors(java.util.stream.Collectors) TimeUnit(java.util.concurrent.TimeUnit) QueryUtil(org.vitrivr.cineast.api.util.QueryUtil) List(java.util.List) Logger(org.apache.logging.log4j.Logger) SegmentScoreElement(org.vitrivr.cineast.core.data.score.SegmentScoreElement) MediaSegmentMetadataDescriptor(org.vitrivr.cineast.core.data.entities.MediaSegmentMetadataDescriptor) MediaSegmentMetadataReader(org.vitrivr.cineast.core.db.dao.reader.MediaSegmentMetadataReader) LogManager(org.apache.logging.log4j.LogManager) QueryTerm(org.vitrivr.cineast.api.grpc.data.QueryTerm) 
Config(org.vitrivr.cineast.standalone.config.Config) MediaObjectDescriptor(org.vitrivr.cineast.core.data.entities.MediaObjectDescriptor) AbstractQueryTermContainer(org.vitrivr.cineast.core.data.query.containers.AbstractQueryTermContainer) ArrayList(java.util.ArrayList) MediaObjectReader(org.vitrivr.cineast.core.db.dao.reader.MediaObjectReader) QueryTerm(org.vitrivr.cineast.api.grpc.data.QueryTerm) MediaObjectMetadataReader(org.vitrivr.cineast.core.db.dao.reader.MediaObjectMetadataReader) StringDoublePair(org.vitrivr.cineast.core.data.StringDoublePair) MediaSegmentMetadataDescriptor(org.vitrivr.cineast.core.data.entities.MediaSegmentMetadataDescriptor) QueryStage(org.vitrivr.cineast.api.grpc.data.QueryStage) MediaObjectMetadataDescriptor(org.vitrivr.cineast.core.data.entities.MediaObjectMetadataDescriptor) HashSet(java.util.HashSet) MediaSegmentMetadataReader(org.vitrivr.cineast.core.db.dao.reader.MediaSegmentMetadataReader) QueryConfig(org.vitrivr.cineast.core.config.QueryConfig) ReadableQueryConfig(org.vitrivr.cineast.core.config.ReadableQueryConfig) StopWatch(org.apache.commons.lang3.time.StopWatch) ReadableQueryConfig(org.vitrivr.cineast.core.config.ReadableQueryConfig) MediaSegmentReader(org.vitrivr.cineast.core.db.dao.reader.MediaSegmentReader) SegmentScoreElement(org.vitrivr.cineast.core.data.score.SegmentScoreElement) MediaSegmentDescriptor(org.vitrivr.cineast.core.data.entities.MediaSegmentDescriptor) HashMap(java.util.HashMap) Map(java.util.Map)

Example 2 with AbstractQueryTermContainer

use of org.vitrivr.cineast.core.data.query.containers.AbstractQueryTermContainer in project cineast by vitrivr.

the class EvaluationRuntime method call.

/**
 * Executes the evaluation and returns a Triple that contains the number of files that were processed, skipped due to errors and skipped deliberately. The actual evaluation results are written to files.
 *
 * <p>Every regular, readable, non-hidden file below the configured test-file path is converted into a query
 * container and evaluated once per configured category. In {@code RANDOM} mode each file is skipped with a
 * probability of one half.
 *
 * @return triple of (processed, skipped due to errors, skipped deliberately) file counts
 * @throws EvaluationException if the converter cannot be instantiated, the ground truth contains no classes or the test files cannot be listed
 * @throws IOException if a per-category result directory cannot be created (presumably also if writing a result file fails)
 * @see EvaluationConfig
 */
@Override
public Triple<Integer, Integer, Integer> call() throws EvaluationException, IOException {
    /* Tries to instantiate the converter. */
    final Converter converter = this.config.getConverter();
    if (converter == null) {
        throw new EvaluationException("Failed to instantiate the converter class.");
    }
    /* Instantiates the groundtruth and checks if it contains classes. */
    final Groundtruth gt = this.config.getGroundtruth();
    if (gt.numberOfClasses() == 0) {
        throw new EvaluationException(String.format("The specified ground truth '%s' does not contain any classes.", this.config.getClassfile()));
    }
    /* Updates the retrieval configuration. */
    Config.sharedConfig().getRetriever().setMaxResults(this.config.getSize());
    Config.sharedConfig().getRetriever().setResultsPerModule(this.config.getSize());
    /* Opens the directory walk; only a failure to open it is translated into an EvaluationException. */
    final java.util.stream.Stream<Path> walk;
    try {
        walk = Files.walk(this.config.getTestfiles());
    } catch (IOException exception) {
        throw new EvaluationException(String.format("Could not obtain test files under the specified path '%s'.", this.config.getTestfiles()));
    }
    /* Files.walk holds open directory handles, so the stream must be closed once the evaluation is done or fails. */
    try (java.util.stream.Stream<Path> testfiles = walk) {
        /* Prepares the iterator for the test files. */
        final Iterator<Path> testfilesIterator = testfiles.filter(p -> {
            try {
                return Files.exists(p) && Files.isRegularFile(p) && !Files.isHidden(p) && Files.isReadable(p);
            } catch (IOException e) {
                /* Pass the exception as trailing argument so that its stack trace is actually logged. */
                LOGGER.error("An IO exception occurred while testing the media file at '{}'.", p, e);
                return false;
            }
        }).iterator();
        /* Prepare folder structure per category. */
        for (String category : this.config.getCategories()) {
            Files.createDirectories(this.config.getResults().resolve(category));
        }
        /* Prepare a placeholder query-config. */
        final ReadableQueryConfig queryConfig = new ReadableQueryConfig(null);
        /* Prepare a random number generator that decides if a file should be used for evaluation or not. */
        final Random random = new Random();
        /* Perform evaluation for every file. */
        while (testfilesIterator.hasNext()) {
            final Path path = testfilesIterator.next();
            if (random.nextBoolean() && config.getMode() == EvaluationConfig.EvaluationMode.RANDOM) {
                LOGGER.info("Randomly skipping file {}.", path);
                this.skipped += 1;
                continue;
            }
            /* Try to create a QueryContainer. If this fails, the file is skipped. */
            final AbstractQueryTermContainer container = converter.convert(path);
            if (container == null) {
                LOGGER.warn("Failed to convert the file {}. File is being skipped...", path.getFileName());
                this.error += 1;
                continue;
            }
            LOGGER.info("Starting evaluation for {}", path);
            for (String category : this.config.getCategories()) {
                List<SegmentScoreElement> scores = this.retrievalLogic.retrieve(container, category, queryConfig);
                EvaluationResult result = this.performEvaluation(scores, path, gt);
                this.writeToFile(category, result);
            }
            this.processed += 1;
        }
    }
    return new ImmutableTriple<>(this.processed, this.error, this.skipped);
}
Also used : Path(java.nio.file.Path) AbstractQueryTermContainer(org.vitrivr.cineast.core.data.query.containers.AbstractQueryTermContainer) ImmutableTriple(org.apache.commons.lang3.tuple.ImmutableTriple) IOException(java.io.IOException) ReadableQueryConfig(org.vitrivr.cineast.core.config.ReadableQueryConfig) Random(java.util.Random) SegmentScoreElement(org.vitrivr.cineast.core.data.score.SegmentScoreElement) Converter(org.vitrivr.cineast.core.extraction.decode.general.Converter)

Example 3 with AbstractQueryTermContainer

use of org.vitrivr.cineast.core.data.query.containers.AbstractQueryTermContainer in project cineast by vitrivr.

the class QueryUtil method retrieveCategory.

/**
 * Retrieves the given category for all query containers, sums up the weighted scores per segment and returns
 * the segments with a positive total score, sorted by {@link StringDoublePair#COMPARATOR}.
 *
 * <p>The result is limited to the max-results value of the first non-null entry's query config; when there is
 * no such config (e.g. for an empty list) the globally configured maximum is used.
 *
 * @param continuousRetrievalLogic retrieval logic used to execute the individual lookups
 * @param queryContainers pairs of query container and the config to query it with; {@code null} entries are skipped
 * @param category the category to retrieve
 * @return the sorted and limited list of (segmentId, score) pairs; empty if nothing scored above zero
 */
public static List<StringDoublePair> retrieveCategory(ContinuousRetrievalLogic continuousRetrievalLogic, List<Pair<AbstractQueryTermContainer, ReadableQueryConfig>> queryContainers, String category) {
    TObjectDoubleHashMap<String> scoreBySegmentId = new TObjectDoubleHashMap<>();
    /* Config of the first non-null pair that carries one; used further down to limit the result size. */
    ReadableQueryConfig limitConfig = null;
    for (Pair<AbstractQueryTermContainer, ReadableQueryConfig> pair : queryContainers) {
        if (pair == null) {
            continue;
        }
        AbstractQueryTermContainer qc = pair.first;
        ReadableQueryConfig qconf = pair.second;
        if (limitConfig == null) {
            limitConfig = qconf;
        }
        /* Weights outside of [-1, 1] are clamped. */
        float weight = MathHelper.limit(qc.getWeight(), -1f, 1f);
        retrieveAndWeight(continuousRetrievalLogic, category, scoreBySegmentId, qc, qconf, weight);
    }
    final List<StringDoublePair> list = new ArrayList<>(scoreBySegmentId.size());
    scoreBySegmentId.forEachEntry((segmentId, score) -> {
        if (score > 0) {
            list.add(new StringDoublePair(segmentId, score));
        }
        /* Returning true keeps the iteration going. */
        return true;
    });
    list.sort(StringDoublePair.COMPARATOR);
    // FIXME: Using an arbitrary query config to limit results is prone to errors
    /* Do not index into the list blindly: it may be empty or start with a null entry, both of which the loop above tolerates. */
    int maxResults = Config.sharedConfig().getRetriever().getMaxResults();
    if (limitConfig != null) {
        maxResults = limitConfig.getMaxResults().orElse(maxResults);
    }
    List<StringDoublePair> resultList = list;
    if (list.size() > maxResults) {
        resultList = resultList.subList(0, maxResults);
    }
    return resultList;
}
Also used : ReadableQueryConfig(org.vitrivr.cineast.core.config.ReadableQueryConfig) StringDoublePair(org.vitrivr.cineast.core.data.StringDoublePair) TObjectDoubleHashMap(gnu.trove.map.hash.TObjectDoubleHashMap) AbstractQueryTermContainer(org.vitrivr.cineast.core.data.query.containers.AbstractQueryTermContainer) ArrayList(java.util.ArrayList)

Example 4 with AbstractQueryTermContainer

use of org.vitrivr.cineast.core.data.query.containers.AbstractQueryTermContainer in project cineast by vitrivr.

the class TemporalQueryMessageHandler method execute.

@Override
public void execute(Session session, QueryConfig qconf, TemporalQuery message, Set<String> segmentIdsForWhichMetadataIsFetched, Set<String> objectIdsForWhichMetadataIsFetched) throws Exception {
    /* Prepare the query config and get the QueryId */
    final String uuid = qconf.getQueryId().toString();
    String qid = uuid.substring(0, 3);
    final int max = Math.min(qconf.getMaxResults().orElse(Config.sharedConfig().getRetriever().getMaxResults()), Config.sharedConfig().getRetriever().getMaxResults());
    qconf.setMaxResults(max);
    final int resultsPerModule = Math.min(qconf.getRawResultsPerModule() == -1 ? Config.sharedConfig().getRetriever().getMaxResultsPerModule() : qconf.getResultsPerModule(), Config.sharedConfig().getRetriever().getMaxResultsPerModule());
    qconf.setResultsPerModule(resultsPerModule);
    List<Thread> metadataRetrievalThreads = new ArrayList<>();
    List<CompletableFuture<Void>> futures = new ArrayList<>();
    List<Thread> cleanupThreads = new ArrayList<>();
    /* We need a set of segments and objects to be used for temporal scoring as well as a storage of all container results where are the index of the outer list is where container i was scored */
    Map<Integer, List<StringDoublePair>> containerResults = new IntObjectHashMap<>();
    Set<MediaSegmentDescriptor> segments = new HashSet<>();
    Set<String> sentSegmentIds = new HashSet<>();
    Set<String> sentObjectIds = new HashSet<>();
    /* Each container can be evaluated in parallel, provided resouces are available */
    List<Thread> ssqThreads = new ArrayList<>();
    /* Iterate over all temporal query containers independently */
    for (int containerIdx = 0; containerIdx < message.getQueries().size(); containerIdx++) {
        StagedSimilarityQuery stagedSimilarityQuery = message.getQueries().get(containerIdx);
        /* Make a new Query config for this container because the relevant segments from the previous stage will differ within this container from stage to stage.  */
        QueryConfig stageQConf = QueryConfig.clone(qconf);
        QueryConfig limitedStageQConf = QueryConfig.clone(qconf);
        /* The first stage of a container will have no relevant segments from a previous stage. The retrieval engine will handle this case. */
        HashSet<String> relevantSegments = new HashSet<>();
        HashSet<String> limitedRelevantSegments = new HashSet<>();
        /*
       * Store for each query term per category all results to be sent at a later time
       */
        List<Map<String, List<StringDoublePair>>> cache = new ArrayList<>();
        /* For the temporal scoring, we need to store the relevant results of the stage to be saved to the containerResults */
        List<StringDoublePair> stageResults = new ArrayList<>();
        int lambdaFinalContainerIdx = containerIdx;
        /*
       * The lightweight, but blocking logic of waiting for retrieval results is launched as a thread.
       * The results of this thread will be awaited after all containers have started their retrieval process
       */
        Thread ssqThread = new Thread(() -> {
            /* Iterate over all stages in their respective order as each term of one stage will be used as a filter for its successors */
            for (int stageIndex = 0; stageIndex < stagedSimilarityQuery.getStages().size(); stageIndex++) {
                /* Create hashmap for this stage as cache */
                cache.add(stageIndex, new HashMap<>());
                QueryStage stage = stagedSimilarityQuery.getStages().get(stageIndex);
                /*
           * Iterate over all QueryTerms for this stage and add their results to the list of relevant segments for the next query stage.
           * Only update the list of relevant query terms once we iterated over all terms
           */
                for (int i = 0; i < stage.terms.size(); i++) {
                    QueryTerm qt = stage.terms.get(i);
                    /* Prepare the QueryTerm and perform sanity checks */
                    if (qt == null) {
                        /* There are edge cases in which we have a null as a query stage. If this happens please report this to the developers  */
                        LOGGER.warn("QueryTerm was null for stage {}", stage);
                        return;
                    }
                    AbstractQueryTermContainer qc = qt.toContainer();
                    if (qc == null) {
                        LOGGER.warn("Likely an empty query, as it could not be converted to a query container. Ignoring it");
                        return;
                    }
                    /* We retrieve the results for each category of a QueryTerm independently. The relevant ids will not yet be changed after this call as we are still in the same stage. */
                    for (String category : qt.getCategories()) {
                        List<SegmentScoreElement> scores = continuousRetrievalLogic.retrieve(qc, category, stageQConf);
                        final List<StringDoublePair> results = scores.stream().map(elem -> new StringDoublePair(elem.getSegmentId(), elem.getScore())).filter(p -> p.value > 0d).sorted(StringDoublePair.COMPARATOR).collect(Collectors.toList());
                        if (results.isEmpty()) {
                            LOGGER.warn("No results found for category {} and qt {} in stage with id {}. Full component: {}", category, qt.getType(), lambdaFinalContainerIdx, stage);
                        }
                        if (cache.get(stageIndex).containsKey(category)) {
                            LOGGER.error("Category {} was used twice in stage {}. This erases the results of the previous category... ", category, stageIndex);
                        }
                        cache.get(stageIndex).put(category, results);
                        results.forEach(res -> relevantSegments.add(res.key));
                        /*
               * If this is the last stage, we can collect the results and send relevant results per category back the requester.
               * Otherwise we shouldn't yet send since we might send results to the requester that would be filtered at a later stage.
               */
                        if (stageIndex == stagedSimilarityQuery.getStages().size() - 1) {
                            /* We limit the results to be sent back to the requester to the max limit. This is so that the original view is not affected by the changes of temporal query version 2 */
                            List<StringDoublePair> limitedResults = results.stream().limit(max).collect(Collectors.toList());
                            results.forEach(res -> limitedRelevantSegments.add(res.key));
                            List<String> limitedSegmentIds = limitedResults.stream().map(el -> el.key).collect(Collectors.toList());
                            sentSegmentIds.addAll(limitedSegmentIds);
                            List<MediaSegmentDescriptor> limitedSegmentDescriptors = this.loadSegments(limitedSegmentIds, qid);
                            /* Store the segments and results for this staged query to be used in the temporal querying. */
                            segments.addAll(limitedSegmentDescriptors);
                            stageResults.addAll(results);
                            List<String> limitedObjectIds = this.submitPrefetchedSegmentAndObjectInformation(session, uuid, limitedSegmentDescriptors);
                            sentObjectIds.addAll(limitedObjectIds);
                            LOGGER.trace("Queueing finalization and result submission for last stage, container {}", lambdaFinalContainerIdx);
                            futures.addAll(this.finalizeAndSubmitResults(session, uuid, category, lambdaFinalContainerIdx, limitedResults));
                            List<Thread> _threads = this.submitMetadata(session, uuid, limitedSegmentIds, limitedObjectIds, segmentIdsForWhichMetadataIsFetched, objectIdsForWhichMetadataIsFetched, message.getMetadataAccessSpec());
                            metadataRetrievalThreads.addAll(_threads);
                        }
                    }
                }
                /* After having finished a stage, we add all relevant segments to the config of the next stage. */
                if (relevantSegments.size() == 0) {
                    LOGGER.warn("No relevant segments anymore, aborting staged querying");
                    /* Clear the relevant segments are there are none */
                    stageQConf.setRelevantSegmentIds(relevantSegments);
                    break;
                }
                stageQConf.setRelevantSegmentIds(relevantSegments);
                relevantSegments.clear();
            }
            limitedStageQConf.setRelevantSegmentIds(limitedRelevantSegments);
            /* At this point, we have iterated over all stages. Now, we need to go back for all stages and send the results for the relevant ids. */
            for (int stageIndex = 0; stageIndex < stagedSimilarityQuery.getStages().size() - 1; stageIndex++) {
                int finalStageIndex = stageIndex;
                /* Add the results from the last filter from all previous stages also to the list of results */
                cache.get(stageIndex).forEach((category, results) -> {
                    results.removeIf(pair -> !stageQConf.getRelevantSegmentIds().contains(pair.key));
                    stageResults.addAll(results);
                });
                /* Return the limited results from all stages that are within the filter */
                cache.get(stageIndex).forEach((category, results) -> {
                    results.removeIf(pair -> !limitedStageQConf.getRelevantSegmentIds().contains(pair.key));
                    Thread thread = new Thread(() -> {
                        LOGGER.trace("Queuing finalization & result submission for stage {} and container {}", finalStageIndex, lambdaFinalContainerIdx);
                        futures.addAll(this.finalizeAndSubmitResults(session, uuid, category, lambdaFinalContainerIdx, results));
                    });
                    thread.setName("finalization-stage" + finalStageIndex + "-" + category);
                    thread.start();
                    cleanupThreads.add(thread);
                });
            }
            /* There should be no carry-over from this block since temporal queries are executed independently */
            containerResults.put(lambdaFinalContainerIdx, stageResults);
        });
        ssqThread.setName("ssq-" + containerIdx);
        ssqThreads.add(ssqThread);
        ssqThread.start();
    }
    for (Thread ssqThread : ssqThreads) {
        ssqThread.join();
    }
    /* You can skip the computation of temporal objects in the config if you wish simply to execute all queries independently (e.g. for evaluation)*/
    if (!message.getTemporalQueryConfig().computeTemporalObjects) {
        LOGGER.debug("Not computing temporal objects due to query config");
        finish(metadataRetrievalThreads, cleanupThreads);
        return;
    }
    LOGGER.debug("Starting fusion for temporal context");
    long start = System.currentTimeMillis();
    /* Retrieve the MediaSegmentDescriptors needed for the temporal scoring retrieval */
    Map<String, MediaSegmentDescriptor> segmentMap = segments.stream().distinct().collect(Collectors.toMap(MediaSegmentDescriptor::getSegmentId, x -> x, (x1, x2) -> x1));
    /* Initialise the temporal scoring algorithms depending on timeDistances list */
    List<List<StringDoublePair>> tmpContainerResults = new ArrayList<>();
    IntStream.range(0, message.getQueries().size()).forEach(idx -> tmpContainerResults.add(containerResults.getOrDefault(idx, new ArrayList<>())));
    /* Score and retrieve the results */
    List<TemporalObject> results = TemporalScoring.score(segmentMap, tmpContainerResults, message.getTimeDistances(), message.getMaxLength());
    List<TemporalObject> finalResults = results.stream().sorted(TemporalObject.COMPARATOR.reversed()).limit(max).collect(Collectors.toList());
    LOGGER.debug("Temporal scoring done in {} ms, {} results", System.currentTimeMillis() - start, finalResults.size());
    /* Retrieve the segment Ids of the newly scored segments */
    List<String> segmentIds = finalResults.stream().map(TemporalObject::getSegments).flatMap(List::stream).collect(Collectors.toList());
    /* Send potential information not already sent  */
    /* Maybe change from list to set? */
    segmentIds = segmentIds.stream().filter(s -> !sentSegmentIds.contains(s)).collect(Collectors.toList());
    List<String> objectIds = segments.stream().map(MediaSegmentDescriptor::getObjectId).collect(Collectors.toList());
    objectIds = objectIds.stream().filter(s -> !sentObjectIds.contains(s)).collect(Collectors.toList());
    /* If necessary, send to the UI */
    if (segmentIds.size() != 0 && objectIds.size() != 0) {
        this.submitSegmentAndObjectInformationFromIds(session, uuid, segmentIds, objectIds);
        /* Retrieve and send metadata for items not already sent */
        List<Thread> _threads = this.submitMetadata(session, uuid, segmentIds, objectIds, segmentIdsForWhichMetadataIsFetched, objectIdsForWhichMetadataIsFetched, message.getMetadataAccessSpec());
        metadataRetrievalThreads.addAll(_threads);
    }
    /* Send scoring results to the frontend */
    if (finalResults.size() > 0) {
        futures.addAll(this.finalizeAndSubmitTemporalResults(session, uuid, finalResults));
        futures.forEach(CompletableFuture::join);
    }
    finish(metadataRetrievalThreads, cleanupThreads);
}
Also used : IntStream(java.util.stream.IntStream) QueryStage(org.vitrivr.cineast.api.messages.query.QueryStage) TemporalQuery(org.vitrivr.cineast.api.messages.query.TemporalQuery) StagedSimilarityQuery(org.vitrivr.cineast.api.messages.query.StagedSimilarityQuery) AbstractQueryTermContainer(org.vitrivr.cineast.core.data.query.containers.AbstractQueryTermContainer) TemporalObject(org.vitrivr.cineast.core.data.TemporalObject) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) TemporalScoring(org.vitrivr.cineast.core.temporal.TemporalScoring) QueryTerm(org.vitrivr.cineast.api.messages.query.QueryTerm) Map(java.util.Map) Session(org.eclipse.jetty.websocket.api.Session) IntObjectHashMap(io.netty.util.collection.IntObjectHashMap) MediaSegmentDescriptor(org.vitrivr.cineast.core.data.entities.MediaSegmentDescriptor) ContinuousRetrievalLogic(org.vitrivr.cineast.standalone.util.ContinuousRetrievalLogic) QueryConfig(org.vitrivr.cineast.core.config.QueryConfig) Set(java.util.Set) StringDoublePair(org.vitrivr.cineast.core.data.StringDoublePair) Collectors(java.util.stream.Collectors) List(java.util.List) Logger(org.apache.logging.log4j.Logger) SegmentScoreElement(org.vitrivr.cineast.core.data.score.SegmentScoreElement) LogManager(org.apache.logging.log4j.LogManager) Config(org.vitrivr.cineast.standalone.config.Config) AbstractQueryTermContainer(org.vitrivr.cineast.core.data.query.containers.AbstractQueryTermContainer) ArrayList(java.util.ArrayList) QueryTerm(org.vitrivr.cineast.api.messages.query.QueryTerm) StringDoublePair(org.vitrivr.cineast.core.data.StringDoublePair) CompletableFuture(java.util.concurrent.CompletableFuture) QueryStage(org.vitrivr.cineast.api.messages.query.QueryStage) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) QueryConfig(org.vitrivr.cineast.core.config.QueryConfig) IntObjectHashMap(io.netty.util.collection.IntObjectHashMap) 
TemporalObject(org.vitrivr.cineast.core.data.TemporalObject) StagedSimilarityQuery(org.vitrivr.cineast.api.messages.query.StagedSimilarityQuery) SegmentScoreElement(org.vitrivr.cineast.core.data.score.SegmentScoreElement) MediaSegmentDescriptor(org.vitrivr.cineast.core.data.entities.MediaSegmentDescriptor) HashMap(java.util.HashMap) Map(java.util.Map) IntObjectHashMap(io.netty.util.collection.IntObjectHashMap)

Aggregations

AbstractQueryTermContainer (org.vitrivr.cineast.core.data.query.containers.AbstractQueryTermContainer): 4; ArrayList (java.util.ArrayList): 3; ReadableQueryConfig (org.vitrivr.cineast.core.config.ReadableQueryConfig): 3; StringDoublePair (org.vitrivr.cineast.core.data.StringDoublePair): 3; SegmentScoreElement (org.vitrivr.cineast.core.data.score.SegmentScoreElement): 3; HashMap (java.util.HashMap): 2; HashSet (java.util.HashSet): 2; List (java.util.List): 2; Map (java.util.Map): 2; Set (java.util.Set): 2; Collectors (java.util.stream.Collectors): 2; LogManager (org.apache.logging.log4j.LogManager): 2; Logger (org.apache.logging.log4j.Logger): 2; QueryConfig (org.vitrivr.cineast.core.config.QueryConfig): 2; MediaSegmentDescriptor (org.vitrivr.cineast.core.data.entities.MediaSegmentDescriptor): 2; Config (org.vitrivr.cineast.standalone.config.Config): 2; ContinuousRetrievalLogic (org.vitrivr.cineast.standalone.util.ContinuousRetrievalLogic): 2; TObjectDoubleHashMap (gnu.trove.map.hash.TObjectDoubleHashMap): 1; StreamObserver (io.grpc.stub.StreamObserver): 1; IntObjectHashMap (io.netty.util.collection.IntObjectHashMap): 1