Search in sources :

Example 1 with Pair

use of org.vitrivr.cineast.core.data.Pair in project cineast by vitrivr.

In the class PlainTextImporter, the method readNext:

/**
 * Returns the ID and the content of the next text file.
 *
 * @return Pair mapping an ID (derived from the filename, ".txt" suffix stripped) to the
 *         content of the file, or {@code null} if no files remain or the file cannot be read.
 */
@Override
public Pair<String, String> readNext() {
    try {
        if (this.files.hasNext()) {
            final Path path = this.files.next();
            final String segmentId = path.getFileName().toString().replace(".txt", "");
            // Decode explicitly as UTF-8. The previous new String(bytes) used the
            // platform-default charset, making the import result environment-dependent.
            final String text = new String(Files.readAllBytes(path), java.nio.charset.StandardCharsets.UTF_8);
            return new Pair<>(segmentId, text);
        } else {
            return null;
        }
    } catch (IOException e) {
        // Best-effort: an unreadable file ends the import stream instead of aborting it.
        return null;
    }
}
Also used : Path(java.nio.file.Path) IOException(java.io.IOException) Pair(org.vitrivr.cineast.core.data.Pair)

Example 2 with Pair

use of org.vitrivr.cineast.core.data.Pair in project cineast by vitrivr.

In the class VbsMetaImporter, the method readNext:

/**
 * Returns the ID and the content of the next XML file.
 *
 * @return Pair mapping an ID (derived from the filename, ".xml" suffix stripped) to the
 *         text content of the file with XML tags stripped and whitespace collapsed, or
 *         {@code null} if no files remain or the file cannot be read.
 */
@Override
public Pair<String, String> readNext() {
    try {
        if (this.files.hasNext()) {
            final Path path = this.files.next();
            final String objectId = path.getFileName().toString().replace(".xml", "");
            // Decode explicitly as UTF-8 (the previous new String(bytes) used the
            // platform-default charset). Then replace every tag by a newline and
            // collapse all whitespace runs into single spaces.
            final String text = new String(Files.readAllBytes(path), java.nio.charset.StandardCharsets.UTF_8).replaceAll("<[^>]+>", "\n").replaceAll("\\s+", " ").trim();
            return new Pair<>(objectId, text);
        } else {
            return null;
        }
    } catch (IOException e) {
        // Best-effort: an unreadable file ends the import stream instead of aborting it.
        return null;
    }
}
Also used : Path(java.nio.file.Path) IOException(java.io.IOException) Pair(org.vitrivr.cineast.core.data.Pair)

Example 3 with Pair

use of org.vitrivr.cineast.core.data.Pair in project cineast by vitrivr.

In the class OCRSearch, the method processSegment:

/**
 * Extracts the scene text and ingests it using the {@link SimpleFulltextFeatureDescriptor}.
 *
 * @param shot The {@link SegmentContainer} that should be processed.
 */
@Override
public void processSegment(SegmentContainer shot) {
    // Per-thread detector/recognizer instances; rate, batchSize, tracker_type and the
    // threshold_* values are fields of the enclosing class (not visible here).
    TextDetector_EAST detector = detectorCache.get();
    TextRecognizer_CTC recognizer = recognizerCache.get();
    int lenVideo = shot.getVideoFrames().size();
    // Scene text extraction for image
    if (lenVideo == 1) {
        // Single-frame segment: detect once, recognize every region, persist, done.
        Mat frame = img2Mat(shot.getVideoFrames().get(0).getImage().getBufferedImage());
        Point[][] coordinates = detector.detect(frame);
        StringBuilder sb = new StringBuilder();
        for (Point[] coordinate : coordinates) {
            sb.append(recognizer.recognize(coordinate, frame, true)).append('\n');
        }
        saveText(shot.getId(), sb.toString());
        return;
    }
    // Scene text extraction for video
    List<TextStream> streams = new ArrayList<>();
    // Run detection only on every rate-th frame, batched for efficiency;
    // detections.get(i / rate) therefore holds the detections of frame i.
    List<Mat> matFrames = new ArrayList<>();
    for (int i = 0; i < lenVideo; i = i + rate) {
        matFrames.add(img2Mat(shot.getVideoFrames().get(i).getImage().getBufferedImage()));
    }
    List<Point[][]> detections = detector.detect(matFrames, batchSize);
    // Grayscale copies of ALL frames (indexed by frame number) for the trackers.
    List<GrayU8> frames_grayU8 = new ArrayList<>();
    for (int i = 0; i < lenVideo; i++) {
        frames_grayU8.add(bufferedImage2GrayU8(shot, i));
    }
    // For every pair of consecutive detection frames (i, i+rate): track detections of
    // frame i forward and detections of frame i+rate backward, then match the two sets.
    // NOTE(review): the `i < lenVideo` clause is redundant given `i + rate < lenVideo`.
    for (int i = 0; i + rate < lenVideo && i < lenVideo; i = i + rate) {
        List<List<Quadrilateral_F64>> tracking_forward = new ArrayList<>();
        List<List<Quadrilateral_F64>> tracking_backward = new ArrayList<>();
        // Forward Tracking (from frame i to frame i+rate)
        List<Quadrilateral_F64> coordinates_tracking = new ArrayList<>();
        Point[][] initialCoordinates = detections.get(i / rate);
        if (initialCoordinates.length == 0) {
            continue;
        }
        int count = 0;
        for (Point[] coordinate : initialCoordinates) {
            // Reorder the detector's corner points into the quadrilateral's expected
            // corner order, then expand it to its axis-aligned bounding box.
            Quadrilateral_F64 coordinate_tracking = new Quadrilateral_F64(coordinate[1].x, coordinate[1].y, coordinate[0].x, coordinate[0].y, coordinate[3].x, coordinate[3].y, coordinate[2].x, coordinate[2].y);
            Pair<Double, Double> minMaxX = minimumMaximumX(coordinate_tracking);
            Pair<Double, Double> minMaxY = minimumMaximumY(coordinate_tracking);
            coordinate_tracking = new Quadrilateral_F64(minMaxX.first, minMaxY.first, minMaxX.second, minMaxY.first, minMaxX.second, minMaxY.second, minMaxX.first, minMaxY.second);
            coordinates_tracking.add(coordinate_tracking);
            tracking_forward.add(new ArrayList<>());
            tracking_forward.get(count).add(coordinate_tracking);
            count++;
        }
        // NOTE(review): coordinates_tracking is cleared and reused below; this assumes
        // MultiTracker copies (or fully consumes) the list in its constructor — verify.
        MultiTracker tracker_forward = new MultiTracker(bufferedImage2GrayU8(shot, i), coordinates_tracking, tracker_type);
        for (int j = i + 1; j < lenVideo && j <= i + rate; j++) {
            List<Pair<Boolean, Quadrilateral_F64>> new_coordinates = tracker_forward.update(frames_grayU8.get(j));
            for (int k = 0; k < new_coordinates.size(); k++) {
                // first == false means the tracker lost the region; record null as a gap.
                if (new_coordinates.get(k).first) {
                    tracking_forward.get(k).add(new_coordinates.get(k).second);
                } else {
                    tracking_forward.get(k).add(null);
                }
            }
        }
        // Backward Tracking (from frame i+rate to frame i)
        coordinates_tracking.clear();
        initialCoordinates = detections.get((i + rate) / rate);
        if (initialCoordinates.length == 0) {
            continue;
        }
        count = 0;
        for (Point[] coordinate : initialCoordinates) {
            // NOTE(review): the corner order here (1,2,3,0) differs from the forward pass
            // and no bounding-box expansion is applied — presumably intentional; confirm.
            Quadrilateral_F64 coordinate_tracking = new Quadrilateral_F64(coordinate[1].x, coordinate[1].y, coordinate[2].x, coordinate[2].y, coordinate[3].x, coordinate[3].y, coordinate[0].x, coordinate[0].y);
            coordinates_tracking.add(coordinate_tracking);
            tracking_backward.add(new ArrayList<>());
            tracking_backward.get(count).add(coordinate_tracking);
            count++;
        }
        MultiTracker tracker_backward = new MultiTracker(bufferedImage2GrayU8(shot, i + rate), coordinates_tracking, tracker_type);
        for (int j = i + rate - 1; j >= 0 && j >= i; j--) {
            List<Pair<Boolean, Quadrilateral_F64>> new_coordinates = tracker_backward.update(frames_grayU8.get(j));
            for (int k = 0; k < new_coordinates.size(); k++) {
                // Prepend (index 0) so the backward lists end up in forward frame order,
                // aligned with tracking_forward for the IoU comparison below.
                if (new_coordinates.get(k).first) {
                    tracking_backward.get(k).add(0, new_coordinates.get(k).second);
                } else {
                    tracking_backward.get(k).add(0, null);
                }
            }
        }
        // Find best matches between the forward tracking stream and the backward tracking one and compare it to threshold_CiOU
        // Cost = 1 - average IoU, so the Hungarian algorithm minimizes cost to maximize overlap.
        double[][] cost = new double[tracking_forward.size()][tracking_backward.size()];
        for (int j = 0; j < tracking_forward.size(); j++) {
            for (int k = 0; k < tracking_backward.size(); k++) {
                cost[j][k] = 1 - getAverageIntersectionOverUnion(tracking_forward.get(j), tracking_backward.get(k));
            }
        }
        HungarianAlgorithm optimization = new HungarianAlgorithm(cost);
        int[] pairs = optimization.execute();
        for (int j = 0; j < tracking_forward.size(); j++) {
            // Accept the assignment only if the average IoU clears threshold_CIoU.
            if (pairs[j] != -1 && cost[j][pairs[j]] <= 1 - threshold_CIoU) {
                TextStream stream = null;
                // Extend an existing stream that ended at frame i with the same detection id;
                // otherwise start a new stream for this text instance.
                for (TextStream textStream : streams) {
                    if (textStream.getLast() == i && textStream.getCoordinate_id() == j) {
                        stream = textStream;
                        stream.add(i, i + rate, pairs[j], tracking_forward.get(j), tracking_backward.get(pairs[j]));
                    }
                }
                if (stream == null) {
                    stream = new TextStream(i, i + rate, pairs[j], tracking_forward.get(j), tracking_backward.get(pairs[j]));
                    streams.add(stream);
                }
            }
        }
    }
    List<TextStream> shouldRemove = new ArrayList<>();
    // Recognize text within each stream
    for (TextStream stream : streams) {
        HashMap<Integer, Quadrilateral_F64> filtered = stream.getFilteredCoordinates();
        Iterator<Integer> frameIterator = filtered.keySet().iterator();
        HashMap<String, Integer> counts = new HashMap<>();
        // Recognize text from the filtered coordinates and count the amount of times they occur
        while (frameIterator.hasNext()) {
            int key = frameIterator.next();
            Quadrilateral_F64 coord_before = filtered.get(key);
            // Convert the quadrilateral corners (D, A, B, C) back into the Point[]
            // order the recognizer expects.
            Point[] coordinates = new Point[] { new Point(coord_before.getD().x, coord_before.getD().y), new Point(coord_before.getA().x, coord_before.getA().y), new Point(coord_before.getB().x, coord_before.getB().y), new Point(coord_before.getC().x, coord_before.getC().y) };
            Mat frame = img2Mat(shot.getVideoFrames().get(key).getImage().getBufferedImage());
            String recognition = recognizer.recognize(coordinates, frame, false);
            Integer count = counts.get(recognition);
            counts.put(recognition, count != null ? count + 1 : 1);
        }
        // Select the recognitions which appear most often (majority voting)
        int max_count = 0;
        List<String> prunedRecognitions = new ArrayList<>();
        for (HashMap.Entry<String, Integer> val : counts.entrySet()) {
            if (max_count < val.getValue()) {
                prunedRecognitions.clear();
                prunedRecognitions.add(val.getKey());
                max_count = val.getValue();
            } else if (max_count == val.getValue()) {
                prunedRecognitions.add(val.getKey());
            }
        }
        // If there are two recognitions which appear most often, apply NeedlemanWunschMerge. If there are more than two, then delete the stream
        if (prunedRecognitions.size() == 1) {
            stream.setText(prunedRecognitions.get(0));
        } else if (prunedRecognitions.size() == 2) {
            stream.setText(new NeedlemanWunschMerge(prunedRecognitions.get(0), prunedRecognitions.get(1)).execute());
        } else {
            // Streams who cannot agree on at most two recognitions are considered unstable and removed
            shouldRemove.add(stream);
        }
    }
    for (TextStream stream : shouldRemove) {
        streams.remove(stream);
    }
    // Index streams by their first and last frame so that candidate merge pairs
    // (one stream ending where another begins) can be looked up in O(1).
    HashMap<Integer, List<TextStream>> firsts = new HashMap<>();
    HashMap<Integer, List<TextStream>> lasts = new HashMap<>();
    for (TextStream stream : streams) {
        int first = stream.getFirst();
        int last = stream.getLast();
        if (firsts.containsKey(first)) {
            List<TextStream> s = firsts.get(first);
            s.add(stream);
        } else {
            List<TextStream> s = new ArrayList<>();
            s.add(stream);
            firsts.put(first, s);
        }
        if (lasts.containsKey(last)) {
            List<TextStream> s = lasts.get(last);
            s.add(stream);
        } else {
            List<TextStream> s = new ArrayList<>();
            s.add(stream);
            lasts.put(last, s);
        }
    }
    // Combine streams which are considered to be highly similar
    // This is done to overcome scenarios in which the object was occluded during the detection, or couldn't be detected for other reasons, which resulted in two or more streams for the same text instance
    int distance = rate;
    // NOTE(review): threshold_postproc / 0.2 bounds the maximum temporal gap between
    // two streams to merge; the 0.2 constant's meaning is not visible here — confirm.
    while (distance < threshold_postproc / 0.2) {
        for (int i = rate; i + distance < lenVideo; i = i + rate) {
            List<TextStream> streams_last = lasts.get(i);
            List<TextStream> streams_first = firsts.get(i + distance);
            if (streams_last == null || streams_last.size() == 0 || streams_first == null || streams_first.size() == 0) {
                continue;
            }
            // Optimal 1:1 assignment between streams ending at i and starting at i+distance.
            double[][] cost = new double[streams_last.size()][streams_first.size()];
            for (int j = 0; j < streams_last.size(); j++) {
                for (int k = 0; k < streams_first.size(); k++) {
                    cost[j][k] = getSimilarity(streams_last.get(j), streams_first.get(k));
                }
            }
            HungarianAlgorithm optimization = new HungarianAlgorithm(cost);
            int[] pairs = optimization.execute();
            // Collect matches first, then mutate, to avoid editing the indexed lists mid-scan.
            List<Pair<TextStream, TextStream>> matches = new ArrayList<>();
            for (int j = 0; j < streams_last.size(); j++) {
                if (pairs[j] < 0) {
                    continue;
                }
                if (cost[j][pairs[j]] < threshold_postproc) {
                    matches.add(new Pair<>(streams_last.get(j), streams_first.get(pairs[j])));
                }
            }
            for (Pair<TextStream, TextStream> match : matches) {
                TextStream stream_last = match.first;
                TextStream stream_first = match.second;
                // On disagreement, keep the text of the longer stream; on a tie, merge
                // both recognitions via Needleman-Wunsch alignment.
                if (!stream_last.getText().equals(stream_first.getText())) {
                    if ((stream_last.getLast() - stream_last.getFirst()) > (stream_first.getLast() - stream_first.getFirst())) {
                        stream_first.setText(stream_last.getText());
                    } else if ((stream_last.getLast() - stream_last.getFirst()) == (stream_first.getLast() - stream_first.getFirst())) {
                        stream_first.setText(new NeedlemanWunschMerge(stream_last.getText(), stream_first.getText()).execute());
                    }
                }
                // Absorb stream_last into stream_first and repair the first/last indexes.
                firsts.get(stream_last.getFirst()).remove(stream_last);
                lasts.get(stream_last.getLast()).remove(stream_last);
                streams.remove(stream_last);
                firsts.get(stream_first.getFirst()).remove(stream_first);
                firsts.get(stream_last.getFirst()).add(stream_first);
                stream_first.add(stream_last);
            }
        }
        distance = distance + rate;
    }
    shouldRemove.clear();
    // This is done to prune unreliable streams and text instances which the user could not possibly have memorized
    for (TextStream stream : streams) {
        if ((stream.getLast() - stream.getFirst()) < threshold_stream_length) {
            shouldRemove.add(stream);
        }
    }
    for (TextStream stream : shouldRemove) {
        streams.remove(stream);
    }
    // Persist one line of text per surviving stream.
    StringBuilder sb = new StringBuilder();
    streams.forEach(s -> sb.append(s.getText()).append('\n'));
    saveText(shot.getId(), sb.toString());
}
Also used : Mat(org.opencv.core.Mat) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) GrayU8(boofcv.struct.image.GrayU8) ArrayList(java.util.ArrayList) List(java.util.List) TextDetector_EAST(org.vitrivr.cineast.core.util.text.TextDetector_EAST) TextStream(org.vitrivr.cineast.core.util.text.TextStream) TextRecognizer_CTC(org.vitrivr.cineast.core.util.text.TextRecognizer_CTC) Pair(org.vitrivr.cineast.core.data.Pair) Quadrilateral_F64(georegression.struct.shapes.Quadrilateral_F64) HungarianAlgorithm(org.vitrivr.cineast.core.util.HungarianAlgorithm) Point(org.opencv.core.Point) Point(org.opencv.core.Point) MultiTracker(org.vitrivr.cineast.core.util.MultiTracker) NeedlemanWunschMerge(org.vitrivr.cineast.core.util.NeedlemanWunschMerge)

Example 4 with Pair

use of org.vitrivr.cineast.core.data.Pair in project cineast by vitrivr.

In the class SaturationGrid8, the method computeGrid:

/**
 * Computes an 8x8 grid descriptor of the most representative frame's saturation:
 * per cell the mean and variance of the (sufficiently opaque) pixel saturations,
 * paired with per-cell weights derived from the average alpha of the cell.
 */
private Pair<FloatVector, float[]> computeGrid(SegmentContainer qc) {
    // One statistics accumulator per cell of the 8x8 grid.
    final ArrayList<SummaryStatistics> cellStats = new ArrayList<>(64);
    for (int cell = 0; cell < 64; ++cell) {
        cellStats.add(new SummaryStatistics());
    }
    final MultiImage image = qc.getMostRepresentativeFrame().getImage();
    final int[] pixels = image.getColors();
    // Per-pixel saturation and normalized alpha values, in image order.
    final ArrayList<Float> saturationValues = new ArrayList<>(image.getWidth() * image.getHeight());
    final ArrayList<Float> alphaValues = new ArrayList<>(image.getWidth() * image.getHeight());
    for (final int pixel : pixels) {
        saturationValues.add(ColorConverter.cachedRGBtoLab(pixel).getSaturation());
        alphaValues.add(ReadableRGBContainer.getAlpha(pixel) / 255f);
    }
    // Partition both value lists into the 64 grid cells.
    final ArrayList<LinkedList<Float>> saturationCells = GridPartitioner.partition(saturationValues, image.getWidth(), image.getHeight(), 8, 8);
    final ArrayList<LinkedList<Float>> alphaCells = GridPartitioner.partition(alphaValues, image.getWidth(), image.getHeight(), 8, 8);
    // Accumulate saturation statistics per cell, ignoring mostly-transparent pixels.
    for (int cell = 0; cell < saturationCells.size(); ++cell) {
        final SummaryStatistics stat = cellStats.get(cell);
        final Iterator<Float> alphaIter = alphaCells.get(cell).iterator();
        for (final float saturation : saturationCells.get(cell)) {
            if (alphaIter.next() >= 0.5f) {
                stat.addValue(saturation);
            }
        }
    }
    // Interleave mean and variance per cell into the 128-element feature vector.
    final float[] feature = new float[128];
    for (int cell = 0; cell < 64; ++cell) {
        feature[2 * cell] = (float) cellStats.get(cell).getMean();
        feature[2 * cell + 1] = (float) cellStats.get(cell).getVariance();
    }
    // Each cell's weight (applied to both of its vector components) is its mean alpha.
    final float[] weights = new float[128];
    for (int cell = 0; cell < alphaCells.size(); ++cell) {
        float meanAlpha = 0f;
        for (final float alpha : alphaCells.get(cell)) {
            meanAlpha += alpha;
        }
        meanAlpha /= alphaCells.get(cell).size();
        weights[2 * cell] = meanAlpha;
        weights[2 * cell + 1] = meanAlpha;
    }
    return new Pair<FloatVector, float[]>(new FloatVectorImpl(feature), weights);
}
Also used : ArrayList(java.util.ArrayList) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) FloatVectorImpl(org.vitrivr.cineast.core.data.FloatVectorImpl) LinkedList(java.util.LinkedList) MultiImage(org.vitrivr.cineast.core.data.raw.images.MultiImage) Pair(org.vitrivr.cineast.core.data.Pair)

Example 5 with Pair

use of org.vitrivr.cineast.core.data.Pair in project cineast by vitrivr.

In the class ForegroundBoundingBox, the method processSegment:

/**
 * Persists the normalized foreground bounding boxes of the given segment,
 * unless the segment is empty or its ID has already been processed.
 */
@Override
public void processSegment(SegmentContainer shot) {
    // Nothing to extract from a segment without a representative frame.
    if (shot.getMostRepresentativeFrame() == VideoFrame.EMPTY_VIDEO_FRAME) {
        return;
    }
    // Skip segments that were already persisted under this ID.
    if (phandler.idExists(shot.getId())) {
        return;
    }
    final ArrayList<Pair<Long, ArrayList<Float>>> boxes = MaskGenerator.getNormalizedBbox(shot.getVideoFrames(), shot.getPaths(), shot.getBgPaths());
    for (final Pair<Long, ArrayList<Float>> box : boxes) {
        persist(shot.getId(), box.first, new FloatVectorImpl(box.second));
    }
}
Also used : ArrayList(java.util.ArrayList) FloatVectorImpl(org.vitrivr.cineast.core.data.FloatVectorImpl) Pair(org.vitrivr.cineast.core.data.Pair)

Aggregations

Pair (org.vitrivr.cineast.core.data.Pair)30 ArrayList (java.util.ArrayList)22 LinkedList (java.util.LinkedList)12 FloatVectorImpl (org.vitrivr.cineast.core.data.FloatVectorImpl)9 IOException (java.io.IOException)5 SummaryStatistics (org.apache.commons.math3.stat.descriptive.SummaryStatistics)5 GrayU8 (boofcv.struct.image.GrayU8)4 Point2D_F32 (georegression.struct.point.Point2D_F32)4 ReadableLabContainer (org.vitrivr.cineast.core.color.ReadableLabContainer)4 MultiImage (org.vitrivr.cineast.core.data.raw.images.MultiImage)4 Quadrilateral_F64 (georegression.struct.shapes.Quadrilateral_F64)3 Path (java.nio.file.Path)3 HashMap (java.util.HashMap)3 List (java.util.List)3 Vector3f (org.joml.Vector3f)3 Vector3fc (org.joml.Vector3fc)3 BufferedImage (java.awt.image.BufferedImage)2 DMatrixRMaj (org.ejml.data.DMatrixRMaj)2 Matrix4f (org.joml.Matrix4f)2 Mat (org.opencv.core.Mat)2