Search in sources :

Example 1 with Decoder

Use of org.vitrivr.cineast.core.extraction.decode.general.Decoder in the project cineast by vitrivr.

In the class ImageCodebookGenerator, method generate.

/**
 * Builds a codebook from the files found below {@code source} and saves it to {@code destination}.
 *
 * <p>Every file accepted by the decoder is decoded and handed to {@code process(...)}; files that
 * cannot be initialised or yield no image are counted as skipped. Afterwards the collected data is
 * clustered into {@code words} entries and the resulting assignment is saved.
 *
 * @param source root of the file tree that is walked for input files
 * @param destination path the resulting assignment is saved to
 * @param words number of words (clusters) the codebook should contain
 * @throws IOException if walking the source tree fails
 */
@Override
public void generate(Path source, Path destination, int words) throws IOException {
    final long start = System.currentTimeMillis();
    final Decoder<BufferedImage> decoder = new DefaultImageDecoder();
    final MimetypesFileTypeMap filetypemap = new MimetypesFileTypeMap("mime.types");

    /*
     * Filter the list of files and aggregate it into a deque. The stream returned by Files.walk
     * holds open directory handles, so it is closed as soon as the list has been collected.
     */
    ArrayDeque<Path> files;
    try (java.util.stream.Stream<Path> walk = Files.walk(source)) {
        files = walk.filter(candidate -> {
            if (decoder.supportedFiles() == null) {
                /* Decoder does not restrict file types; accept everything. */
                return true;
            }
            final String type = filetypemap.getContentType(candidate.toString());
            return decoder.supportedFiles().contains(type);
        }).collect(Collectors.toCollection(ArrayDeque::new));
    }

    /* Prepare data-structures to track progress. */
    final int max = files.size();
    int counter = 0;
    int skipped = 0;
    char[] progressBar = new char[15];
    /* Must be at least 1: with fewer files than bar segments the division yields 0 and the modulo below would throw. */
    final int update = Math.max(1, max / progressBar.length);
    /* Divisor matching the "GB" unit used in the progress message. */
    final float bytesPerGb = 1_000_000_000.0f;
    final Runtime runtime = Runtime.getRuntime();

    System.out.println(String.format("Creating codebook of %d words from %d files.", words, max));

    /*
     * Iterates over the files deque. Every element that has been processed is removed from that deque.
     */
    Path path;
    while ((path = files.poll()) != null) {
        if (decoder.init(path, null, null)) {
            final BufferedImage image = decoder.getNext();
            if (image != null) {
                this.process(image);
            } else {
                skipped++;
            }
        } else {
            skipped++;
        }
        if (counter % update == 0) {
            this.updateProgressBar(progressBar, max, counter);
        }
        System.out.print(String.format("\rAdding vectors to codebook: %d/%d files processed (%d skipped) |%s| (Memory left: %.2f/%.2f GB)", counter, max, skipped, String.valueOf(progressBar), runtime.freeMemory() / bytesPerGb, runtime.totalMemory() / bytesPerGb));
        counter++;
    }

    /* Drop references that are no longer needed before the clustering step. */
    files = null;
    progressBar = null;

    /* Start clustering. */
    System.out.println(String.format("\nClustering... this could take a while."));
    this.cluster.process(words);

    /* Save file. */
    System.out.println(String.format("Saving vocabulary with %d entries.", words));
    UtilIO.save(this.cluster.getAssignment(), destination.toString());

    /* Report elapsed time; minutes and seconds are the remainders within the hour / minute, not totals. */
    final long duration = System.currentTimeMillis() - start;
    final long hours = TimeUnit.MILLISECONDS.toHours(duration);
    final long minutes = TimeUnit.MILLISECONDS.toMinutes(duration) % 60;
    final long seconds = TimeUnit.MILLISECONDS.toSeconds(duration) % 60;
    System.out.println(String.format("Done! Took me %dhours %dmin %dsec", hours, minutes, seconds));
}
Also used : Path(java.nio.file.Path) UtilIO(boofcv.io.UtilIO) ComputeClusters(org.ddogleg.clustering.ComputeClusters) BufferedImage(java.awt.image.BufferedImage) Files(java.nio.file.Files) IOException(java.io.IOException) Decoder(org.vitrivr.cineast.core.extraction.decode.general.Decoder) Collectors(java.util.stream.Collectors) MimetypesFileTypeMap(javax.activation.MimetypesFileTypeMap) ClusterVisualWords(boofcv.alg.bow.ClusterVisualWords) TimeUnit(java.util.concurrent.TimeUnit) ArrayDeque(java.util.ArrayDeque) DefaultImageDecoder(org.vitrivr.cineast.core.extraction.decode.image.DefaultImageDecoder) Path(java.nio.file.Path) MimetypesFileTypeMap(javax.activation.MimetypesFileTypeMap) DefaultImageDecoder(org.vitrivr.cineast.core.extraction.decode.image.DefaultImageDecoder) BufferedImage(java.awt.image.BufferedImage)

Example 2 with Decoder

Use of org.vitrivr.cineast.core.extraction.decode.general.Decoder in the project cineast by vitrivr.

In the class GenericExtractionItemHandler, method run.

/**
 * Runs the extraction: starts the pipeline, initialises all metadata extractors and then processes
 * items returned by {@code nextItem()} until none are left or an unknown media type is encountered.
 *
 * <p>For every item a decoder and a segmenter are taken from the handler cache, the media object and
 * its segments are persisted, and each segment is emitted into the extraction pipeline. Failures
 * while handling a single item are logged and the loop moves on to the next item. Once the loop
 * ends, {@code shutdown()} is invoked.
 */
@Override
@SuppressWarnings("unchecked")
public void run() {
    LOGGER.info("Starting extraction");
    this.executorService.execute(pipeline);
    final ObjectIdGenerator generator = this.context.objectIdGenerator();
    Pair<ExtractionItemContainer, MediaType> pair = null;
    /* Initialize all metadata extractors. */
    for (MetadataExtractor extractor : this.metadataExtractors) {
        LOGGER.debug("Initializing metadata extractor {}", extractor.getClass().getSimpleName());
        if (extractor instanceof MetadataFeatureModule) {
            ((MetadataFeatureModule<?>) extractor).init(this.context.persistencyWriter());
        } else {
            extractor.init();
        }
    }
    /* Process until there's nothing left. */
    while ((pair = this.nextItem()) != null) {
        try {
            LOGGER.debug("Processing path {} and mediatype {}", pair.getLeft(), pair.getRight());
            if (handlerCache.get(pair.getRight()) == null) {
                LOGGER.error("Unknown mediatype {}, exiting extraction", pair.getRight());
                break;
            }
            /* Keep the cached decoder only if it can be reused; the cached segmenter is always cleared. */
            handlerCache.compute(pair.getRight(), (mediaType, cache) -> {
                Decoder decoder = null;
                if (cache.getLeft() != null) {
                    if (cache.getLeft().canBeReused()) {
                        decoder = cache.getLeft();
                    }
                }
                return ImmutablePair.of(decoder, null);
            });
            /* Put a new decoder in the cache if there's not one already there. */
            if (handlerCache.get(pair.getRight()).getLeft() == null) {
                Decoder decoder = handlers.get(pair.getRight()).getLeft().get();
                handlerCache.compute(pair.getRight(), (mediaType, cache) -> new ImmutablePair<>(decoder, cache.getRight()));
            }
            /* Put a new segmenter in the cache if there's not one already there. */
            if (handlerCache.get(pair.getRight()).getRight() == null) {
                Segmenter segmenter = handlers.get(pair.getRight()).getRight().get();
                handlerCache.compute(pair.getRight(), (mediaType, cache) -> new ImmutablePair<>(cache.getLeft(), segmenter));
            }
            Decoder decoder = handlerCache.get(pair.getRight()).getLeft();
            /* Use the segmenter that was just placed in the cache instead of requesting a second instance from the supplier. */
            Segmenter segmenter = handlerCache.get(pair.getRight()).getRight();
            if (decoder.init(pair.getLeft().getPathForExtraction(), Config.sharedConfig().getDecoders().get(pair.getRight()), Config.sharedConfig().getCache())) {
                /* Create / lookup MediaObjectDescriptor for new file. */
                final MediaObjectDescriptor descriptor = this.fetchOrCreateMultimediaObjectDescriptor(generator, pair.getLeft(), pair.getRight());
                if (!this.checkAndPersistMultimediaObject(descriptor)) {
                    /*
                     * NOTE(review): this skips the counter increment, the decoder close / replacement
                     * and the completion listeners further below - confirm that this is intended.
                     */
                    continue;
                }
                final String objectId = descriptor.getObjectId();
                int segmentNumber = 1;
                segmenter.init(decoder, descriptor);
                this.executorService.execute(segmenter);
                while (!segmenter.complete()) {
                    try {
                        final SegmentContainer container = segmenter.getNext();
                        if (container != null) {
                            /* Create segment-descriptor and try to persist it. */
                            MediaSegmentDescriptor mediaSegmentDescriptor;
                            if (container.getId() != null) {
                                /* Special case; segment ID is determined by container (image sequences only). */
                                mediaSegmentDescriptor = this.fetchOrCreateSegmentDescriptor(objectId, container.getId(), segmentNumber, container.getStart(), container.getEnd(), container.getAbsoluteStart(), container.getAbsoluteEnd());
                            } else {
                                mediaSegmentDescriptor = this.fetchOrCreateSegmentDescriptor(objectId, segmentNumber, container.getStart(), container.getEnd(), container.getAbsoluteStart(), container.getAbsoluteEnd());
                            }
                            container.setId(mediaSegmentDescriptor.getSegmentId());
                            container.setSuperId(mediaSegmentDescriptor.getObjectId());
                            if (!this.checkAndPersistSegment(mediaSegmentDescriptor)) {
                                continue;
                            }
                            /* Retry emission until the pipeline accepts the segment. */
                            int emissionTimeout = 1000;
                            while (!this.pipeline.emit(container, emissionTimeout)) {
                                LOGGER.debug("ExtractionPipeline is full - deferring emission of segment. Consider increasing the thread-pool count for the extraction pipeline.");
                                Thread.sleep(emissionTimeout);
                            }
                            segmentNumber += 1;
                        }
                    } catch (InterruptedException e) {
                        /*
                         * NOTE(review): only the segment loop of the current item is left here and the
                         * interrupt flag is not restored - confirm whether the whole run should stop.
                         */
                        LOGGER.log(Level.ERROR, "Thread was interrupted while the extraction process was running. Aborting...");
                        break;
                    }
                }
                List<MediaObjectMetadataDescriptor> metadata = pair.getLeft().getMetadata().stream().map(el -> MediaObjectMetadataDescriptor.fromExisting(el, objectId)).collect(Collectors.toList());
                this.metadataWriter.write(metadata);
                /* Extract metadata. */
                this.extractAndPersistMetadata(pair.getLeft(), objectId);
                /* Force flush the segment, object and metadata information. */
                this.mediaSegmentWriter.flush();
                this.objectWriter.flush();
                this.metadataWriter.flush();
            } else {
                LOGGER.error("Failed to initialize decoder. File is being skipped...");
            }
            /* Increment the files counter. */
            this.count_processed += 1;
            /* Create a new decoder for the next file if the current one reports that it cannot be reused. */
            if (!decoder.canBeReused()) {
                decoder.close();
                final MediaType type = pair.getRight();
                handlerCache.compute(type, (mediaType, cache) -> ImmutablePair.of(handlers.get(type).getLeft().get(), cache.getRight()));
            }
            for (ExtractionCompleteListener completeListener : this.completeListeners) {
                completeListener.onCompleted(pair.getLeft());
            }
            /*
             * Trigger garbage collection once in a while. This is especially relevant when many small
             * files are processed, since unused allocated memory could accumulate and trigger swapping.
             */
            if (this.count_processed % 50 == 0) {
                System.gc();
            }
        } catch (Throwable t) {
            /* Hand the throwable to the logger so the stack trace ends up in the log rather than on stderr. */
            LOGGER.error("Exception while processing path {}, {}", pair.getLeft(), t.getMessage(), t);
        }
    }
    shutdown();
}
Also used : MetadataFeatureModule(org.vitrivr.cineast.core.features.abstracts.MetadataFeatureModule) MimeTypeHelper(org.vitrivr.cineast.core.util.MimeTypeHelper) Level(org.apache.logging.log4j.Level) MediaObjectDescriptor(org.vitrivr.cineast.core.data.entities.MediaObjectDescriptor) StringUtils(org.apache.commons.lang3.StringUtils) ExtractionContextProvider(org.vitrivr.cineast.core.extraction.ExtractionContextProvider) Pair(org.apache.commons.lang3.tuple.Pair) MediaObjectReader(org.vitrivr.cineast.core.db.dao.reader.MediaObjectReader) SegmentContainer(org.vitrivr.cineast.core.data.segments.SegmentContainer) Map(java.util.Map) Path(java.nio.file.Path) MediaSegmentDescriptor(org.vitrivr.cineast.core.data.entities.MediaSegmentDescriptor) ImageSequenceDecoder(org.vitrivr.cineast.core.extraction.decode.image.ImageSequenceDecoder) IdConfig(org.vitrivr.cineast.core.config.IdConfig) MediaObjectMetadataWriter(org.vitrivr.cineast.core.db.dao.writer.MediaObjectMetadataWriter) ExtractionPipeline(org.vitrivr.cineast.standalone.runtime.ExtractionPipeline) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) List(java.util.List) Logger(org.apache.logging.log4j.Logger) MediaSegmentWriter(org.vitrivr.cineast.core.db.dao.writer.MediaSegmentWriter) Optional(java.util.Optional) LogHelper(org.vitrivr.cineast.core.util.LogHelper) ModularMeshDecoder(org.vitrivr.cineast.core.extraction.decode.m3d.ModularMeshDecoder) MediaType(org.vitrivr.cineast.core.data.MediaType) ImageSequenceSegmenter(org.vitrivr.cineast.core.extraction.segmenter.image.ImageSequenceSegmenter) Config(org.vitrivr.cineast.standalone.config.Config) ReflectionHelper(org.vitrivr.cineast.core.util.ReflectionHelper) Model3DSegment(org.vitrivr.cineast.core.data.segments.Model3DSegment) DBSelector(org.vitrivr.cineast.core.db.DBSelector) HashMap(java.util.HashMap) Decoder(org.vitrivr.cineast.core.extraction.decode.general.Decoder) 
MediaObjectMetadataDescriptor(org.vitrivr.cineast.core.data.entities.MediaObjectMetadataDescriptor) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) Mesh(org.vitrivr.cineast.core.data.m3d.Mesh) PersistencyWriter(org.vitrivr.cineast.core.db.PersistencyWriter) FFMpegVideoDecoder(org.vitrivr.cineast.core.extraction.decode.video.FFMpegVideoDecoder) ExecutorService(java.util.concurrent.ExecutorService) Segmenter(org.vitrivr.cineast.core.extraction.segmenter.general.Segmenter) Files(java.nio.file.Files) MediaSegmentReader(org.vitrivr.cineast.core.db.dao.reader.MediaSegmentReader) ConstantLengthAudioSegmenter(org.vitrivr.cineast.core.extraction.segmenter.audio.ConstantLengthAudioSegmenter) MetadataExtractor(org.vitrivr.cineast.core.extraction.metadata.MetadataExtractor) PassthroughSegmenter(org.vitrivr.cineast.core.extraction.segmenter.general.PassthroughSegmenter) ObjectIdGenerator(org.vitrivr.cineast.core.extraction.idgenerator.ObjectIdGenerator) IngestConfig(org.vitrivr.cineast.standalone.config.IngestConfig) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) TimeUnit(java.util.concurrent.TimeUnit) Paths(java.nio.file.Paths) FFMpegAudioDecoder(org.vitrivr.cineast.core.extraction.decode.audio.FFMpegAudioDecoder) MediaObjectWriter(org.vitrivr.cineast.core.db.dao.writer.MediaObjectWriter) DefaultImageDecoder(org.vitrivr.cineast.core.extraction.decode.image.DefaultImageDecoder) VideoHistogramSegmenter(org.vitrivr.cineast.core.extraction.segmenter.video.VideoHistogramSegmenter) LogManager(org.apache.logging.log4j.LogManager) ImageSegmenter(org.vitrivr.cineast.core.extraction.segmenter.image.ImageSegmenter) ObjectIdGenerator(org.vitrivr.cineast.core.extraction.idgenerator.ObjectIdGenerator) MediaObjectDescriptor(org.vitrivr.cineast.core.data.entities.MediaObjectDescriptor) MetadataFeatureModule(org.vitrivr.cineast.core.features.abstracts.MetadataFeatureModule) 
ImageSequenceSegmenter(org.vitrivr.cineast.core.extraction.segmenter.image.ImageSequenceSegmenter) Segmenter(org.vitrivr.cineast.core.extraction.segmenter.general.Segmenter) ConstantLengthAudioSegmenter(org.vitrivr.cineast.core.extraction.segmenter.audio.ConstantLengthAudioSegmenter) PassthroughSegmenter(org.vitrivr.cineast.core.extraction.segmenter.general.PassthroughSegmenter) VideoHistogramSegmenter(org.vitrivr.cineast.core.extraction.segmenter.video.VideoHistogramSegmenter) ImageSegmenter(org.vitrivr.cineast.core.extraction.segmenter.image.ImageSegmenter) ImageSequenceDecoder(org.vitrivr.cineast.core.extraction.decode.image.ImageSequenceDecoder) ModularMeshDecoder(org.vitrivr.cineast.core.extraction.decode.m3d.ModularMeshDecoder) Decoder(org.vitrivr.cineast.core.extraction.decode.general.Decoder) FFMpegVideoDecoder(org.vitrivr.cineast.core.extraction.decode.video.FFMpegVideoDecoder) FFMpegAudioDecoder(org.vitrivr.cineast.core.extraction.decode.audio.FFMpegAudioDecoder) DefaultImageDecoder(org.vitrivr.cineast.core.extraction.decode.image.DefaultImageDecoder) MediaSegmentDescriptor(org.vitrivr.cineast.core.data.entities.MediaSegmentDescriptor) MediaType(org.vitrivr.cineast.core.data.MediaType) MetadataExtractor(org.vitrivr.cineast.core.extraction.metadata.MetadataExtractor) SegmentContainer(org.vitrivr.cineast.core.data.segments.SegmentContainer) MediaObjectMetadataDescriptor(org.vitrivr.cineast.core.data.entities.MediaObjectMetadataDescriptor)

Aggregations

Files (java.nio.file.Files)2 Path (java.nio.file.Path)2 TimeUnit (java.util.concurrent.TimeUnit)2 Collectors (java.util.stream.Collectors)2 Decoder (org.vitrivr.cineast.core.extraction.decode.general.Decoder)2 DefaultImageDecoder (org.vitrivr.cineast.core.extraction.decode.image.DefaultImageDecoder)2 ClusterVisualWords (boofcv.alg.bow.ClusterVisualWords)1 UtilIO (boofcv.io.UtilIO)1 BufferedImage (java.awt.image.BufferedImage)1 IOException (java.io.IOException)1 Paths (java.nio.file.Paths)1 ArrayDeque (java.util.ArrayDeque)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Optional (java.util.Optional)1 ExecutorService (java.util.concurrent.ExecutorService)1 Executors (java.util.concurrent.Executors)1 Supplier (java.util.function.Supplier)1