Usage example of org.vitrivr.cineast.core.extraction.decode.general.Decoder in the cineast project by vitrivr:
the generate method of class ImageCodebookGenerator.
/**
 * Builds a visual codebook of the given number of words from all decodable images below
 * {@code source} and writes the resulting vocabulary to {@code destination}.
 *
 * @param source      root directory that is walked recursively for candidate image files
 * @param destination path the resulting vocabulary (cluster assignment) is saved to
 * @param words       number of codebook words (cluster centers) to generate
 * @throws IOException if walking the source tree fails
 */
@Override
public void generate(Path source, Path destination, int words) throws IOException {
  long start = System.currentTimeMillis();
  final Decoder<BufferedImage> decoder = new DefaultImageDecoder();
  final MimetypesFileTypeMap filetypemap = new MimetypesFileTypeMap("mime.types");

  /* Filter the list of files by MIME type and aggregate them into a deque.
   * The stream returned by Files.walk() holds directory handles and must be closed,
   * hence the try-with-resources (the original leaked it). */
  final ArrayDeque<Path> files;
  try (java.util.stream.Stream<Path> walk = Files.walk(source)) {
    files = walk.filter(path -> {
      if (decoder.supportedFiles() != null) {
        String type = filetypemap.getContentType(path.toString());
        return decoder.supportedFiles().contains(type);
      }
      return true;
    }).collect(Collectors.toCollection(ArrayDeque::new));
  }

  /* Prepare data-structures to track progress. */
  int max = files.size();
  int counter = 0;
  int skipped = 0;
  char[] progressBar = new char[15];
  /* Guard against division by zero when there are fewer files than progress-bar cells. */
  int update = Math.max(1, max / progressBar.length);

  System.out.println(String.format("Creating codebook of %d words from %d files.", words, max));

  /* Iterate over the file deque; every element that has been processed is removed from it. */
  Path path = null;
  while ((path = files.poll()) != null) {
    if (decoder.init(path, null, null)) {
      BufferedImage image = decoder.getNext();
      if (image != null) {
        this.process(image);
      } else {
        skipped++; /* Decoder initialized but produced no image. */
      }
    } else {
      skipped++; /* Decoder could not be initialized for this file. */
    }
    if (counter % update == 0) {
      this.updateProgressBar(progressBar, max, counter);
    }
    /* Divide bytes by 1e9 so the printed figures actually match the "GB" label
     * (the original divided by 1e6, printing MB as GB). */
    System.out.print(String.format("\rAdding vectors to codebook: %d/%d files processed (%d skipped) |%s| (Memory left: %.2f/%.2f GB)", counter, max, skipped, String.valueOf(progressBar), Runtime.getRuntime().freeMemory() / 1000000000.0f, Runtime.getRuntime().totalMemory() / 1000000000.0f));
    counter++;
  }

  /* Start clustering. */
  System.out.println("\nClustering... this could take a while.");
  this.cluster.process(words);

  /* Save the vocabulary. */
  System.out.println(String.format("Saving vocabulary with %d entries.", words));
  UtilIO.save(this.cluster.getAssignment(), destination.toString());

  /* Report elapsed time as hour/minute/second remainders; the original printed the
   * TOTAL minutes and TOTAL seconds (e.g. "1hours 75min 4500sec"). */
  long duration = System.currentTimeMillis() - start;
  long hours = TimeUnit.MILLISECONDS.toHours(duration);
  long minutes = TimeUnit.MILLISECONDS.toMinutes(duration) % 60;
  long seconds = TimeUnit.MILLISECONDS.toSeconds(duration) % 60;
  System.out.println(String.format("Done! Took me %dhours %dmin %dsec", hours, minutes, seconds));
}
Usage example of org.vitrivr.cineast.core.extraction.decode.general.Decoder in the cineast project by vitrivr:
the run method of class GenericExtractionItemHandler.
/**
 * Main extraction loop: initializes all metadata extractors, then repeatedly fetches the
 * next extraction item, (re)uses or creates a decoder/segmenter pair for its media type,
 * segments the decoded media, persists object/segment/metadata descriptors and emits each
 * segment into the extraction pipeline. Runs until {@link #nextItem()} returns null, then
 * calls {@link #shutdown()}.
 */
@Override
@SuppressWarnings("unchecked")
public void run() {
  LOGGER.info("Starting extraction");
  this.executorService.execute(pipeline);
  final ObjectIdGenerator generator = this.context.objectIdGenerator();
  Pair<ExtractionItemContainer, MediaType> pair = null;

  /* Initialize all metadata extractors. */
  for (MetadataExtractor extractor : this.metadataExtractors) {
    LOGGER.debug("Initializing metadata extractor {}", extractor.getClass().getSimpleName());
    if (extractor instanceof MetadataFeatureModule) {
      ((MetadataFeatureModule<?>) extractor).init(this.context.persistencyWriter());
    } else {
      extractor.init();
    }
  }

  /* Process until there's nothing left. */
  while ((pair = this.nextItem()) != null) {
    try {
      LOGGER.debug("Processing path {} and mediatype {}", pair.getLeft(), pair.getRight());
      if (handlerCache.get(pair.getRight()) == null) {
        LOGGER.error("Unknown mediatype {}, exiting extraction", pair.getRight());
        break;
      }

      /* Clear the non-reusable segmenter; keep the cached decoder only if it reports
       * that it can be reused for the next file. */
      handlerCache.compute(pair.getRight(), (mediaType, cache) -> {
        Decoder decoder = null;
        if (cache.getLeft() != null && cache.getLeft().canBeReused()) {
          decoder = cache.getLeft();
        }
        return ImmutablePair.of(decoder, null);
      });

      /* Put a new decoder in the cache if there's not one already there. */
      if (handlerCache.get(pair.getRight()).getLeft() == null) {
        Decoder decoder = handlers.get(pair.getRight()).getLeft().get();
        handlerCache.compute(pair.getRight(), (mediaType, cache) -> new ImmutablePair<>(decoder, cache.getRight()));
      }

      /* Put a new segmenter in the cache if there's not one already there. */
      if (handlerCache.get(pair.getRight()).getRight() == null) {
        Segmenter segmenter = handlers.get(pair.getRight()).getRight().get();
        handlerCache.compute(pair.getRight(), (mediaType, cache) -> new ImmutablePair<>(cache.getLeft(), segmenter));
      }

      Decoder decoder = handlerCache.get(pair.getRight()).getLeft();
      /* Use the segmenter cached just above. The original called the supplier again
       * here, creating a second, fresh segmenter and leaving the cached one unused. */
      Segmenter segmenter = handlerCache.get(pair.getRight()).getRight();

      if (decoder.init(pair.getLeft().getPathForExtraction(), Config.sharedConfig().getDecoders().get(pair.getRight()), Config.sharedConfig().getCache())) {
        /* Create / lookup MediaObjectDescriptor for the new file. */
        final MediaObjectDescriptor descriptor = this.fetchOrCreateMultimediaObjectDescriptor(generator, pair.getLeft(), pair.getRight());
        if (!this.checkAndPersistMultimediaObject(descriptor)) {
          continue;
        }
        final String objectId = descriptor.getObjectId();
        int segmentNumber = 1;
        segmenter.init(decoder, descriptor);
        this.executorService.execute(segmenter);

        /* Drain the segmenter until it reports completion. */
        while (!segmenter.complete()) {
          try {
            final SegmentContainer container = segmenter.getNext();
            if (container != null) {
              /* Create segment-descriptor and try to persist it. */
              MediaSegmentDescriptor mediaSegmentDescriptor;
              if (container.getId() != null) {
                /* Special case: segment ID is determined by the container (image sequences only). */
                mediaSegmentDescriptor = this.fetchOrCreateSegmentDescriptor(objectId, container.getId(), segmentNumber, container.getStart(), container.getEnd(), container.getAbsoluteStart(), container.getAbsoluteEnd());
              } else {
                mediaSegmentDescriptor = this.fetchOrCreateSegmentDescriptor(objectId, segmentNumber, container.getStart(), container.getEnd(), container.getAbsoluteStart(), container.getAbsoluteEnd());
              }
              container.setId(mediaSegmentDescriptor.getSegmentId());
              container.setSuperId(mediaSegmentDescriptor.getObjectId());
              if (!this.checkAndPersistSegment(mediaSegmentDescriptor)) {
                continue;
              }
              /* Retry emission while the pipeline is full, sleeping between attempts. */
              int emissionTimeout = 1000;
              while (!this.pipeline.emit(container, emissionTimeout)) {
                LOGGER.debug("ExtractionPipeline is full - deferring emission of segment. Consider increasing the thread-pool count for the extraction pipeline.");
                Thread.sleep(emissionTimeout);
              }
              segmentNumber += 1;
            }
          } catch (InterruptedException e) {
            /* Restore the interrupt flag so callers further up the stack can observe it. */
            Thread.currentThread().interrupt();
            LOGGER.log(Level.ERROR, "Thread was interrupted while the extraction process was running. Aborting...");
            break;
          }
        }

        /* Persist metadata carried by the item itself, then run the metadata extractors. */
        List<MediaObjectMetadataDescriptor> metadata = pair.getLeft().getMetadata().stream().map(el -> MediaObjectMetadataDescriptor.fromExisting(el, objectId)).collect(Collectors.toList());
        this.metadataWriter.write(metadata);
        this.extractAndPersistMetadata(pair.getLeft(), objectId);

        /* Force flush the segment, object and metadata information. */
        this.mediaSegmentWriter.flush();
        this.objectWriter.flush();
        this.metadataWriter.flush();
      } else {
        LOGGER.error("Failed to initialize decoder. File is being skipped...");
      }

      /* Increment the files counter. */
      this.count_processed += 1;

      /* Create a fresh decoder for the next file if the current one cannot be reused. */
      if (!decoder.canBeReused()) {
        decoder.close();
        final MediaType type = pair.getRight();
        handlerCache.compute(type, (mediaType, cache) -> ImmutablePair.of(handlers.get(type).getLeft().get(), cache.getRight()));
      }

      for (ExtractionCompleteListener completeListener : this.completeListeners) {
        completeListener.onCompleted(pair.getLeft());
      }

      /* Trigger garbage collection once in a while. This is specially relevant when many
       * small files are processed, since unused allocated memory could accumulate and
       * trigger swapping. */
      if (this.count_processed % 50 == 0) {
        System.gc();
      }
    } catch (Throwable t) {
      /* Log the throwable through the logger (full stack trace) instead of printStackTrace(). */
      LOGGER.error("Exception while processing path {}", pair.getLeft(), t);
    }
  }
  shutdown();
}
Aggregations