Use of org.bytedeco.javacpp.BytePointer in project cs-actions by CloudSlang.
The class OcrService, method extractAllText.
private static String extractAllText(TessBaseAPI api) throws Exception {
    final BytePointer outText;
    final String result;
    outText = api.GetUTF8Text();
    if (outText == null) {
        throw new Exception(TESSERACT_PARSE_ERROR);
    }
    // Copy the native UTF-8 buffer into a Java String before releasing it.
    result = outText.getString();
    outText.deallocate();
    return result;
}
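A minimal call-site sketch, assuming the javacpp-presets Tesseract and Leptonica bindings (pixRead and the file name are illustrative, not part of OcrService):

// Sketch only: drive extractAllText() with a Tesseract instance and a Leptonica image.
TessBaseAPI api = new TessBaseAPI();
if (api.Init(null, "eng") != 0) {      // null datapath: use the default tessdata location
    throw new Exception("Could not initialize Tesseract.");
}
PIX image = pixRead("sample.png");     // illustrative input file
api.SetImage(image);
String text = extractAllText(api);
api.End();                             // release Tesseract's native resources
pixDestroy(image);                     // release the Leptonica image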
Use of org.bytedeco.javacpp.BytePointer in project cineast by vitrivr.
The class FFMpegVideoDecoder, method initVideo.
/**
* Initializes the video decoding part of FFMPEG.
*
* @param config The {@link DecoderConfig} used for configuring the {@link FFMpegVideoDecoder}.
* @return True if the video decoder was initialized, false otherwise.
*/
private boolean initVideo(DecoderConfig config) {
    /* Read decoder config (VIDEO). */
    int maxWidth = config.namedAsInt(CONFIG_MAXWIDTH_PROPERTY, CONFIG_MAXWIDTH_DEFAULT);
    int maxHeight = config.namedAsInt(CONFIG_HEIGHT_PROPERTY, CONFIG_MAXHEIGHT_DEFAULT);

    /* Find the best video stream. */
    final AVCodec codec = avcodec.av_codec_iterate(new Pointer());
    this.videoStream = avformat.av_find_best_stream(this.pFormatCtx, avutil.AVMEDIA_TYPE_VIDEO, -1, -1, codec, 0);
    if (this.videoStream == -1) {
        LOGGER.error("Couldn't find a video stream.");
        return false;
    }

    /* Allocate a new codec context for the codec returned by av_find_best_stream(). */
    this.pCodecCtxVideo = avcodec.avcodec_alloc_context3(codec);
    avcodec.avcodec_parameters_to_context(this.pCodecCtxVideo, this.pFormatCtx.streams(this.videoStream).codecpar());

    /* Open the codec context. */
    if (avcodec.avcodec_open2(this.pCodecCtxVideo, codec, (AVDictionary) null) < 0) {
        LOGGER.error("Error, Could not open video codec.");
        return false;
    }

    /* Allocate an AVFrame structure that will hold the resized video. */
    this.pFrameRGB = avutil.av_frame_alloc();
    if (pFrameRGB == null) {
        LOGGER.error("Error. Could not allocate frame for resized video.");
        return false;
    }

    /* Scale the output down if the source exceeds the configured maximum dimensions. */
    int originalWidth = pCodecCtxVideo.width();
    int originalHeight = pCodecCtxVideo.height();
    int width = originalWidth;
    int height = originalHeight;
    if (originalWidth > maxWidth || originalHeight > maxHeight) {
        float scaleDown = Math.min((float) maxWidth / (float) originalWidth, (float) maxHeight / (float) originalHeight);
        width = Math.round(originalWidth * scaleDown);
        height = Math.round(originalHeight * scaleDown);
        LOGGER.debug("scaling input video down by a factor of {} from {}x{} to {}x{}", scaleDown, originalWidth, originalHeight, width, height);
    }
    bytes = new byte[width * height * 3];
    pixels = new int[width * height];

    /* Initialize data-structures used for the resized image. The buffer is sized for the
     * scaled dimensions so that it matches the layout av_image_fill_arrays() sets up below. */
    int numBytes = avutil.av_image_get_buffer_size(avutil.AV_PIX_FMT_RGB24, width, height, 1);
    this.buffer = new BytePointer(avutil.av_malloc(numBytes));
    avutil.av_image_fill_arrays(this.pFrameRGB.data(), this.pFrameRGB.linesize(), this.buffer, avutil.AV_PIX_FMT_RGB24, width, height, 1);

    /* Initialize the SWS context used for scaling and pixel-format conversion. */
    this.sws_ctx = swscale.sws_getContext(this.pCodecCtxVideo.width(), this.pCodecCtxVideo.height(), this.pCodecCtxVideo.pix_fmt(), width, height, avutil.AV_PIX_FMT_RGB24, swscale.SWS_BILINEAR, null, null, (DoublePointer) null);

    /* Initialize VideoDescriptor. */
    AVRational timebase = this.pFormatCtx.streams(this.videoStream).time_base();
    long duration = (1000L * timebase.num() * this.pFormatCtx.streams(this.videoStream).duration() / timebase.den());
    AVRational framerate = this.pFormatCtx.streams(this.videoStream).avg_frame_rate();
    float fps = ((float) framerate.num()) / ((float) framerate.den());
    this.videoDescriptor = new VideoDescriptor(fps, duration, width, height);

    /* Return true (success). */
    return true;
}
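For context, a hedged sketch of the per-frame conversion this setup enables (pFrame stands for a decoded source AVFrame, which is not shown in this excerpt; the call pattern follows common javacpp FFmpeg usage, not necessarily cineast's exact decode loop):

// Sketch only: convert one decoded frame into the pre-allocated RGB frame, then copy
// the interleaved RGB24 bytes into the `bytes` array allocated in initVideo().
swscale.sws_scale(this.sws_ctx, pFrame.data(), pFrame.linesize(), 0,
        this.pCodecCtxVideo.height(), this.pFrameRGB.data(), this.pFrameRGB.linesize());
this.pFrameRGB.data(0).position(0).get(this.bytes);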
Use of org.bytedeco.javacpp.BytePointer in project djl by deepjavalibrary.
The class JavacppUtils, method createEagerSession.
@SuppressWarnings({ "unchecked", "try" })
public static TFE_Context createEagerSession(boolean async, int devicePlacementPolicy, ConfigProto config) {
    try (PointerScope ignored = new PointerScope()) {
        TFE_ContextOptions opts = TFE_ContextOptions.newContextOptions();
        TF_Status status = TF_Status.newStatus();
        if (config != null) {
            // Serialize the ConfigProto and hand it to the C API as a native byte buffer.
            BytePointer configBytes = new BytePointer(config.toByteArray());
            tensorflow.TFE_ContextOptionsSetConfig(opts, configBytes, configBytes.capacity(), status);
            status.throwExceptionIfNotOK();
        }
        tensorflow.TFE_ContextOptionsSetAsync(opts, (byte) (async ? 1 : 0));
        tensorflow.TFE_ContextOptionsSetDevicePlacementPolicy(opts, devicePlacementPolicy);
        TFE_Context context = AbstractTFE_Context.newContext(opts, status);
        status.throwExceptionIfNotOK();
        // Retain a reference so the context survives the PointerScope's close().
        return context.retainReference();
    }
}
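A hedged call-site sketch (TFE_DEVICE_PLACEMENT_SILENT is assumed to be available from the same TensorFlow C API bindings the method uses; passing null skips the ConfigProto branch):

// Sketch only: create an async eager context with silent device placement and no config override.
TFE_Context ctx = JavacppUtils.createEagerSession(
        true,                                   // async execution
        tensorflow.TFE_DEVICE_PLACEMENT_SILENT, // assumed constant from the C API bindings
        null);                                  // no ConfigProto override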
Use of org.bytedeco.javacpp.BytePointer in project kurento-java by Kurento.
The class BrowserTest, method ocr.
public String ocr(BufferedImage imgBuff) {
    String parsedOut = null;
    try {
        // Color image to pure black and white
        for (int x = 0; x < imgBuff.getWidth(); x++) {
            for (int y = 0; y < imgBuff.getHeight(); y++) {
                Color color = new Color(imgBuff.getRGB(x, y));
                int red = color.getRed();
                int green = color.getGreen();
                int blue = color.getBlue();
                if (red + green + blue > OCR_COLOR_THRESHOLD) {
                    // Black
                    red = green = blue = 0;
                } else {
                    // White
                    red = green = blue = 255;
                }
                Color col = new Color(red, green, blue);
                imgBuff.setRGB(x, y, col.getRGB());
            }
        }
        // OCR recognition
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ImageIO.write(imgBuff, "png", baos);
        byte[] imageBytes = baos.toByteArray();
        TessBaseAPI api = new TessBaseAPI();
        api.Init(null, "eng");
        ByteBuffer imgBB = ByteBuffer.wrap(imageBytes);
        PIX image = pixReadMem(imgBB, imageBytes.length);
        api.SetImage(image);
        // Get the OCR result, copying it into a Java String before the native buffer is freed
        BytePointer outText = api.GetUTF8Text();
        String rawOut = outText.getString();
        // Destroy used objects and release memory
        api.End();
        api.close();
        outText.deallocate();
        pixDestroy(image);
        // OCR corrections for characters commonly misread in the timestamp
        parsedOut = rawOut.replaceAll("l", "1").replaceAll("Z", "2").replaceAll("O", "0")
            .replaceAll("B", "8").replaceAll("G", "6").replaceAll("S", "8")
            .replaceAll("'", "").replaceAll("‘", "").replaceAll("\\.", ":")
            .replaceAll("E", "8").replaceAll("o", "0").replaceAll("fl", "0")
            .replaceAll("fi", "6").replaceAll("§", "5").replaceAll("I", "1")
            .replaceAll("T", "7").replaceAll("’", "").replaceAll("U", "0")
            .replaceAll("D", "0");
        if (parsedOut.length() > 7) {
            parsedOut = parsedOut.substring(0, 7) + ":" + parsedOut.substring(8);
        }
        parsedOut = parsedOut.replaceAll("::", ":");
        // Remove the last part (number of frames)
        int iSpace = parsedOut.lastIndexOf(" ");
        if (iSpace != -1) {
            parsedOut = parsedOut.substring(0, iSpace);
        }
    } catch (IOException e) {
        log.warn("IOException in OCR", e);
    }
    return parsedOut;
}
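Because GetUTF8Text() returns a native buffer that the caller must free, a small try/finally sketch of the read-then-release pattern may be useful (the helper name is hypothetical; the calls are the same ones used above):

// Sketch only: copy the OCR result onto the JVM heap, then always free the native buffer.
static String readAndRelease(TessBaseAPI api) {
    BytePointer outText = api.GetUTF8Text();
    if (outText == null) {
        return null;
    }
    try {
        return outText.getString(); // copy while the native memory is still alive
    } finally {
        outText.deallocate();       // release exactly once
    }
}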
Use of org.bytedeco.javacpp.BytePointer in project myrobotlab by MyRobotLab.
The class OpenCVFilterFaceRecognizer, method train.
/**
 * This method loads all of the image files in the training directory. The name
 * of each image's parent directory is used as the label to apply to the image.
 * At least 2 different labels must exist in the training set.
 *
 * @return true if the training was successful.
 * @throws IOException if an image file cannot be read.
 */
public boolean train() throws IOException {
    // Load the face mask filter, if one hasn't been loaded yet.
    if (facemask == null) {
        File filterfile = new File("resource/facerec/Filter.png");
        if (!filterfile.exists()) {
            // work-around so this also works when run from Eclipse
            filterfile = new File("src/resource/facerec/Filter.png");
        }
        if (!filterfile.exists()) {
            log.warn("No image filter file found. {}", filterfile.getAbsolutePath());
        } else {
            // Read the filter and rescale it to the current image size
            // BytePointer fbp = new BytePointer(FileUtils.getFileAsBytes(filterfile.getAbsolutePath()));
            // Mat incomingfacemask = imread(fbp, CV_LOAD_IMAGE_GRAYSCALE);
            Mat incomingfacemask = imread(filterfile.getAbsolutePath(), CV_LOAD_IMAGE_GRAYSCALE);
            facemask = resizeImage(incomingfacemask);
            if (debug) {
                show(facemask, "Face Mask");
            }
        }
    }
    File root = new File(trainingDir);
    if (root.isFile()) {
        log.warn("Training directory was a file, not a directory. {}", root.getAbsolutePath());
        return false;
    }
    if (!root.exists()) {
        log.info("Creating new training directory {}", root.getAbsolutePath());
        root.mkdirs();
    }
    log.info("Using {} for training data.", root.getAbsolutePath());
    ArrayList<File> imageFiles = listImageFiles(root);
    if (imageFiles.size() < 1) {
        log.info("No images found for training.");
        return false;
    }
    // Storage for the images that we load.
    MatVector images = new MatVector(imageFiles.size());
    // Storage for the labels of the images.
    Mat labels = new Mat(imageFiles.size(), 1, CV_32SC1);
    IntBuffer labelsBuf = labels.getIntBuffer();
    int counter = 0;
    // A map between the hash code and the string label.
    HashMap<Integer, String> idToLabelMap = new HashMap<Integer, String>();
    for (File image : imageFiles) {
        log.info("Loading training image file: {}", image.getAbsolutePath());
        // imread doesn't work with non-ASCII file paths, so we use a different approach:
        // load the image into memory, wrap it in a BytePointer, and pass it to imdecode
        // to decode the image from memory instead of from disk.
        byte[] tmpImg = FileUtils.getFileAsBytes(image);
        Mat img = imdecode(new Mat(new BytePointer(tmpImg)), CV_LOAD_IMAGE_GRAYSCALE);
        // IplImage tempImg = cvLoadImage(image.getAbsolutePath());
        // Mat img = converterToMat.convertToMat(converterToMat.convert(tempImg));
        // The directory name is the label.
        String personName = image.getParentFile().getName();
        // String personName = UnicodeFolder.get(image.getParentFile().getName());
        // TODO: we need an integer to represent this string; for now we're using a
        // hash code, which can definitely collide! We really need a better metadata
        // store for these images.
        int label = personName.hashCode();
        // Make sure all our training images are resized.
        Mat resized = resizeImage(img);
        if (facemask != null) {
            Mat maskedface = facemask.clone();
            resized.copyTo(maskedface, facemask);
            resized = maskedface;
        }
        if (debug) {
            show(resized, personName);
        }
        // Our training images are indexed by integer.
        images.put(counter, resized);
        labelsBuf.put(counter, label);
        // Keep track of which string the hash code maps to.
        idToLabelMap.put(label, personName);
        counter++;
    }
    initRecognizer();
    // There must be at least 2 classes to classify: is it A or B?
    if (idToLabelMap.keySet().size() > 1) {
        faceRecognizer.train(images, labels);
        trained = true;
    } else {
        log.info("Not enough labeled images loaded. Training skipped.");
        trained = false;
    }
    // Populate the human-readable labels.
    for (int k : idToLabelMap.keySet()) {
        faceRecognizer.setLabelInfo(k, idToLabelMap.get(k));
    }
    // Report whether training actually happened, matching the javadoc contract.
    return trained;
}
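To close the loop, a hedged sketch of running the trained recognizer on a new image (faceRecognizer, resizeImage, and the imdecode trick come from the class above; the input file is hypothetical):

// Sketch only: predict the label of a single candidate face image loaded from memory.
byte[] candidateBytes = FileUtils.getFileAsBytes(new File("candidate.png")); // hypothetical input
Mat candidate = resizeImage(imdecode(new Mat(new BytePointer(candidateBytes)), CV_LOAD_IMAGE_GRAYSCALE));
IntPointer predictedLabel = new IntPointer(1);
DoublePointer confidence = new DoublePointer(1);
faceRecognizer.predict(candidate, predictedLabel, confidence);
// The predicted integer maps back to a person via the label info set during train().
log.info("Predicted label {} with distance {}", predictedLabel.get(0), confidence.get(0));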