Example use of org.springframework.cloud.gcp.vision.DocumentOcrResultSet in the spring-cloud-gcp project by spring-cloud.
From the class DocumentOcrTemplateIntegrationTests, method testParseOcrFile:
/**
 * Reads a single OCR JSON output file from the integration-test bucket and
 * checks that the text of page 2 contains the expected sentence.
 */
@Test
public void testParseOcrFile() throws InvalidProtocolBufferException {
    GoogleStorageLocation jsonOutput = GoogleStorageLocation.forFile(
            "vision-integration-test-bucket",
            "json_output_set/test_output-2-to-2.json");

    DocumentOcrResultSet resultSet = this.documentOcrTemplate.readOcrOutputFile(jsonOutput);

    assertThat(resultSet.getPage(2).getText())
            .contains("Hello World. Is mayonnaise an instrument?");
}
Example use of org.springframework.cloud.gcp.vision.DocumentOcrResultSet in the spring-cloud-gcp project by spring-cloud.
From the class WebController, method submitDocument:
/**
 * Uploads the document at the given URL to the OCR bucket and starts OCR on it.
 *
 * <p>The document is copied into {@code ocrBucket} under the resource's filename.
 * OCR is then started with its output directed to {@code ocr_results/<blob name>}
 * in the same bucket. The returned future is registered with
 * {@code ocrStatusReporter}; this method does not wait for OCR to finish.
 *
 * @param documentUrl location of the document, resolved through {@code resourceLoader}
 * @return the {@code submit_done} view
 * @throws IOException if reading the document or writing it to storage fails
 */
@PostMapping("/submitDocument")
public ModelAndView submitDocument(@RequestParam("documentUrl") String documentUrl) throws IOException {
    // Uploads the document to the GCS bucket
    Resource documentResource = resourceLoader.getResource(documentUrl);
    BlobId outputBlobId = BlobId.of(ocrBucket, documentResource.getFilename());
    BlobInfo blobInfo = BlobInfo.newBuilder(outputBlobId).setContentType(getFileType(documentResource)).build();
    // ByteStreams.copy closes neither stream, so the source stream must be managed
    // here alongside the writer; otherwise it leaks on every request.
    try (WriteChannel writer = storage.writer(blobInfo);
            java.io.InputStream documentStream = documentResource.getInputStream()) {
        ByteStreams.copy(documentStream, Channels.newOutputStream(writer));
    }

    // Run OCR on the document
    GoogleStorageLocation documentLocation = GoogleStorageLocation.forFile(outputBlobId.getBucket(), outputBlobId.getName());
    GoogleStorageLocation outputLocation = GoogleStorageLocation.forFolder(outputBlobId.getBucket(), "ocr_results/" + documentLocation.getBlobName());
    ListenableFuture<DocumentOcrResultSet> result = documentOcrTemplate.runOcrForDocument(documentLocation, outputLocation);

    // Hand the pending result to the status reporter, keyed by the document's URI.
    ocrStatusReporter.registerFuture(documentLocation.uriString(), result);
    return new ModelAndView("submit_done");
}
Example use of org.springframework.cloud.gcp.vision.DocumentOcrResultSet in the spring-cloud-gcp project by spring-cloud.
From the class DocumentOcrTemplateIntegrationTests, method testParseOcrResultSet:
/**
 * Reads the set of OCR JSON output files under a folder prefix in the
 * integration-test bucket and checks that the text of page 2 contains the
 * expected sentence.
 */
@Test
public void testParseOcrResultSet() throws InvalidProtocolBufferException {
    GoogleStorageLocation jsonOutputFolder = GoogleStorageLocation.forFolder(
            "vision-integration-test-bucket",
            "json_output_set/");

    DocumentOcrResultSet resultSet = this.documentOcrTemplate.readOcrOutputFileSet(jsonOutputFolder);

    assertThat(resultSet.getPage(2).getText())
            .contains("Hello World. Is mayonnaise an instrument?");
}
Example use of org.springframework.cloud.gcp.vision.DocumentOcrResultSet in the spring-cloud-gcp project by spring-cloud.
From the class DocumentOcrTemplateIntegrationTests, method testDocumentOcrTemplate:
/**
 * Runs OCR on {@code test.pdf} from the integration-test bucket, waits for the
 * result, and verifies the page text both by individual page lookup and by
 * iterating over all pages.
 */
@Test
public void testDocumentOcrTemplate() throws ExecutionException, InterruptedException, InvalidProtocolBufferException, TimeoutException {
    GoogleStorageLocation sourcePdf = GoogleStorageLocation.forFile("vision-integration-test-bucket", "test.pdf");
    GoogleStorageLocation outputPrefix = GoogleStorageLocation.forFile("vision-integration-test-bucket", "it_output/test-");

    // Block for at most five minutes while the OCR job completes.
    DocumentOcrResultSet ocrResults = this.documentOcrTemplate
            .runOcrForDocument(sourcePdf, outputPrefix)
            .get(5, TimeUnit.MINUTES);

    // Spot-check individual pages by number.
    assertThat(ocrResults.getPage(1).getText()).contains("Hello World. Is mayonnaise an instrument?");
    assertThat(ocrResults.getPage(2).getText()).contains("Page 2 stuff");

    // Collect the text of every page, in iteration order, and compare it exactly.
    ArrayList<String> allPageText = new ArrayList<>();
    for (Iterator<TextAnnotation> pages = ocrResults.getAllPages(); pages.hasNext();) {
        allPageText.add(pages.next().getText());
    }
    assertThat(allPageText).containsExactly("Hello World. Is mayonnaise an instrument?\n", "Page 2 stuff\n", "Page 3 stuff\n", "Page 4 stuff\n");
}
Aggregations