use of org.apache.tika.mime.MediaType in project tika by apache.
the class TesseractOCRParserTest method offersNoTypesIfNotFound.
/**
 * Verifies that when Tesseract cannot be found, the TesseractOCRParser claims to support
 * no file types, so that the standard image parser is selected instead.
 */
@Test
public void offersNoTypesIfNotFound() throws Exception {
    TesseractOCRParser ocrParser = new TesseractOCRParser();
    DefaultParser fallbackParser = new DefaultParser();
    MediaType pngType = MediaType.image("png");

    // Configure a Tesseract location that does not exist
    TesseractOCRConfig brokenConfig = new TesseractOCRConfig();
    brokenConfig.setTesseractPath("/made/up/path");

    ParseContext context = new ParseContext();
    context.set(TesseractOCRConfig.class, brokenConfig);

    // The OCR parser must not advertise any supported type
    assertEquals(0, ocrParser.getSupportedTypes(context).size());

    // ... and DefaultParser must resolve PNG to the plain ImageParser
    assertEquals(
            ImageParser.class,
            fallbackParser.getParsers(context).get(pngType).getClass());
}
use of org.apache.tika.mime.MediaType in project tika by apache.
the class PDFParserTest method testInitializationOfNonPrimitivesViaConfig.
/**
 * Checks that non-primitive PDFParser settings (OCR strategy and OCR image type) declared in
 * the tika-config-non-primitives.xml resource are applied to the configured PDFParser.
 */
@Test
public void testInitializationOfNonPrimitivesViaConfig() throws Exception {
    TikaConfig tikaConfig;
    // try-with-resources makes sure the config stream is closed, even if an assertion
    // or the TikaConfig constructor throws
    try (InputStream is = getClass().getResourceAsStream(
            "/org/apache/tika/parser/pdf/tika-config-non-primitives.xml")) {
        assertNotNull(is);
        tikaConfig = new TikaConfig(is);
    }
    AutoDetectParser p = new AutoDetectParser(tikaConfig);
    Map<MediaType, Parser> parsers = p.getParsers();
    // The parser registered for application/pdf is a composite; unwrap it to reach the PDFParser
    Parser composite = parsers.get(MediaType.application("pdf"));
    Parser pdfParser = ((CompositeParser) composite).getParsers().get(MediaType.application("pdf"));
    assertEquals("org.apache.tika.parser.pdf.PDFParser", pdfParser.getClass().getName());
    assertEquals(PDFParserConfig.OCR_STRATEGY.OCR_ONLY, ((PDFParser) pdfParser).getPDFParserConfig().getOcrStrategy());
    assertEquals(ImageType.RGB, ((PDFParser) pdfParser).getPDFParserConfig().getOcrImageType());
}
use of org.apache.tika.mime.MediaType in project uPortal by Jasig.
the class JaxbPortalDataHandlerService method getMediaType.
/**
 * Detects the media type of the given stream using a {@link DefaultDetector}, passing the
 * file name along as resource-name metadata.
 *
 * <p>The stream is wrapped so that closing the detection stream does not close
 * {@code inputStream}; afterwards {@code inputStream} is reset so that whatever the detector
 * read is available again. NOTE(review): presumably the caller has marked the stream before
 * calling this method - confirm.
 *
 * @param inputStream the stream to inspect
 * @param fileName    the name of the resource, used in the metadata and in log output
 * @return the detected media type, or {@code null} if detection failed with an IOException.
 *         NOTE(review): the warning logged in that case says "assuming XML" although
 *         {@code null} is returned - presumably the caller applies that fallback; confirm.
 * @throws IOException if resetting {@code inputStream} fails
 */
protected MediaType getMediaType(BufferedInputStream inputStream, String fileName) throws IOException {
    final CloseShieldInputStream shielded = new CloseShieldInputStream(inputStream);
    final TikaInputStream detectionStream = TikaInputStream.get(shielded);
    try {
        final Detector tikaDetector = new DefaultDetector();
        final Metadata hints = new Metadata();
        hints.set(Metadata.RESOURCE_NAME_KEY, fileName);

        final MediaType detected = tikaDetector.detect(detectionStream, hints);
        logger.debug("Determined '{}' for '{}'", detected, fileName);
        return detected;
    } catch (IOException e) {
        logger.warn("Failed to determine media type for '" + fileName + "' assuming XML", e);
        return null;
    } finally {
        IOUtils.closeQuietly(detectionStream);
        // Rewind the buffered stream to undo anything consumed during detection
        inputStream.reset();
    }
}
use of org.apache.tika.mime.MediaType in project winery by eclipse.
the class WriterUtils method storeDefinitions.
/**
 * Stores every element contained in the given definitions in the repository.
 *
 * <p>The definitions are first written to a freshly created temporary directory via
 * {@code saveDefinitions} and read back via {@code loadDefinitions}; the elements of the
 * re-loaded definitions are then stored one by one, each wrapped in its own definitions
 * document. For artifact templates, relative artifact references are resolved against
 * {@code dir} and the referenced files are copied into the repository as well.
 *
 * <p>Failures while deleting or writing repository content do not abort processing of the
 * remaining elements; they are collected and logged once all elements have been handled.
 *
 * @param definitions the definitions whose elements are to be stored
 * @param overwrite   if {@code true}, an element that already exists in the repository is
 *                    deleted and stored again; if {@code false}, existing elements are skipped
 * @param dir         the directory against which relative artifact references are resolved
 */
public static void storeDefinitions(Definitions definitions, boolean overwrite, Path dir) {
    final Path path;
    try {
        path = Files.createTempDirectory("winery");
    } catch (IOException e) {
        // Without the temporary directory the definitions can be neither saved nor re-loaded,
        // so there is nothing sensible left to do.
        LOGGER.error("Could not create temporary directory for storing definitions", e);
        return;
    }
    // NOTE(review): the temporary directory is not removed here - confirm whether cleanup is needed.
    LOGGER.debug("Store definition: {}", definitions.getId());
    saveDefinitions(definitions, path, definitions.getTargetNamespace(), definitions.getId());
    Definitions cleanDefinitions = loadDefinitions(path, definitions.getTargetNamespace(), definitions.getId());
    CsarImporter csarImporter = new CsarImporter();
    List<Exception> exceptions = new ArrayList<>();
    cleanDefinitions.getServiceTemplateOrNodeTypeOrNodeTypeImplementation().forEach(entry -> {
        String namespace = csarImporter.getNamespace(entry, definitions.getTargetNamespace());
        csarImporter.setNamespace(entry, namespace);
        String id = ModelUtilities.getId(entry);
        Class<? extends DefinitionsChildId> widClazz = Util.getComponentIdClassForTExtensibleElements(entry.getClass());
        final DefinitionsChildId wid = BackendUtils.getDefinitionsChildId(widClazz, namespace, id, false);
        if (RepositoryFactory.getRepository().exists(wid)) {
            if (overwrite) {
                try {
                    RepositoryFactory.getRepository().forceDelete(wid);
                } catch (IOException e) {
                    exceptions.add(e);
                }
            } else {
                // element already exists and must not be overwritten: skip it
                return;
            }
        }
        if (entry instanceof TArtifactTemplate) {
            TArtifactTemplate.ArtifactReferences artifactReferences = ((TArtifactTemplate) entry).getArtifactReferences();
            Stream.of(artifactReferences).filter(Objects::nonNull).flatMap(ref -> ref.getArtifactReference().stream()).filter(Objects::nonNull).forEach(ref -> {
                String reference = ref.getReference();
                URI refURI;
                try {
                    refURI = new URI(reference);
                } catch (URISyntaxException e) {
                    LOGGER.error("Invalid URI {}", reference, e);
                    return;
                }
                if (refURI.isAbsolute()) {
                    // only relative references are copied
                    return;
                }
                Path artifactPath = dir.resolve(reference);
                if (!Files.exists(artifactPath)) {
                    LOGGER.error("File not found {}", artifactPath);
                    return;
                }
                ArtifactTemplateFilesDirectoryId aDir = new ArtifactTemplateFilesDirectoryId((ArtifactTemplateId) wid);
                RepositoryFileReference aFile = new RepositoryFileReference(aDir, artifactPath.getFileName().toString());
                try (InputStream is = Files.newInputStream(artifactPath);
                     BufferedInputStream bis = new BufferedInputStream(is)) {
                    MediaType mediaType = BackendUtils.getMimeType(bis, artifactPath.getFileName().toString());
                    RepositoryFactory.getRepository().putContentToFile(aFile, bis, mediaType);
                } catch (IOException e) {
                    LOGGER.error("Could not read artifact template file: {}", artifactPath, e);
                }
            });
        }
        // wrap the single element in its own definitions document and store that
        final Definitions part = BackendUtils.createWrapperDefinitions(wid);
        part.getServiceTemplateOrNodeTypeOrNodeTypeImplementation().add(entry);
        RepositoryFileReference ref = BackendUtils.getRefOfDefinitions(wid);
        String content = BackendUtils.getXMLAsString(part, true);
        try {
            RepositoryFactory.getRepository().putContentToFile(ref, content, MediaTypes.MEDIATYPE_TOSCA_DEFINITIONS);
        } catch (Exception e) {
            exceptions.add(e);
        }
    });
    // Report the failures collected above instead of silently dropping them
    for (Exception e : exceptions) {
        LOGGER.error("Error while storing definitions {}", definitions.getId(), e);
    }
}
use of org.apache.tika.mime.MediaType in project winery by eclipse.
the class CsarImporter method importOtherImport.
/**
 * Imports a non-TOSCA import (for example an XSD) referenced by the given import element into
 * the repository.
 *
 * <p>SIDE EFFECT: modifies the location of imp to point to the correct relative location (when
 * read from the exported CSAR)
 *
 * <p>Problems that prevent the import (absolute URI, invalid path, missing file) are reported
 * by adding a message to {@code errors}; the method then returns without importing anything.
 *
 * @param rootPath  the absolute path where to resolve files from
 * @param imp       the import element to process; its location is changed by this method
 * @param errors    list to which error and information messages are appended
 * @param type      the import type; must not be the TOSCA namespace
 * @param overwrite whether the file is written again if the import already exists in the repository
 * @throws IllegalStateException if the imported file cannot be read or stored
 */
private void importOtherImport(Path rootPath, TImport imp, final List<String> errors, String type, boolean overwrite) {
    assert (!type.equals(Namespaces.TOSCA_NAMESPACE));
    String loc = imp.getLocation();
    if (!Util.isRelativeURI(loc)) {
        // This is just an information message
        errors.add("Absolute URIs are not resolved by Winery (" + loc + ")");
        return;
    }
    // location URLs are encoded: http://www.w3.org/TR/2001/WD-charmod-20010126/#sec-URIs, RFC http://www.ietf.org/rfc/rfc2396.txt
    loc = Util.URLdecode(loc);
    Path path;
    try {
        path = rootPath.resolve(loc);
    } catch (Exception e) {
        // java.nio.file.InvalidPathException could be thrown which is a RuntimeException
        errors.add(e.getMessage());
        return;
    }
    if (!Files.exists(path)) {
        // fallback for older CSARs, where the location is given from the root
        // Path.getParent() returns null if rootPath has no parent; treat that like a missing file
        final Path parent = rootPath.getParent();
        final Path fallback = (parent == null) ? null : parent.resolve(loc);
        if (fallback == null || !Files.exists(fallback)) {
            errors.add(String.format("File %1$s does not exist", loc));
            return;
        }
        path = fallback;
    }
    String namespace = imp.getNamespace();
    String fileName = path.getFileName().toString();
    // Convention: id of import is filename without extension
    String id = FilenameUtils.removeExtension(fileName);
    GenericImportId rid;
    if (type.equals(XMLConstants.W3C_XML_SCHEMA_NS_URI)) {
        rid = new XSDImportId(namespace, id, false);
    } else {
        rid = new GenericImportId(namespace, id, false, type);
    }
    boolean importDataExistsInRepo = RepositoryFactory.getRepository().exists(rid);
    if (!importDataExistsInRepo) {
        // We have to
        // a) create a .definitions file
        // b) put the file itself in the repo
        // Create the definitions file
        TDefinitions defs = BackendUtils.createWrapperDefinitions(rid);
        defs.getImport().add(imp);
        // QUICK HACK: We change the imp object's location here and below again
        // This is "OK" as "storeDefinitions" serializes the current state and not the future state of the imp object
        // change the location to point to the file in the folder of the .definitions file
        imp.setLocation(fileName);
        // put the definitions file to the repository
        CsarImporter.storeDefinitions(rid, defs);
    }
    // put the file itself to the repo
    // ref is required to generate fileRef
    RepositoryFileReference ref = BackendUtils.getRefOfDefinitions(rid);
    RepositoryFileReference fileRef = new RepositoryFileReference(ref.getParent(), fileName);
    // location is relative to Definitions/
    // even if the import already exists, we have to adapt the path
    // URIs are encoded
    String newLoc = "../" + Util.getUrlPath(fileRef);
    imp.setLocation(newLoc);
    if (!importDataExistsInRepo || overwrite) {
        // finally write the file to the storage
        try (InputStream is = Files.newInputStream(path);
             BufferedInputStream bis = new BufferedInputStream(is)) {
            MediaType mediaType;
            if (type.equals(XMLConstants.W3C_XML_SCHEMA_NS_URI)) {
                mediaType = MediaTypes.MEDIATYPE_XSD;
            } else {
                mediaType = BackendUtils.getMimeType(bis, fileName);
            }
            RepositoryFactory.getRepository().putContentToFile(fileRef, bis, mediaType);
        } catch (IllegalArgumentException | IOException e) {
            throw new IllegalStateException(e);
        }
        // we have to update the cache in case of a new XSD to speedup usage of winery
        if (rid instanceof XSDImportId) {
            // We do the initialization asynchronously
            // We do not check whether the XSD has already been checked
            // We cannot just check whether an XSD already has been handled since the XSD could change over time
            // Synchronization at org.eclipse.winery.repository.resources.imports.xsdimports.XSDImportResource.getAllDefinedLocalNames(short) also isn't feasible as the backend doesn't support locks
            CsarImporter.xsdParsingService.submit(() -> {
                CsarImporter.LOGGER.debug("Updating XSD import cache data");
                // We call the queries without storing the result:
                // We use the SIDEEFFECT that a cache is created
                final XsdImportManager xsdImportManager = RepositoryFactory.getRepository().getXsdImportManager();
                xsdImportManager.getAllDeclaredElementsLocalNames();
                xsdImportManager.getAllDefinedTypesLocalNames();
                CsarImporter.LOGGER.debug("Updated XSD import cache data");
            });
        }
    }
}
Aggregations