use of org.apache.uima.util.XMLInputSource in project tika by apache.
the class CTAKESUtils method getAnalysisEngine.
/**
 * Creates a new UIMA Analysis Engine (AE) from an XML descriptor located on
 * the classpath. A fresh engine is produced on every call; this method does
 * not cache or reuse engines.
 *
 * <p>
 * An Analysis Engine is a component responsible for analyzing unstructured
 * information, discovering and representing semantic content. Unstructured
 * information includes, but is not restricted to, text documents.
 * </p>
 *
 * <p>
 * If both {@code umlsUser} and {@code umlsPass} are non-null and non-empty,
 * they are stored in the JVM-wide system properties named by
 * {@code CTAKES_UMLS_USER} and {@code CTAKES_UMLS_PASS} before the engine is
 * produced. Otherwise the system properties are left untouched.
 * </p>
 *
 * @param aeDescriptor
 *            classpath resource name (resolved with
 *            {@link Class#getResource(String)} against {@code CTAKESUtils})
 *            of the XML file holding the AnalysisEngineDescription that
 *            contains all of the information needed to instantiate and
 *            use an AnalysisEngine.
 * @param umlsUser
 *            UMLS username for NLM database; may be {@code null} or empty.
 * @param umlsPass
 *            UMLS password for NLM database; may be {@code null} or empty.
 * @return an Analysis Engine for analyzing unstructured information.
 * @throws IOException
 *             if the descriptor cannot be found on the classpath or any
 *             other I/O error occurs while reading it.
 * @throws InvalidXMLException
 *             if the input XML is not valid or does not specify a valid
 *             ResourceSpecifier.
 * @throws ResourceInitializationException
 *             if a failure occurred during production of the resource.
 * @throws URISyntaxException
 *             retained in the signature for backward compatibility with
 *             existing callers; the descriptor is opened directly from its
 *             URL, so no URI conversion is performed any more.
 */
public static AnalysisEngine getAnalysisEngine(String aeDescriptor, String umlsUser, String umlsPass) throws IOException, InvalidXMLException, ResourceInitializationException, URISyntaxException {
    // Locate the descriptor on the classpath. Fail with a descriptive
    // IOException instead of a bare NullPointerException when it is missing.
    java.net.URL aeDescriptorUrl = CTAKESUtils.class.getResource(aeDescriptor);
    if (aeDescriptorUrl == null) {
        throw new java.io.FileNotFoundException("Analysis engine descriptor not found on classpath: " + aeDescriptor);
    }

    // Open the descriptor through its URL rather than a file-system path so
    // that descriptors packaged inside a jar can be read as well.
    XMLInputSource aeInputSource = new XMLInputSource(aeDescriptorUrl);
    ResourceSpecifier aeSpecifier;
    try {
        // get Resource Specifier from XML
        aeSpecifier = UIMAFramework.getXMLParser().parseResourceSpecifier(aeInputSource);
    } finally {
        // Always release the underlying stream, even when parsing fails.
        aeInputSource.close();
    }

    // UMLS user ID and password
    if ((umlsUser != null) && (!umlsUser.isEmpty()) && (umlsPass != null) && (!umlsPass.isEmpty())) {
        /*
         * It is highly recommended that you change UMLS credentials in the
         * XML configuration file instead of giving user and password using
         * CTAKESConfig.
         */
        System.setProperty(CTAKES_UMLS_USER, umlsUser);
        System.setProperty(CTAKES_UMLS_PASS, umlsPass);
    }

    // create AE (after the credentials have been published)
    return UIMAFramework.produceAnalysisEngine(aeSpecifier);
}
use of org.apache.uima.util.XMLInputSource in project lucene-solr by apache.
the class BasicAEProvider method getAE.
/**
 * Produces an Analysis Engine from the cached engine description, building
 * and caching that description on first use.
 *
 * <p>
 * While holding this object's monitor, and only if {@code cachedDescription}
 * is still {@code null}, the description is parsed from the source returned
 * by {@code getInputSource()} and passed to {@code configureDescription}.
 * The description stays cached only if both steps succeed; on any failure
 * the cache is cleared so that the next call retries from scratch instead of
 * using a parsed-but-unconfigured description.
 * </p>
 *
 * @return an engine produced by {@code UIMAFramework.produceAnalysisEngine}
 *         from the cached description.
 * @throws ResourceInitializationException
 *             if reading, parsing or configuring the description fails, if
 *             closing the input stream after a successful load fails, or if
 *             producing the engine fails.
 */
@Override
public AnalysisEngine getAE() throws ResourceInitializationException {
  synchronized (this) {
    if (cachedDescription == null) {
      XMLInputSource in = null;
      boolean success = false;
      try {
        // get Resource Specifier from XML file
        in = getInputSource();
        // get AE description
        cachedDescription = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(in);
        configureDescription(cachedDescription);
        success = true;
      } catch (Exception e) {
        throw new ResourceInitializationException(e);
      } finally {
        if (success) {
          // Nothing else has failed, so a close failure is reported to the caller.
          try {
            IOUtils.close(in.getInputStream());
          } catch (IOException e) {
            throw new ResourceInitializationException(e);
          }
        } else {
          // Parsing or configuration failed: drop any half-initialised
          // description so later calls do not skip initialisation.
          cachedDescription = null;
          if (in != null) {
            // An exception is already propagating; close via the helper that
            // handles close-time exceptions itself (per its name).
            IOUtils.closeWhileHandlingException(in.getInputStream());
          }
        }
      }
    }
  }
  return UIMAFramework.produceAnalysisEngine(cachedDescription);
}
Aggregations