use of org.apache.commons.io.input.CloseShieldInputStream in project lucene-solr by apache.
the class HttpSolrCall method remoteQuery.
/**
 * Forwards the current servlet request to {@code coreUrl} (with the query string built from
 * {@code queryParams} appended) and relays the remote reply into {@code resp}.
 *
 * <p>The outgoing request mirrors the incoming HTTP method (GET, HEAD, POST, PUT or DELETE) and
 * carries every incoming header except {@code host}, {@code authorization} and {@code accept}.
 * The remote status code, headers (minus transfer-encoding and connection) and body are then
 * copied onto the servlet response.
 *
 * @param coreUrl base URL of the remote core the request is proxied to
 * @param resp    servlet response that receives the remote status, headers and body
 * @throws SolrException if the incoming HTTP method is none of the handled ones
 * @throws IOException   declared by the signature; an {@code IOException} raised inside the body
 *                       is caught and passed to {@code sendError}
 */
private void remoteQuery(String coreUrl, HttpServletResponse resp) throws IOException {
  HttpRequestBase method = null;
  HttpEntity httpEntity = null;
  try {
    final String urlstr = coreUrl + queryParams.toQueryString();
    final String verb = req.getMethod();

    if ("GET".equals(verb)) {
      method = new HttpGet(urlstr);
    } else if ("HEAD".equals(verb)) {
      method = new HttpHead(urlstr);
    } else if ("DELETE".equals(verb)) {
      method = new HttpDelete(urlstr);
    } else if ("POST".equals(verb) || "PUT".equals(verb)) {
      final HttpEntityEnclosingRequestBase withBody;
      if ("POST".equals(verb)) {
        withBody = new HttpPost(urlstr);
      } else {
        withBody = new HttpPut(urlstr);
      }
      // The servlet container owns the request stream; shield it so the client cannot close it.
      final InputStream body = new CloseShieldInputStream(req.getInputStream());
      withBody.setEntity(new InputStreamEntity(body, req.getContentLength()));
      method = withBody;
    } else {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unexpected method type: " + req.getMethod());
    }

    // Copy the caller's headers, leaving out the ones that must not be forwarded.
    final Enumeration<String> incomingNames = req.getHeaderNames();
    while (incomingNames.hasMoreElements()) {
      final String incomingName = incomingNames.nextElement();
      final boolean withheld = "host".equalsIgnoreCase(incomingName)
          || "authorization".equalsIgnoreCase(incomingName)
          || "accept".equalsIgnoreCase(incomingName);
      if (!withheld) {
        method.addHeader(incomingName, req.getHeader(incomingName));
      }
    }

    // These headers not supported for HttpEntityEnclosingRequests
    if (method instanceof HttpEntityEnclosingRequest) {
      method.removeHeaders(TRANSFER_ENCODING_HEADER);
      method.removeHeaders(CONTENT_LENGTH_HEADER);
    }

    final HttpResponse response = solrDispatchFilter.httpClient.execute(method, HttpClientUtil.createNewHttpClientRequestContext());
    final int statusCode = response.getStatusLine().getStatusCode();
    httpEntity = response.getEntity();
    resp.setStatus(statusCode);

    final HeaderIterator remoteHeaders = response.headerIterator();
    while (remoteHeaders.hasNext()) {
      final Header remoteHeader = remoteHeaders.nextHeader();
      if (remoteHeader == null) {
        continue;
      }
      final String remoteName = remoteHeader.getName();
      // encoding issues with Tomcat
      if (remoteName.equalsIgnoreCase(TRANSFER_ENCODING_HEADER) || remoteName.equalsIgnoreCase(CONNECTION_HEADER)) {
        continue;
      }
      resp.addHeader(remoteName, remoteHeader.getValue());
    }

    if (httpEntity != null) {
      // NOTE(review): the entity's Content-Encoding value is used as the response character
      // encoding here - confirm that this is intended.
      final Header contentEncoding = httpEntity.getContentEncoding();
      if (contentEncoding != null) {
        resp.setCharacterEncoding(contentEncoding.getValue());
      }
      final Header contentType = httpEntity.getContentType();
      if (contentType != null) {
        resp.setContentType(contentType.getValue());
      }
      IOUtils.copyLarge(httpEntity.getContent(), resp.getOutputStream());
    }
  } catch (IOException e) {
    sendError(new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error trying to proxy request for url: " + coreUrl, e));
  } finally {
    // Runs on every path, including when no response entity was obtained (httpEntity == null).
    Utils.consumeFully(httpEntity);
  }
}
use of org.apache.commons.io.input.CloseShieldInputStream in project tika by apache.
the class Word2006MLParser method parse.
/**
 * Parses {@code stream} with the SAX parser supplied by {@code context}, routing events through
 * a {@code Word2006MLDocHandler} that writes to an XHTML wrapper around {@code handler}.
 *
 * <p>The input stream is wrapped in a {@code CloseShieldInputStream} before being handed to the
 * parser. The XHTML document is started before parsing and ended in a {@code finally} block.
 *
 * @param stream   the document to parse
 * @param handler  receiver of the generated XHTML events
 * @param metadata document metadata, passed to the XHTML and document handlers
 * @param context  parse context; source of the SAX parser
 * @throws TikaException if the SAX parser reports a {@code SAXException} ("XML parse error")
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    //set OfficeParserConfig if the user hasn't specified one
    configure(context);

    final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    try {
        // Obtain the parser first, then build the handler chain from the inside out.
        final javax.xml.parsers.SAXParser saxParser = context.getSAXParser();
        final InputStream shielded = new CloseShieldInputStream(stream);
        final Word2006MLDocHandler docHandler = new Word2006MLDocHandler(xhtml, metadata, context);
        final EmbeddedContentHandler embedded = new EmbeddedContentHandler(docHandler);
        saxParser.parse(shielded, new OfflineContentHandler(embedded));
    } catch (SAXException e) {
        throw new TikaException("XML parse error", e);
    } finally {
        xhtml.endDocument();
    }
}
use of org.apache.commons.io.input.CloseShieldInputStream in project tika by apache.
the class AbstractXML2003Parser method parse.
/**
 * Parses {@code stream} with the SAX parser supplied by {@code context}, using the handler
 * returned by {@code getContentHandler} to produce XHTML output on {@code handler}.
 *
 * <p>The XHTML handler is wrapped in a {@code TaggedContentHandler}. When the parser raises a
 * {@code SAXException}, {@code tagged.throwIfCauseOf(e)} is consulted first, and only then is the
 * exception wrapped in a {@code TikaException}. The XHTML document is always ended in a
 * {@code finally} block.
 *
 * @param stream   the document to parse; shielded from being closed by the parser
 * @param handler  receiver of the generated XHTML events
 * @param metadata document metadata; the content type is set on it before parsing
 * @param context  parse context; source of the SAX parser
 * @throws TikaException if the SAX parser reports a {@code SAXException} that is not rethrown
 *                       by the tagged handler ("XML parse error")
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    setContentType(metadata);

    final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();

    final TaggedContentHandler tagged = new TaggedContentHandler(xhtml);
    try {
        // Obtain the parser first, then build the handler chain from the inside out.
        final javax.xml.parsers.SAXParser saxParser = context.getSAXParser();
        final InputStream shielded = new CloseShieldInputStream(stream);
        final EmbeddedContentHandler embedded = new EmbeddedContentHandler(getContentHandler(tagged, metadata, context));
        saxParser.parse(shielded, new OfflineContentHandler(embedded));
    } catch (SAXException e) {
        tagged.throwIfCauseOf(e);
        throw new TikaException("XML parse error", e);
    } finally {
        xhtml.endDocument();
    }
}
use of org.apache.commons.io.input.CloseShieldInputStream in project tika by apache.
the class OOXMLExtractorFactory method parse.
/**
 * Opens the OOXML package behind {@code stream}, picks a matching POI text extractor, wraps it
 * in the corresponding Tika decorator and emits metadata and XHTML.
 *
 * <p>The package is taken from the stream's open container when it already is an
 * {@code OPCPackage}, opened read-only from the backing file when the stream has one, and
 * otherwise opened from a close-shielded view of the stream. If the detected type is
 * {@code null} or listed in {@code OOXMLParser.UNSUPPORTED_OOXML_TYPES}, parsing is delegated
 * to {@code EmptyParser.INSTANCE}.
 *
 * @param stream      the document to parse
 * @param baseHandler receiver of the generated XHTML events
 * @param metadata    document metadata; the content type and extracted metadata are written to it
 * @param context     parse context; must hold an {@code OfficeParserConfig}
 * @throws TikaException if the extractor cannot be created, or if POI yields a user-model
 *                       extractor without a document
 */
public static void parse(InputStream stream, ContentHandler baseHandler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    final Locale locale = context.get(Locale.class, Locale.getDefault());
    ExtractorFactory.setThreadPrefersEventExtractors(true);
    try {
        // Locate or Open the OPCPackage for the file
        final TikaInputStream tis = TikaInputStream.cast(stream);
        final Object openContainer = (tis == null) ? null : tis.getOpenContainer();
        final OPCPackage pkg;
        if (openContainer instanceof OPCPackage) {
            pkg = (OPCPackage) openContainer;
        } else if (tis != null && tis.hasFile()) {
            pkg = OPCPackage.open(tis.getFile().getPath(), PackageAccess.READ);
            tis.setOpenContainer(pkg);
        } else {
            final InputStream shielded = new CloseShieldInputStream(stream);
            pkg = OPCPackage.open(shielded);
        }

        // Get the type, and ensure it's one we handle
        final MediaType type = ZipContainerDetector.detectOfficeOpenXML(pkg);
        if (type == null || OOXMLParser.UNSUPPORTED_OOXML_TYPES.contains(type)) {
            // Not a supported type, delegate to Empty Parser
            EmptyParser.INSTANCE.parse(stream, baseHandler, metadata, context);
            return;
        }
        metadata.set(Metadata.CONTENT_TYPE, type.toString());

        // This has already been set by OOXMLParser's call to configure()
        // We can rely on this being non-null.
        final OfficeParserConfig config = context.get(OfficeParserConfig.class);

        // Have the appropriate OOXML text extractor picked: SAX docx first, then SAX pptx,
        // then whatever the POI factory provides.
        POIXMLTextExtractor textExtractor = config.getUseSAXDocxExtractor() ? trySXWPF(pkg) : null;
        if (textExtractor == null && config.getUseSAXPptxExtractor()) {
            textExtractor = trySXSLF(pkg);
        }
        if (textExtractor == null) {
            textExtractor = ExtractorFactory.createExtractor(pkg);
        }

        final POIXMLDocument doc = textExtractor.getDocument();
        final OOXMLExtractor decorated;
        // Event-based extractors are matched first; only afterwards is the document type consulted.
        if (textExtractor instanceof XSSFBEventBasedExcelExtractor) {
            decorated = new XSSFBExcelExtractorDecorator(context, textExtractor, locale);
        } else if (textExtractor instanceof XSSFEventBasedExcelExtractor) {
            decorated = new XSSFExcelExtractorDecorator(context, textExtractor, locale);
        } else if (textExtractor instanceof XWPFEventBasedWordExtractor) {
            decorated = new SXWPFWordExtractorDecorator(metadata, context, (XWPFEventBasedWordExtractor) textExtractor);
            metadata.add("X-Parsed-By", XWPFEventBasedWordExtractor.class.getCanonicalName());
        } else if (textExtractor instanceof XSLFEventBasedPowerPointExtractor) {
            decorated = new SXSLFPowerPointExtractorDecorator(metadata, context, (XSLFEventBasedPowerPointExtractor) textExtractor);
            metadata.add("X-Parsed-By", XSLFEventBasedPowerPointExtractor.class.getCanonicalName());
        } else if (doc == null) {
            throw new TikaException("Expecting UserModel based POI OOXML extractor with a document, but none found. " + "The extractor returned was a " + textExtractor);
        } else if (doc instanceof XMLSlideShow) {
            decorated = new XSLFPowerPointExtractorDecorator(context, (org.apache.poi.xslf.extractor.XSLFPowerPointExtractor) textExtractor);
        } else if (doc instanceof XWPFDocument) {
            decorated = new XWPFWordExtractorDecorator(context, (XWPFWordExtractor) textExtractor);
        } else {
            decorated = new POIXMLTextExtractorDecorator(context, textExtractor);
        }

        // Get the bulk of the metadata first, so that it's accessible during
        // parsing if desired by the client (see TIKA-1109)
        decorated.getMetadataExtractor().extract(metadata);

        // Extract the text, along with any in-document metadata
        decorated.getXHTML(baseHandler, metadata, context);
    } catch (IllegalArgumentException e) {
        final String message = e.getMessage();
        final boolean noSupportedDocuments = message != null && message.startsWith("No supported documents found");
        if (!noSupportedDocuments) {
            throw new TikaException("Error creating OOXML extractor", e);
        }
        throw new TikaException("TIKA-418: RuntimeException while getting content" + " for thmx and xps file types", e);
    } catch (OpenXML4JException | XmlException e) {
        // InvalidFormatException is a subclass of OpenXML4JException, so it is handled here too.
        throw new TikaException("Error creating OOXML extractor", e);
    }
}
use of org.apache.commons.io.input.CloseShieldInputStream in project tika by apache.
the class OpenDocumentContentParser method parseInternal.
/**
 * Runs the SAX parser from {@code context} over {@code stream}, delivering events to an
 * {@code OpenDocumentElementMappingContentHandler} built from {@code handler} and
 * {@code MAPPINGS}.
 *
 * <p>The stream is wrapped in a {@code CloseShieldInputStream} before being given to the parser.
 * Events pass through an {@code OfflineContentHandler} and an
 * {@code NSNormalizerContentHandler} on their way to the mapping handler.
 *
 * @param stream   the content to parse
 * @param handler  receiver wrapped by the element-mapping handler
 * @param metadata not read by this method
 * @param context  parse context; source of the SAX parser
 */
void parseInternal(InputStream stream, final ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    final DefaultHandler mappingHandler = new OpenDocumentElementMappingContentHandler(handler, MAPPINGS);
    final SAXParser saxParser = context.getSAXParser();

    // Build the parser arguments from the inside out, in the order the parser call needs them.
    final InputStream shielded = new CloseShieldInputStream(stream);
    final NSNormalizerContentHandler normalizer = new NSNormalizerContentHandler(mappingHandler);
    saxParser.parse(shielded, new OfflineContentHandler(normalizer));
}
Aggregations