use of org.apache.tika.exception.TikaException in project tika by apache.
the class DBFReader method fillRow.
// Reads one record from the underlying stream into the given row.
// The result only says whether at least one cell reported reading content;
// the record itself may still be incomplete.
private boolean fillRow(DBFRow row) throws IOException, TikaException {
    if (row == null) {
        return false;
    }
    final DBFCell[] rowCells = row.cells;
    // The first byte of a record is its deletion flag.
    final int flagByte = is.read();
    final boolean deleted;
    switch (flagByte) {
        case 32:
            // space: all ok
            deleted = false;
            break;
        case 42:
            // asterisk
            deleted = true;
            break;
        case 26:
            // marker for end of dbf file
            return false;
        case -1:
            // truncated file: fatal only when STRICT is set
            if (DBFReader.STRICT) {
                throw new IOException("EOF reached too early");
            }
            return false;
        default:
            throw new TikaException("Expecting space or asterisk at beginning of record, not:" + flagByte);
    }
    row.setDeleted(deleted);

    // Every cell is asked to read, whether or not an earlier one produced content.
    boolean sawContent = false;
    for (DBFCell cell : rowCells) {
        sawContent |= cell.read(is);
    }
    return sawContent;
}
use of org.apache.tika.exception.TikaException in project tika by apache.
the class DWGParser method parse.
/**
 * Reads the 128-byte header, takes its first six US-ASCII characters as the
 * version tag, and dispatches to the property reader matching that tag.
 *
 * @param stream   the drawing to read
 * @param handler  receiver of the XHTML events
 * @param metadata receives the content type and is passed to the property readers
 * @param context  parse context (not consulted in this method)
 * @throws IOException   if the stream can not be read
 * @throws TikaException if the version tag is not one of the handled values
 * @throws SAXException  if the XHTML events can not be delivered
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, TikaException, SAXException {
    final byte[] headerBytes = new byte[128];
    IOUtils.readFully(stream, headerBytes);
    final String versionTag = new String(headerBytes, 0, 6, "US-ASCII");

    final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();

    final boolean isAc1015 = "AC1015".equals(versionTag);
    final boolean isAc1018 = "AC1018".equals(versionTag);
    final boolean isAc1021OrAc1024 = "AC1021".equals(versionTag) || "AC1024".equals(versionTag);

    // Guard: anything other than the tags above is rejected (the document has already been started).
    if (!isAc1015 && !isAc1018 && !isAc1021OrAc1024) {
        throw new TikaException("Unsupported AutoCAD drawing version: " + versionTag);
    }

    // Every handled version reports the same content type.
    metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());

    if (isAc1015) {
        boolean positioned = skipTo2000PropertyInfoSection(stream, headerBytes);
        if (positioned) {
            get2000Props(stream, metadata, xhtml);
        }
    } else if (isAc1018) {
        boolean positioned = skipToPropertyInfoSection(stream, headerBytes);
        if (positioned) {
            get2004Props(stream, metadata, xhtml);
        }
    } else {
        boolean positioned = skipToPropertyInfoSection(stream, headerBytes);
        if (positioned) {
            get2007and2010Props(stream, metadata, xhtml);
        }
    }

    xhtml.endDocument();
}
use of org.apache.tika.exception.TikaException in project tika by apache.
the class ImageMetadataExtractor method parseTiff.
/**
 * Reads the metadata of the given TIFF file and passes it to {@code handle}.
 *
 * @param file the TIFF file to read
 * @throws TikaException if a MetadataException or TiffProcessingException is raised;
 *                       the original exception is kept as the cause
 */
public void parseTiff(File file) throws IOException, SAXException, TikaException {
    // Both failure modes surface with the same message.
    final String failureMessage = "Can't read TIFF metadata";
    try {
        handle(TiffMetadataReader.readMetadata(file));
    } catch (MetadataException metadataFailure) {
        throw new TikaException(failureMessage, metadataFailure);
    } catch (TiffProcessingException tiffFailure) {
        throw new TikaException(failureMessage, tiffFailure);
    }
}
use of org.apache.tika.exception.TikaException in project tika by apache.
the class ImageMetadataExtractor method parseRawExif.
/**
 * Extracts EXIF data from the given byte array into a fresh metadata object
 * and passes that object to {@code handle}.
 *
 * @param exifData the raw EXIF bytes
 * @throws TikaException if {@code handle} raises a MetadataException;
 *                       the original exception is kept as the cause
 */
public void parseRawExif(byte[] exifData) throws IOException, SAXException, TikaException {
    final com.drew.metadata.Metadata exifMetadata = new com.drew.metadata.Metadata();
    // The length of the JPEG segment preamble is passed as the third argument to extract.
    new ExifReader().extract(
            new ByteArrayReader(exifData),
            exifMetadata,
            ExifReader.JPEG_SEGMENT_PREAMBLE.length());
    try {
        handle(exifMetadata);
    } catch (MetadataException cause) {
        throw new TikaException("Can't process the EXIF Data", cause);
    }
}
use of org.apache.tika.exception.TikaException in project tika by apache.
the class Tika method parseToString.
/**
 * Parses the given document and returns its extracted text, capped at
 * a caller-supplied length. The cap applies to this call only.
 * <p>
 * At most the first {@code maxLength} characters extracted from the
 * document are returned, which keeps memory use bounded.
 * <p>
 * <strong>NOTE:</strong> This method always closes the given stream
 * before returning, whether parsing succeeds or fails. Most other Tika
 * methods that accept an {@link InputStream} leave closing to the caller.
 *
 * @param stream the document to be parsed; closed by this method
 * @param metadata document metadata
 * @param maxLength maximum length of the returned string
 * @return extracted text content
 * @throws IOException if the document can not be read
 * @throws TikaException if the document can not be parsed
 */
public String parseToString(InputStream stream, Metadata metadata, int maxLength) throws IOException, TikaException {
    final WriteOutContentHandler textSink = new WriteOutContentHandler(maxLength);
    try {
        final ParseContext parseContext = new ParseContext();
        parseContext.set(Parser.class, parser);
        final BodyContentHandler bodyHandler = new BodyContentHandler(textSink);
        parser.parse(stream, bodyHandler, metadata, parseContext);
    } catch (SAXException saxFailure) {
        // A SAXException caused by reaching the write limit is the expected way to stop early.
        final boolean limitReached = textSink.isWriteLimitReached(saxFailure);
        if (!limitReached) {
            // This should never happen with BodyContentHandler...
            throw new TikaException("Unexpected SAX processing failure", saxFailure);
        }
    } finally {
        stream.close();
    }
    return textSink.toString();
}
Aggregations