use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class AsterixSource method open.
/**
 * Opens this source: posts the generated query to the Asterix HTTP query
 * service and keeps the "results" array of the JSON response for later use.
 * Calling it again while the operator is already opened is a no-op.
 *
 * @throws TexeraException a DataflowException is raised when the HTTP call
 *         fails or when the service answers with a status other than 200
 */
@Override
public void open() throws TexeraException {
    if (cursor == OPENED) {
        return;
    }
    try {
        String endpoint = "http://" + predicate.getHost() + ":" + predicate.getPort() + "/query/service";
        String query = generateAsterixQuery(predicate);
        HttpResponse<JsonNode> response = Unirest.post(endpoint)
                .queryString("statement", query)
                .field("mode", "immediate")
                .asJson();

        // anything other than 200 OK is reported as a failure
        if (response.getStatus() != 200) {
            throw new DataflowException("Send query to asterix failed: error status: "
                    + response.getStatusText() + ", error body: " + response.getBody().toString());
        }

        // status is 200 OK: store the results and mark the operator as opened
        this.resultJsonArray = response.getBody().getObject().getJSONArray("results");
        cursor = OPENED;
    } catch (UnirestException e) {
        throw new DataflowException(e);
    }
}
use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class FileExtractorUtils method extractPPTFile.
/**
 * Extracts the text of a PowerPoint presentation using Apache POI.
 *
 * The text of every {@code XSLFTextShape} on every slide is appended, in
 * slide and shape order, with no separator between shapes. Shapes that are
 * not text shapes are skipped.
 *
 * NOTE(review): the file is opened with {@code XMLSlideShow}, POI's OOXML
 * slide show class, so legacy binary {@code .ppt} files are presumably not
 * readable through this method - confirm against the POI version in use.
 *
 * @param path location of the presentation file
 * @return the concatenated text of all text shapes; empty if there are none
 * @throws DataflowException if the file cannot be opened or read
 */
public static String extractPPTFile(Path path) throws DataflowException {
    try (FileInputStream inputStream = new FileInputStream(path.toString());
            XMLSlideShow ppt = new XMLSlideShow(inputStream)) {
        // method-local accumulator: no need for StringBuffer's synchronization
        StringBuilder res = new StringBuilder();
        for (XSLFSlide slide : ppt.getSlides()) {
            List<XSLFShape> shapes = slide.getShapes();
            for (XSLFShape shape : shapes) {
                if (shape instanceof XSLFTextShape) {
                    XSLFTextShape textShape = (XSLFTextShape) shape;
                    res.append(textShape.getText());
                }
            }
        }
        return res.toString();
    } catch (IOException e) {
        throw new DataflowException(e);
    }
}
use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class FileExtractorUtils method extractWordFile.
/**
 * Extracts the body text of an MS Word DOC/DOCX file using Apache Tika.
 *
 * The file is parsed with an {@code AutoDetectParser} and the text collected
 * by a {@code BodyContentHandler} is returned.
 *
 * @param path location of the Word document
 * @return the text collected by the content handler
 * @throws DataflowException if the file cannot be read or Tika fails to parse it
 */
public static String extractWordFile(Path path) throws DataflowException {
    try (FileInputStream inputStream = new FileInputStream(path.toString())) {
        // -1 disables BodyContentHandler's default write limit (100,000 characters);
        // with the default, long documents abort parsing with a SAXException and
        // would be reported as unreadable instead of being extracted.
        BodyContentHandler handler = new BodyContentHandler(-1);
        Metadata metadata = new Metadata();
        AutoDetectParser parser = new AutoDetectParser();
        parser.parse(inputStream, handler, metadata);
        return handler.toString();
    } catch (IOException | SAXException | TikaException e) {
        throw new DataflowException(e);
    }
}
use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class FileSourceOperator method getNextTuple.
/**
 * Returns a tuple holding the text content of the next file that can be read.
 *
 * The extractor is chosen by file extension (case-insensitive): pdf, ppt/pptx,
 * doc/docx, and plain text for everything else. A file whose extraction fails
 * with a DataflowException is reported on standard output and skipped.
 *
 * @return the next tuple, or null when the operator is closed or no further
 *         file can be converted
 * @throws TexeraException declared by the operator interface
 */
@Override
public Tuple getNextTuple() throws TexeraException {
    if (cursor == CLOSED || cursor >= pathList.size()) {
        return null;
    }
    // keep iterating until
    //   1) a file is converted to a tuple successfully, or
    //   2) the cursor reaches the end
    while (cursor < pathList.size() && pathIterator.hasNext()) {
        try {
            Path path = pathIterator.next();
            // Count the path as consumed whether or not extraction succeeds, so the
            // cursor stays in step with the iterator. Previously a failing file left
            // the cursor behind and a failing last file made next() throw
            // NoSuchElementException on the exhausted iterator.
            cursor++;
            String extension = com.google.common.io.Files.getFileExtension(path.toString());
            String content;
            if (extension.equalsIgnoreCase("pdf")) {
                content = FileExtractorUtils.extractPDFFile(path);
            } else if (extension.equalsIgnoreCase("ppt") || extension.equalsIgnoreCase("pptx")) {
                content = FileExtractorUtils.extractPPTFile(path);
            } else if (extension.equalsIgnoreCase("doc") || extension.equalsIgnoreCase("docx")) {
                content = FileExtractorUtils.extractWordFile(path);
            } else {
                content = FileExtractorUtils.extractPlainTextFile(path);
            }
            return new Tuple(outputSchema, IDField.newRandomID(), new TextField(content));
        } catch (DataflowException e) {
            // ignore error and move on to the next file
            // TODO: use log4j
            System.out.println("FileSourceOperator: file read error, file is ignored. " + e.getMessage());
        }
    }
    return null;
}
use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class AbstractSingleInputOperator method getNextTuple.
/**
 * Returns the next matching tuple, honoring the operator's offset and limit.
 *
 * Tuples produced by computeNextMatchingTuple() are counted by the cursor;
 * the first {@code offset} of them are skipped, and once {@code limit}
 * further tuples have been counted the method returns null.
 *
 * @return the next tuple past the offset, or null when the limit is reached
 *         or computeNextMatchingTuple() has no more tuples
 * @throws TexeraException a DataflowException is raised when the operator is
 *         not opened, or wraps any exception thrown while computing the tuple
 */
@Override
public Tuple getNextTuple() throws TexeraException {
    if (cursor == CLOSED) {
        throw new DataflowException(ErrorMessages.OPERATOR_NOT_OPENED);
    }
    // Same test as "cursor >= limit + offset", written as a subtraction so that a
    // very large limit (e.g. Integer.MAX_VALUE) combined with a positive offset
    // cannot overflow and wrongly end the stream. Assumes cursor and offset are
    // non-negative at this point.
    if (cursor - offset >= limit) {
        return null;
    }
    try {
        Tuple resultTuple = null;
        while (true) {
            resultTuple = computeNextMatchingTuple();
            if (resultTuple == null) {
                // input exhausted
                break;
            }
            cursor++;
            if (cursor > offset) {
                // past the skipped prefix: this tuple is the result
                break;
            }
        }
        return resultTuple;
    } catch (Exception e) {
        throw new DataflowException(e.getMessage(), e);
    }
}
Aggregations