use of org.apache.jena.riot.lang.StreamRDFCounting in project jena by apache.
the class AbstractNodeTupleOutputFormatTests method countTuples.
/**
 * Parses the given output file with a counting stream and reports the number
 * of tuples seen.
 * <p>
 * The file is read using the language returned by {@code getRdfLanguage()}.
 *
 * @param f
 *            Output file to parse
 * @return Tuple count reported by the counting stream after parsing
 */
protected final long countTuples(File f) {
    // A counting sink with no downstream destination: it only tallies what the parser emits.
    final StreamRDFCounting tally = StreamRDFLib.count();
    final String path = f.getAbsolutePath();
    RDFDataMgr.parse(tally, path, this.getRdfLanguage());
    return tally.count();
}
use of org.apache.jena.riot.lang.StreamRDFCounting in project jena by apache.
the class CmdLangParse method parseRIOT.
/**
 * Configures the supplied parser builder from the command-line modules, runs
 * one parse into a counting stream wrapped around the output stream, and
 * records the outcome.
 * <p>
 * A {@code RiotException} thrown by the parse is caught and reported as an
 * unsuccessful outcome rather than propagated.
 *
 * @param builder
 *            Parser builder to finish configuring and then build
 * @param filename
 *            Name stored in the returned record (info for the ProcessOutcome)
 * @return Record holding the filename, the success flag, the value returned by
 *         the timer, and the triple and quad counts
 */
protected ParseRecord parseRIOT(RDFParserBuilder builder, /*Info for the ProcessOutcome*/
String filename) {
    // Checking is on by default. An explicit request to check leaves it on;
    // an explicit request not to check is evaluated last and therefore wins.
    boolean checking = true;
    if (modLangParse.explicitChecking()) {
        checking = true;
    }
    if (modLangParse.explicitNoChecking()) {
        checking = false;
    }
    builder.checking(checking);

    // The warning handler (try to go on if possible) is the default; the
    // standard handler is chosen only when checking and stop-on-bad-term was requested.
    final ErrorHandler handler = (checking && modLangParse.stopOnBadTerm())
            ? ErrorHandlerFactory.errorHandlerStd
            : ErrorHandlerFactory.errorHandlerWarn;

    if (modLangParse.skipOnBadTerm()) {
        // Placeholder: skipOnBadTerm needs collaboration from the parser.
    }

    // Make a flag.
    // Input and output subflags.
    // If input is "label", then output using NodeToLabel.createBNodeByLabelRaw();
    // else use NodeToLabel.createBNodeByLabel();
    // Also, as URI.
    final boolean labelsAsGiven = false;
    if (labelsAsGiven) {
        builder.labelToNode(LabelToNode.createUseLabelAsGiven());
    }

    // Destination: the output stream, routed through the inference wrapper when a setup is present.
    final StreamRDF target = (setup != null) ? InfFactory.inf(outputStream, setup) : outputStream;
    final StreamRDFCounting sink = StreamRDFLib.count(target);

    if (checking) {
        SysRIOT.setStrictMode(true);
    }
    builder.errorHandler(handler);

    modTime.startTimer();
    sink.start();
    final RDFParser parser = builder.build();
    boolean successful;
    try {
        parser.parse(sink);
        successful = true;
    } catch (RiotException ex) {
        // A parse failure is reported through the outcome record, not rethrown.
        successful = false;
    }
    sink.finish();
    final long elapsed = modTime.endTimer();

    // TEMP
    return new ParseRecord(filename, successful, elapsed, sink.countTriples(), sink.countQuads());
}
use of org.apache.jena.riot.lang.StreamRDFCounting in project jena by apache.
the class SPARQL_Upload method uploadWorker.
/**
 * Processes an HTTP multipart file upload of RDF that carries an additional
 * form field naming the target graph.
 * <p>
 * The data is parsed into a temporary dataset instead of being streamed to its
 * destination, because the graph name field can arrive after the data.
 * The returned count is the one from the most recently parsed file part
 * (-1 if no file part was seen).
 *
 * @return graph name, temporary dataset and count; the graph name is
 *         {@code null} when the parsed language is a quads language
 */
// ?? Combine with Upload.fileUploadWorker
// Difference is the handling of names for graphs.
private static UploadDetails uploadWorker(HttpAction action, String base) {
    final DatasetGraph tmpDataset = DatasetGraphFactory.create();
    final ServletFileUpload uploadHandler = new ServletFileUpload();
    String graphName = null;
    boolean isQuads = false;
    long count = -1;
    try {
        for (FileItemIterator parts = uploadHandler.getItemIterator(action.request); parts.hasNext();) {
            final FileItemStream part = parts.next();
            final String fieldName = part.getFieldName();
            InputStream in = part.openStream();

            if (!part.isFormField()) {
                // File content: work out the syntax, then parse into the temporary dataset.
                final String name = part.getName();
                if (name == null || name.isEmpty() || name.equals("UNSET FILE NAME")) {
                    ServletOps.errorBadRequest("No name for content - can't determine RDF syntax");
                }
                final ContentType ct = ContentType.create(part.getContentType());
                Lang lang = RDFLanguages.contentTypeToLang(ct.getContentType());
                if (lang == null) {
                    // The content type did not identify a language: fall back to the file name,
                    // and unwrap gzip when the name carries a ".gz" extension.
                    lang = RDFLanguages.filenameToLang(name);
                    if (name.endsWith(".gz")) {
                        in = new GZIPInputStream(in);
                    }
                }
                if (lang == null) {
                    // Desperate.
                    lang = RDFLanguages.RDFXML;
                }
                isQuads = RDFLanguages.isQuads(lang);
                action.log.info(format("[%d] Upload: Filename: %s, Content-Type=%s, Charset=%s => %s", action.id, name, ct.getContentType(), ct.getCharset(), lang.getName()));

                final StreamRDFCounting counter = StreamRDFLib.count(StreamRDFLib.dataset(tmpDataset));
                ActionSPARQL.parse(action, counter, in, lang, base);
                count = counter.count();
                continue;
            }

            // Form field: only the graph-name fields matter.
            final String value = Streams.asString(in, "UTF-8");
            if (fieldName.equals(HttpNames.paramGraph)) {
                graphName = value;
                final boolean namesAGraph = graphName != null
                        && !graphName.isEmpty()
                        && !graphName.equals(HttpNames.valueDefault);
                if (namesAGraph) {
                    // Validate the IRI given as the graph name.
                    final IRI iri = IRIResolver.parseIRI(value);
                    if (iri.hasViolation(false)) {
                        ServletOps.errorBadRequest("Bad IRI: " + graphName);
                    }
                    final String scheme = iri.getScheme();
                    if (scheme == null) {
                        ServletOps.errorBadRequest("Bad IRI: no IRI scheme name: " + graphName);
                    }
                    if (scheme.equalsIgnoreCase("http") || scheme.equalsIgnoreCase("https")) {
                        // Redundant??
                        if (iri.getRawHost() == null) {
                            ServletOps.errorBadRequest("Bad IRI: no host name: " + graphName);
                        }
                        final String rawPath = iri.getRawPath();
                        if (rawPath == null || rawPath.length() == 0) {
                            ServletOps.errorBadRequest("Bad IRI: no path: " + graphName);
                        }
                        if (rawPath.charAt(0) != '/') {
                            ServletOps.errorBadRequest("Bad IRI: Path does not start '/': " + graphName);
                        }
                    }
                }
            } else if (fieldName.equals(HttpNames.paramDefaultGraphURI)) {
                graphName = null;
            } else {
                // Add file type?
                action.log.info(format("[%d] Upload: Field=%s ignored", action.id, fieldName));
            }
        }

        // Quads carry their own graph names, so no target name is reported for them;
        // otherwise an absent or empty name means the default graph.
        final String targetName;
        if (isQuads) {
            targetName = null;
        } else if (graphName == null || graphName.isEmpty()) {
            targetName = HttpNames.valueDefault;
        } else {
            targetName = graphName;
        }
        return new UploadDetails(targetName, tmpDataset, count);
    } catch (ActionErrorException ex) {
        // Already an action-level error: pass it through untouched.
        throw ex;
    } catch (Exception ex) {
        ServletOps.errorOccurred(ex);
        return null;
    }
}
use of org.apache.jena.riot.lang.StreamRDFCounting in project jena by apache.
the class Upload method incomingData.
/**
 * Reads the RDF carried by the request and sends it to {@code dest}, counting
 * what was parsed.
 * <p>
 * A multipart form-data request is delegated to {@code fileUploadWorker};
 * otherwise the request body is parsed as a single graph (or quads) in the
 * language derived from the content type.
 *
 * @param action
 *            The HTTP action whose request supplies the data
 * @param dest
 *            Stream receiving the parsed data
 * @return Counts for the parsed data; {@code null} follows a bad-request
 *         report for a missing or unrecognised content type (reached only if
 *         the error call returns normally)
 */
public static UploadDetails incomingData(HttpAction action, StreamRDF dest) {
    final ContentType ct = FusekiLib.getContentType(action);
    if (ct == null) {
        ServletOps.errorBadRequest("No content type");
        return null;
    }

    if (matchContentType(ctMultipartFormData, ct)) {
        return fileUploadWorker(action, dest);
    }

    // Single graph (or quads) in body.
    final String base = ActionLib.wholeRequestURL(action.request);
    final Lang lang = RDFLanguages.contentTypeToLang(ct.getContentType());
    if (lang == null) {
        ServletOps.errorBadRequest("Unknown content type for triples: " + ct);
        return null;
    }

    InputStream input = null;
    try {
        input = action.request.getInputStream();
    } catch (IOException ex) {
        IO.exception(ex);
    }

    final int len = action.request.getContentLength();
    final StreamRDFCounting counter = StreamRDFLib.count(dest);
    // One template for both the success and the failure log line; only the last field differs.
    final String logTemplate = "[%d] Body: Content-Length=%d, Content-Type=%s, Charset=%s => %s : %s";
    try {
        ActionSPARQL.parse(action, counter, input, lang, base);
        final UploadDetails details = new UploadDetails(counter.count(), counter.countTriples(), counter.countQuads());
        action.log.info(format(logTemplate, action.id, len, ct.getContentType(), ct.getCharset(), lang.getName(), details.detailsStr()));
        return details;
    } catch (RiotParseException ex) {
        // Log the parse error in the same shape as a success line, then let it propagate.
        action.log.info(format(logTemplate, action.id, len, ct.getContentType(), ct.getCharset(), lang.getName(), ex.getMessage()));
        throw ex;
    }
}
use of org.apache.jena.riot.lang.StreamRDFCounting in project jena by apache.
the class Upload method fileUploadWorker.
/**
 * Processes an HTTP multipart upload of RDF files (triples or quads).
 * <p>
 * Data is streamed straight into {@code dest} -- unlike SPARQL_Upload, the
 * destination is known at the start of the multipart file body. Each file is
 * counted on its own for logging, and all files together are counted for the
 * returned result.
 *
 * @param action
 *            The HTTP action whose request carries the multipart body
 * @param dest
 *            Stream receiving the parsed data
 * @return Overall counts across every file part processed
 */
public static UploadDetails fileUploadWorker(HttpAction action, StreamRDF dest) {
    final String base = ActionLib.wholeRequestURL(action.request);
    final ServletFileUpload uploadHandler = new ServletFileUpload();
    // Overall counting.
    final StreamRDFCounting overall = StreamRDFLib.count(dest);
    // One template for both the success and the failure log line; only the last field differs.
    final String logTemplate = "[%d] Filename: %s, Content-Type=%s, Charset=%s => %s : %s";
    try {
        for (FileItemIterator parts = uploadHandler.getItemIterator(action.request); parts.hasNext();) {
            final FileItemStream part = parts.next();
            if (part.isFormField()) {
                // Plain form fields are not accepted here: report the offending field and value.
                final String fieldName = part.getFieldName();
                final InputStream fieldIn = part.openStream();
                final String value = Streams.asString(fieldIn, "UTF-8");
                ServletOps.errorBadRequest(format("Only files accepted in multipart file upload (got %s=%s)", fieldName, value));
            }

            // The field name is ignored for file parts.
            InputStream in = part.openStream();

            // Work out the syntax: the content type first, unless it is text/plain ...
            final ContentType ct = ContentType.create(part.getContentType());
            Lang lang = matchContentType(ctTextPlain, ct)
                    ? null
                    : RDFLanguages.contentTypeToLang(ct.getContentType());
            if (lang == null) {
                // ... then the file name, unwrapping gzip when the name ends in ".gz".
                final String name = part.getName();
                if (name == null || name.isEmpty()) {
                    ServletOps.errorBadRequest("No name for content - can't determine RDF syntax");
                }
                lang = RDFLanguages.filenameToLang(name);
                if (name.endsWith(".gz")) {
                    in = new GZIPInputStream(in);
                }
            }
            if (lang == null) {
                // Desperate.
                lang = RDFLanguages.RDFXML;
            }

            String shownName = part.getName();
            if (shownName == null || shownName.isEmpty()) {
                shownName = "<none>";
            }

            // Count just this step, feeding through to the overall counter.
            final StreamRDFCounting perFile = StreamRDFLib.count(overall);
            try {
                ActionSPARQL.parse(action, perFile, in, lang, base);
                final UploadDetails fileDetails = new UploadDetails(perFile.count(), perFile.countTriples(), perFile.countQuads());
                action.log.info(format(logTemplate, action.id, shownName, ct.getContentType(), ct.getCharset(), lang.getName(), fileDetails.detailsStr()));
            } catch (RiotParseException ex) {
                // Log the parse error in the same shape as a success line, then let it propagate.
                action.log.info(format(logTemplate, action.id, shownName, ct.getContentType(), ct.getCharset(), lang.getName(), ex.getMessage()));
                throw ex;
            }
        }
    } catch (ActionErrorException ex) {
        // Already an action-level error: pass it through untouched.
        throw ex;
    } catch (Exception ex) {
        ServletOps.errorOccurred(ex.getMessage());
    }

    // Overall results.
    return new UploadDetails(overall.count(), overall.countTriples(), overall.countQuads());
}
Aggregations