Search in sources :

Example 1 with StreamRDFCounting

use of org.apache.jena.riot.lang.StreamRDFCounting in project jena by apache.

the class AbstractNodeTupleOutputFormatTests method countTuples.

/**
 * Determines how many tuples the given output file contains by parsing it
 * into a counting sink.
 *
 * @param f
 *            Output file to parse; it is read using the language returned
 *            by {@code getRdfLanguage()}
 * @return Tuple count reported by the counting sink after the parse
 */
protected final long countTuples(File f) {
    final StreamRDFCounting tupleCounter = StreamRDFLib.count();
    final String outputPath = f.getAbsolutePath();
    RDFDataMgr.parse(tupleCounter, outputPath, this.getRdfLanguage());
    return tupleCounter.count();
}
Also used : StreamRDFCounting(org.apache.jena.riot.lang.StreamRDFCounting)

Example 2 with StreamRDFCounting

use of org.apache.jena.riot.lang.StreamRDFCounting in project jena by apache.

the class CmdLangParse method parseRIOT.

/**
 * Configures the supplied parser builder from the command line modules, runs
 * one parse into a counting sink and reports the outcome.
 *
 * @param builder  parser builder to configure and build the parser from
 * @param filename info for the outcome record; it is only stored in the result
 * @return a record holding the filename, whether the parse completed without a
 *         {@code RiotException}, the value returned by the timer, and the
 *         triple and quad counts seen by the sink
 */
protected ParseRecord parseRIOT(RDFParserBuilder builder, String filename) {
    // Checking is on by default; an explicit "no checking" request is applied last and so wins.
    boolean checking = true;
    if (modLangParse.explicitChecking()) {
        checking = true;
    }
    if (modLangParse.explicitNoChecking()) {
        checking = false;
    }
    builder.checking(checking);

    // Only a checking run that asked to stop on bad terms gets the standard handler;
    // everything else tries to go on if possible, which is the default behaviour.
    final ErrorHandler handler;
    if (checking && modLangParse.stopOnBadTerm()) {
        handler = ErrorHandlerFactory.errorHandlerStd;
    } else {
        handler = ErrorHandlerFactory.errorHandlerWarn;
    }

    if (modLangParse.skipOnBadTerm()) {
        // skipOnBadterm - this needs collaboration from the parser.
    }

    // Make a flag.
    // Input and output subflags.
    // If input is "label, then output using NodeToLabel.createBNodeByLabelRaw() ;
    // else use NodeToLabel.createBNodeByLabel() ;
    // Also, as URI.
    final boolean useLabelsAsGiven = false;
    if (useLabelsAsGiven) {
        builder.labelToNode(LabelToNode.createUseLabelAsGiven());
    }

    // Wrap the output stream with inference when a setup is present, then count what flows through.
    final StreamRDF destination = (setup == null) ? outputStream : InfFactory.inf(outputStream, setup);
    final StreamRDFCounting countingSink = StreamRDFLib.count(destination);

    if (checking) {
        SysRIOT.setStrictMode(true);
    }
    builder.errorHandler(handler);
    modTime.startTimer();
    countingSink.start();
    final RDFParser parser = builder.build();

    boolean parsedOk;
    try {
        parser.parse(countingSink);
        parsedOk = true;
    } catch (RiotException ex) {
        // The failure is recorded in the outcome rather than propagated.
        parsedOk = false;
    }
    // The sink is finished and the timer stopped whether or not a RiotException was raised.
    countingSink.finish();
    final long elapsed = modTime.endTimer();
    return new ParseRecord(filename, parsedOk, elapsed, countingSink.countTriples(), countingSink.countQuads());
}
Also used : StreamRDFCounting(org.apache.jena.riot.lang.StreamRDFCounting)

Example 3 with StreamRDFCounting

use of org.apache.jena.riot.lang.StreamRDFCounting in project jena by apache.

the class SPARQL_Upload method uploadWorker.

/**
 * Process an HTTP file upload of RDF with an additional name field for the graph name.
 * We can't stream straight into a dataset because the graph name can be after the data,
 * so everything is parsed into a temporary dataset which is handed back to the caller.
 *
 * @param action the HTTP action whose request carries the multipart upload
 * @param base   passed through to the parse call (presumably the base URI used to
 *               resolve relative IRIs - confirm against ActionSPARQL.parse)
 * @return graph name, the temporary dataset and count. The graph name is {@code null}
 *         when the uploaded syntax is a quads language. The count comes from the most
 *         recently parsed file part, or is -1 if no file part was seen.
 */
// ?? Combine with Upload.fileUploadWorker
// Difference is the handling of names for graphs.
private static UploadDetails uploadWorker(HttpAction action, String base) {
    // Temporary holding area for all parsed data.
    DatasetGraph dsgTmp = DatasetGraphFactory.create();
    ServletFileUpload upload = new ServletFileUpload();
    String graphName = null;
    boolean isQuads = false;
    // -1 means "no file part parsed".
    long count = -1;
    String name = null;
    ContentType ct = null;
    Lang lang = null;
    try {
        FileItemIterator iter = upload.getItemIterator(action.request);
        while (iter.hasNext()) {
            FileItemStream item = iter.next();
            String fieldName = item.getFieldName();
            InputStream stream = item.openStream();
            if (item.isFormField()) {
                // Graph name.
                String value = Streams.asString(stream, "UTF-8");
                if (fieldName.equals(HttpNames.paramGraph)) {
                    graphName = value;
                    // Only a non-empty, non-default graph name is validated as an IRI.
                    // NOTE(review): the checks below dereference values immediately after
                    // reporting them as missing, so ServletOps.errorBadRequest is presumably
                    // expected not to return normally - confirm.
                    if (graphName != null && !graphName.equals("") && !graphName.equals(HttpNames.valueDefault)) {
                        IRI iri = IRIResolver.parseIRI(value);
                        if (iri.hasViolation(false))
                            ServletOps.errorBadRequest("Bad IRI: " + graphName);
                        if (iri.getScheme() == null)
                            ServletOps.errorBadRequest("Bad IRI: no IRI scheme name: " + graphName);
                        if (iri.getScheme().equalsIgnoreCase("http") || iri.getScheme().equalsIgnoreCase("https")) {
                            // Redundant??
                            // Extra structural checks for http/https IRIs: host present, path present and rooted.
                            if (iri.getRawHost() == null)
                                ServletOps.errorBadRequest("Bad IRI: no host name: " + graphName);
                            if (iri.getRawPath() == null || iri.getRawPath().length() == 0)
                                ServletOps.errorBadRequest("Bad IRI: no path: " + graphName);
                            if (iri.getRawPath().charAt(0) != '/')
                                ServletOps.errorBadRequest("Bad IRI: Path does not start '/': " + graphName);
                        }
                    }
                } else if (fieldName.equals(HttpNames.paramDefaultGraphURI))
                    // A default-graph field clears any graph name seen so far.
                    graphName = null;
                else
                    // Add file type?
                    // Any other form field is logged and otherwise ignored.
                    action.log.info(format("[%d] Upload: Field=%s ignored", action.id, fieldName));
            } else {
                // Process the input stream
                name = item.getName();
                if (name == null || name.equals("") || name.equals("UNSET FILE NAME"))
                    ServletOps.errorBadRequest("No name for content - can't determine RDF syntax");
                // Syntax selection: content type first, then the file name, then RDF/XML as a last resort.
                String contentTypeHeader = item.getContentType();
                ct = ContentType.create(contentTypeHeader);
                lang = RDFLanguages.contentTypeToLang(ct.getContentType());
                if (lang == null) {
                    lang = RDFLanguages.filenameToLang(name);
                    // If a ".gz" extension is present we wrap the stream accordingly,
                    // so that the parser reads the decompressed content.
                    if (name.endsWith(".gz"))
                        stream = new GZIPInputStream(stream);
                }
                if (lang == null)
                    // Desperate.
                    lang = RDFLanguages.RDFXML;
                isQuads = RDFLanguages.isQuads(lang);
                action.log.info(format("[%d] Upload: Filename: %s, Content-Type=%s, Charset=%s => %s", action.id, name, ct.getContentType(), ct.getCharset(), lang.getName()));
                // Parse into the temporary dataset through a fresh counting wrapper.
                // Data from every file part accumulates in dsgTmp, but count is overwritten
                // on each part and so reflects only the last one.
                StreamRDF x = StreamRDFLib.dataset(dsgTmp);
                StreamRDFCounting dest = StreamRDFLib.count(x);
                ActionSPARQL.parse(action, dest, stream, lang, base);
                count = dest.count();
            }
        }
        // No (or empty) graph name means the default graph.
        if (graphName == null || graphName.equals(""))
            graphName = HttpNames.valueDefault;
        // Quads carry their own graph names, so no single target graph is reported.
        if (isQuads)
            graphName = null;
        return new UploadDetails(graphName, dsgTmp, count);
    } catch (ActionErrorException ex) {
        // Already an action error (e.g. from the bad-request checks above): pass it on untouched.
        throw ex;
    } catch (Exception ex) {
        ServletOps.errorOccurred(ex);
        // NOTE(review): only reached if errorOccurred returns normally.
        return null;
    }
}
Also used : IRI(org.apache.jena.iri.IRI) ContentType(org.apache.jena.atlas.web.ContentType) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) Lang(org.apache.jena.riot.Lang) DatasetGraph(org.apache.jena.sparql.core.DatasetGraph) GZIPInputStream(java.util.zip.GZIPInputStream) ServletFileUpload(org.apache.commons.fileupload.servlet.ServletFileUpload) FileItemStream(org.apache.commons.fileupload.FileItemStream) StreamRDF(org.apache.jena.riot.system.StreamRDF) StreamRDFCounting(org.apache.jena.riot.lang.StreamRDFCounting) FileItemIterator(org.apache.commons.fileupload.FileItemIterator)

Example 4 with StreamRDFCounting

use of org.apache.jena.riot.lang.StreamRDFCounting in project jena by apache.

the class Upload method incomingData.

/**
 * Reads RDF sent with an HTTP request into {@code dest}, counting what is parsed.
 * A multipart form-data request is delegated to {@code fileUploadWorker};
 * otherwise the request body itself is parsed using the language derived from
 * the request content type.
 *
 * @param action the HTTP action providing the request, its id and its logger
 * @param dest   destination stream that receives the parsed data
 * @return details carrying the overall, triple and quad counts; {@code null} is
 *         returned after a bad-request report for a missing or unknown content
 *         type, should that report return normally
 */
public static UploadDetails incomingData(HttpAction action, StreamRDF dest) {
    final ContentType ct = FusekiLib.getContentType(action);
    if (ct == null) {
        ServletOps.errorBadRequest("No content type");
        return null;
    }
    if (matchContentType(ctMultipartFormData, ct))
        return fileUploadWorker(action, dest);

    // Not multipart: a single graph (or quads) is carried in the body.
    final String base = ActionLib.wholeRequestURL(action.request);
    final Lang lang = RDFLanguages.contentTypeToLang(ct.getContentType());
    if (lang == null) {
        ServletOps.errorBadRequest("Unknown content type for triples: " + ct);
        return null;
    }

    InputStream body = null;
    try {
        body = action.request.getInputStream();
    } catch (IOException ioEx) {
        IO.exception(ioEx);
    }
    final int contentLength = action.request.getContentLength();
    final StreamRDFCounting counter = StreamRDFLib.count(dest);

    // Success and failure are logged in the same shape; only the final field differs.
    final String logPattern = "[%d] Body: Content-Length=%d, Content-Type=%s, Charset=%s => %s : %s";
    try {
        ActionSPARQL.parse(action, counter, body, lang, base);
        final UploadDetails result = new UploadDetails(counter.count(), counter.countTriples(), counter.countQuads());
        action.log.info(format(logPattern, action.id, contentLength, ct.getContentType(), ct.getCharset(), lang.getName(), result.detailsStr()));
        return result;
    } catch (RiotParseException parseEx) {
        // Log the parser's message, then let the caller deal with the failure.
        action.log.info(format(logPattern, action.id, contentLength, ct.getContentType(), ct.getCharset(), lang.getName(), parseEx.getMessage()));
        throw parseEx;
    }
}
Also used : RiotParseException(org.apache.jena.riot.RiotParseException) WebContent.matchContentType(org.apache.jena.riot.WebContent.matchContentType) ContentType(org.apache.jena.atlas.web.ContentType) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) StreamRDFCounting(org.apache.jena.riot.lang.StreamRDFCounting) Lang(org.apache.jena.riot.Lang) IOException(java.io.IOException)

Example 5 with StreamRDFCounting

use of org.apache.jena.riot.lang.StreamRDFCounting in project jena by apache.

the class Upload method fileUploadWorker.

/**
 * Process an HTTP upload of RDF files (triples or quads).
 * Stream straight into a graph or dataset -- unlike SPARQL_Upload the destination
 * is known at the start of the multipart file body.
 *
 * <p>Each file part is parsed into {@code dest} through a per-file counter (used
 * for the log line) layered over an overall counter (used for the result).
 * The syntax of a part is taken from its content type unless that is plain text
 * or unrecognised, then from its file name, and finally defaults to RDF/XML.
 *
 * @param action the HTTP action providing the request, its id and its logger
 * @param dest   destination stream that receives the parsed data
 * @return details carrying the overall, triple and quad counts across all parts
 */
public static UploadDetails fileUploadWorker(HttpAction action, StreamRDF dest) {
    String base = ActionLib.wholeRequestURL(action.request);
    ServletFileUpload upload = new ServletFileUpload();
    // Overall counting.
    StreamRDFCounting countingDest = StreamRDFLib.count(dest);
    // Success and failure are logged in the same shape; only the final field differs.
    final String logPattern = "[%d] Filename: %s, Content-Type=%s, Charset=%s => %s : %s";
    try {
        FileItemIterator iter = upload.getItemIterator(action.request);
        while (iter.hasNext()) {
            FileItemStream fileStream = iter.next();
            if (fileStream.isFormField()) {
                // Plain form fields are not accepted here; report which one was sent.
                String fieldName = fileStream.getFieldName();
                InputStream fieldStream = fileStream.openStream();
                String value = Streams.asString(fieldStream, "UTF-8");
                ServletOps.errorBadRequest(format("Only files accepted in multipart file upload (got %s=%s)", fieldName, value));
            }
            // The field name of a file part is ignored.
            InputStream stream = fileStream.openStream();
            // Process the input stream
            String contentTypeHeader = fileStream.getContentType();
            ContentType ct = ContentType.create(contentTypeHeader);
            Lang lang = null;
            // text/plain says nothing about the RDF syntax, so fall through to the file name.
            if (!matchContentType(ctTextPlain, ct))
                lang = RDFLanguages.contentTypeToLang(ct.getContentType());
            if (lang == null) {
                String name = fileStream.getName();
                if (name == null || name.equals(""))
                    ServletOps.errorBadRequest("No name for content - can't determine RDF syntax");
                lang = RDFLanguages.filenameToLang(name);
                // A ".gz" file name means the content must be decompressed before parsing.
                if (name.endsWith(".gz"))
                    stream = new GZIPInputStream(stream);
            }
            if (lang == null)
                // Desperate.
                lang = RDFLanguages.RDFXML;
            String printfilename = fileStream.getName();
            if (printfilename == null || printfilename.equals(""))
                printfilename = "<none>";
            // count just this step
            StreamRDFCounting countingDest2 = StreamRDFLib.count(countingDest);
            try {
                ActionSPARQL.parse(action, countingDest2, stream, lang, base);
                UploadDetails details1 = new UploadDetails(countingDest2.count(), countingDest2.countTriples(), countingDest2.countQuads());
                action.log.info(format(logPattern, action.id, printfilename, ct.getContentType(), ct.getCharset(), lang.getName(), details1.detailsStr()));
            } catch (RiotParseException ex) {
                action.log.info(format(logPattern, action.id, printfilename, ct.getContentType(), ct.getCharset(), lang.getName(), ex.getMessage()));
                throw ex;
            }
        }
    } catch (ActionErrorException ex) {
        // Already an action error (e.g. a bad request reported above): pass it on untouched.
        throw ex;
    } catch (Exception ex) {
        // Report the message as before, but also hand over the exception itself so the
        // cause and its stack trace are not lost.
        ServletOps.errorOccurred(ex.getMessage(), ex);
    }
    // Overall results.
    UploadDetails details = new UploadDetails(countingDest.count(), countingDest.countTriples(), countingDest.countQuads());
    return details;
}
Also used : RiotParseException(org.apache.jena.riot.RiotParseException) WebContent.matchContentType(org.apache.jena.riot.WebContent.matchContentType) ContentType(org.apache.jena.atlas.web.ContentType) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) Lang(org.apache.jena.riot.Lang) IOException(java.io.IOException) RiotParseException(org.apache.jena.riot.RiotParseException) GZIPInputStream(java.util.zip.GZIPInputStream) ServletFileUpload(org.apache.commons.fileupload.servlet.ServletFileUpload) FileItemStream(org.apache.commons.fileupload.FileItemStream) StreamRDFCounting(org.apache.jena.riot.lang.StreamRDFCounting) FileItemIterator(org.apache.commons.fileupload.FileItemIterator)

Aggregations

StreamRDFCounting (org.apache.jena.riot.lang.StreamRDFCounting)7 InputStream (java.io.InputStream)3 GZIPInputStream (java.util.zip.GZIPInputStream)3 ContentType (org.apache.jena.atlas.web.ContentType)3 Lang (org.apache.jena.riot.Lang)3 IOException (java.io.IOException)2 FileItemIterator (org.apache.commons.fileupload.FileItemIterator)2 FileItemStream (org.apache.commons.fileupload.FileItemStream)2 ServletFileUpload (org.apache.commons.fileupload.servlet.ServletFileUpload)2 BaseTest (org.apache.jena.atlas.junit.BaseTest)2 RiotParseException (org.apache.jena.riot.RiotParseException)2 WebContent.matchContentType (org.apache.jena.riot.WebContent.matchContentType)2 Test (org.junit.Test)2 IRI (org.apache.jena.iri.IRI)1 StreamRDF (org.apache.jena.riot.system.StreamRDF)1 DatasetGraph (org.apache.jena.sparql.core.DatasetGraph)1