Search in sources:

Example 1 with StandardizingSPARQLParser

use of openrdffork.StandardizingSPARQLParser in project QueryAnalysis by Wikidata.

the class OutputHandlerAnonymizer method writeLine.

/**
 * Anonymizes a valid query and writes it, with its metadata, as one output row.
 *
 * <p>The query is parsed into a syntax tree, passed through the standardizing processors and the
 * anonymizer, rendered back to a string, re-parsed as a sanity check, URL-encoded and finally
 * handed to {@code writer}. The row consists of {@code "?query=" + encodedQuery}, the time stamp,
 * {@code "organic"} or {@code "robotic"} (depending on the handler's source category) and either
 * {@code "browser"} or the handler's user agent. Nothing is written when the query handler does
 * not report {@code VALID} or when any processing step fails.
 *
 * <p>If the rendered query can no longer be parsed, the original query text is stored in
 * {@code failedQueriesFolder/} next to the output file for later inspection.
 *
 * @param queryToAnalyze the raw query text
 * @param validityStatus the validity determined for the query, forwarded to the query handler factory
 * @param userAgent      the user agent, forwarded to the query handler factory
 * @param timeStamp      the time stamp written as the second column of the row
 * @param currentLine    the line number, forwarded to the query handler factory
 * @param currentDay     the day, forwarded to the query handler factory
 * @param currentFile    the file name, forwarded to the query handler factory
 */
@Override
public void writeLine(String queryToAnalyze, Validity validityStatus, String userAgent, String timeStamp, long currentLine, int currentDay, String currentFile) {
    List<Object> line = new ArrayList<>();
    QueryHandler queryHandler = queryHandlerFactory.getQueryHandler(validityStatus, currentLine, currentDay, queryToAnalyze, userAgent, currentFile, threadNumber);
    if (queryHandler.getValidityStatus().equals(QueryHandler.Validity.VALID)) {
        ASTQueryContainer qc;
        try {
            qc = SyntaxTreeBuilder.parseQuery(queryToAnalyze);
        } catch (TokenMgrError | ParseException e) {
            logger.error("Failed to parse the query although it was found valid - this is a serious bug.", e);
            return;
        }
        try {
            StandardizingSPARQLParser.debug(qc);
            StringEscapesProcessor.process(qc);
            BaseDeclProcessor.process(qc, OpenRDFQueryHandler.BASE_URI);
            StandardizingPrefixDeclProcessor.process(qc);
            StandardizingSPARQLParser.anonymize(qc);
        } catch (MalformedQueryException e) {
            logger.error("Failed to debug or anonymize query. " + queryToAnalyze, e);
            // Fail closed: the tree may be only partially anonymized at this point, so it must
            // not be rendered and written to the anonymized output.
            return;
        }
        String renderedQueryString;
        try {
            renderedQueryString = qc.jjtAccept(new RenderVisitor(), "").toString();
        } catch (VisitorException e) {
            logger.error("Failed to render the query.", e);
            return;
        }
        try {
            new StandardizingSPARQLParser().parseQuery(renderedQueryString, OpenRDFQueryHandler.BASE_URI);
        } catch (MalformedQueryException e) {
            String queryName = this.threadNumber + "_" + this.failedQueriesNumber + ".query";
            logger.error("Anonymized query was not valid anymore. " + queryName, e);
            // Keep the trailing "/" of the output directory (the "+ 1"); without it the folder name
            // is glued onto the directory name. When outputFile contains no "/", lastIndexOf
            // returns -1 and the prefix becomes "", i.e. a path relative to the working directory.
            String outputDirectory = this.outputFile.substring(0, this.outputFile.lastIndexOf("/") + 1);
            try (BufferedWriter bw = new BufferedWriter(new FileWriter(outputDirectory + "failedQueriesFolder/" + queryName))) {
                bw.write(queryToAnalyze);
                this.failedQueriesNumber++;
            } catch (IOException i) {
                logger.error("Could not write the failed query to failed queries folder.", i);
            }
            return;
        } catch (ClassCastException e) {
            // NOTE(review): the row is still written after this error (original behavior kept);
            // confirm whether a ClassCastException here should also suppress the output.
            logger.error("Unexpected class cast exception after anonymization.", e);
        }
        String encodedRenderedQueryString;
        try {
            encodedRenderedQueryString = URLEncoder.encode(renderedQueryString, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            logger.error("Apparently this system does not support UTF-8. Please fix this before running the program again.", e);
            return;
        }
        line.add("?query=" + encodedRenderedQueryString);
        line.add(timeStamp);
        if (queryHandler.getSourceCategory().equals(QueryHandler.SourceCategory.USER)) {
            line.add("organic");
        } else {
            line.add("robotic");
        }
        if (QueryHandler.isOrganicUserAgent(queryHandler.getUserAgent())) {
            line.add("browser");
        } else {
            line.add(queryHandler.getUserAgent());
        }
        writer.writeRow(line);
    }
}
Also used : OpenRDFQueryHandler(query.OpenRDFQueryHandler) QueryHandler(query.QueryHandler) ArrayList(java.util.ArrayList) RenderVisitor(openrdffork.RenderVisitor) StandardizingSPARQLParser(openrdffork.StandardizingSPARQLParser) MalformedQueryException(org.openrdf.query.MalformedQueryException)

Example 2 with StandardizingSPARQLParser

use of openrdffork.StandardizingSPARQLParser in project QueryAnalysis by Wikidata.

the class Test method main.

/**
 * Runs the anonymization round trip over every regular file in a directory of example queries
 * and prints how many queries survived it.
 *
 * <p>For each file the query is parsed, standardized and anonymized, rendered back to text and
 * parsed again. Queries that cannot be parsed in the first place are counted as
 * "Failed to Parse"; queries whose rendered form can no longer be parsed are counted as
 * "Failed" and printed together with their file path.
 *
 * @param args optional; when present, {@code args[0]} is the directory to scan, otherwise the
 *             historical default directory is used
 */
public static void main(String[] args) {
    Main.loadStandardPrefixes();
    Anonymizer.loadWhitelistDatatypes();
    String queryDirectory = (args != null && args.length > 0) ? args[0] : "/home/adrian/workspace/java/months/exampleQueries/";
    int worked = 0;
    int failed = 0;
    int failedToParse = 0;
    try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream(Paths.get(queryDirectory))) {
        for (Path filePath : directoryStream) {
            if (!Files.isRegularFile(filePath)) {
                continue;
            }
            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            String queryString = new String(readAllBytes(filePath), java.nio.charset.StandardCharsets.UTF_8);
            try {
                new StandardizingSPARQLParser().parseQuery(queryString, OpenRDFQueryHandler.BASE_URI);
            } catch (MalformedQueryException e) {
                failedToParse++;
                continue;
            }
            ASTQueryContainer qc;
            try {
                qc = SyntaxTreeBuilder.parseQuery(queryString);
            } catch (TokenMgrError | ParseException ignored) {
                // Not counted in any of the three totals; the query is simply skipped.
                continue;
            }
            try {
                StandardizingSPARQLParser.debug(qc);
                StringEscapesProcessor.process(qc);
                BaseDeclProcessor.process(qc, OpenRDFQueryHandler.BASE_URI);
                StandardizingPrefixDeclProcessor.process(qc);
                StandardizingSPARQLParser.anonymize(qc);
            } catch (MalformedQueryException e) {
                // Reported, but the round trip below is still attempted on the tree as it stands.
                System.out.println("Failed to debug or anonymize query. " + queryString);
            }
            String renderedQueryString;
            try {
                renderedQueryString = qc.jjtAccept(new RenderVisitor(), "").toString();
            } catch (VisitorException ignored) {
                // Not counted in any of the three totals; the query is simply skipped.
                continue;
            }
            try {
                new StandardizingSPARQLParser().parseQuery(renderedQueryString, OpenRDFQueryHandler.BASE_URI);
                worked++;
            } catch (MalformedQueryException | ClassCastException e) {
                failed++;
                System.out.println("-----------------------------------");
                System.out.println(filePath);
                System.out.println(queryString);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    System.out.println("Worked: " + worked + " Failed: " + failed + " Failed to Parse: " + failedToParse);
}
Also used : Path(java.nio.file.Path) ParsedQuery(org.openrdf.query.parser.ParsedQuery) RenderVisitor(openrdffork.RenderVisitor) IOException(java.io.IOException) StandardizingSPARQLParser(openrdffork.StandardizingSPARQLParser) MalformedQueryException(org.openrdf.query.MalformedQueryException)

Example 3 with StandardizingSPARQLParser

use of openrdffork.StandardizingSPARQLParser in project QueryAnalysis by Wikidata.

the class OpenRDFQueryHandler method normalize.

/**
 * Normalizes a given query by:
 * - replacing all wikidata uris at subject and object positions with sub1, sub2 ... (obj1, obj2 ...).
 *
 * <p>The query's source string is re-parsed with {@code parseNormalizeQuery} and the resulting
 * tuple expression is then rewritten in a fixed sequence of visitor passes. As a side effect the
 * collected subject/object identifiers and predicate URIs are stored via {@code setqIDs} and
 * {@code setpIDs}.
 *
 * @param queryToNormalize the query to be normalized
 * @return the normalized query
 * @throws MalformedQueryException If the query was malformed (would be a bug since the input was a parsed query)
 * @throws VisitorException        If there is an error during normalization
 */
private ParsedQuery normalize(ParsedQuery queryToNormalize) throws MalformedQueryException, VisitorException {
    final StandardizingSPARQLParser parser = new StandardizingSPARQLParser();
    final ParsedQuery result = parser.parseNormalizeQuery(queryToNormalize.getSourceString(), BASE_URI);

    // State shared between the passes below.
    final Map<String, Integer> constantIndex = new HashMap<>();
    final Set<String> qIds = new HashSet<>();
    final Set<String> pIds = new HashSet<>();
    final Set<String> predicateVarNames = new HashSet<>();

    // Pass 1: remember the names of all predicate variables that are neither constant nor anonymous.
    result.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {

        @Override
        public void meet(StatementPattern pattern) throws VisitorException {
            Var predicateVar = pattern.getPredicateVar();
            boolean namedVariable = !(predicateVar.isConstant() || predicateVar.isAnonymous());
            if (namedVariable) {
                predicateVarNames.add(predicateVar.getName());
            }
            meetNode(pattern);
        }
    });

    // Pass 2: rewrite extension expressions, except those bound to a predicate variable.
    result.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {

        @Override
        public void meet(ExtensionElem element) throws VisitorException {
            boolean boundToPredicate = predicateVarNames.contains(element.getName());
            if (!boundToPredicate) {
                element.setExpr(normalizeValueExprHelper(element.getExpr(), constantIndex));
            }
            meetNode(element);
        }
    });

    // Pass 3: rebuild every binding set, rewriting values unless they belong to a predicate variable.
    result.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {

        @Override
        public void meet(BindingSetAssignment assignment) throws VisitorException {
            List<BindingSet> rebuilt = new ArrayList<>();
            for (BindingSet original : assignment.getBindingSets()) {
                List<String> bindingNames = new ArrayList<>();
                List<Value> bindingValues = new ArrayList<>();
                for (Binding entry : original) {
                    String bindingName = entry.getName();
                    bindingNames.add(bindingName);
                    if (predicateVarNames.contains(bindingName)) {
                        bindingValues.add(entry.getValue());
                    } else {
                        bindingValues.add(normalizeValueHelper(entry.getValue(), constantIndex));
                    }
                }
                rebuilt.add(new ListBindingSet(bindingNames, bindingValues));
            }
            assignment.setBindingSets(rebuilt);
            meetNode(assignment);
        }
    });

    // Pass 4: rewrite subjects and objects of statement patterns and collect predicate URIs.
    result.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {

        @Override
        public void meet(StatementPattern pattern) throws VisitorException {
            pattern.setSubjectVar(normalizeSubjectsAndObjectsHelper(pattern.getSubjectVar(), constantIndex, qIds));
            pattern.setObjectVar(normalizeSubjectsAndObjectsHelper(pattern.getObjectVar(), constantIndex, qIds));
            try {
                pIds.add(getURI(pattern.getPredicateVar()));
            } catch (NoURIException ignored) {
                // NoURIException signals that this predicate carries no URI, so there is nothing to record.
            }
            meetNode(pattern);
        }
    });

    // Pass 5: same subject/object rewrite for arbitrary length paths.
    result.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {

        @Override
        public void meet(ArbitraryLengthPath path) throws VisitorException {
            path.setSubjectVar(normalizeSubjectsAndObjectsHelper(path.getSubjectVar(), constantIndex, qIds));
            path.setObjectVar(normalizeSubjectsAndObjectsHelper(path.getObjectVar(), constantIndex, qIds));
            meetNode(path);
        }
    });

    // Pass 6: rewrite both operands of comparisons.
    result.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {

        @Override
        public void meet(Compare comparison) throws VisitorException {
            comparison.setLeftArg(normalizeValueExprHelper(comparison.getLeftArg(), constantIndex));
            comparison.setRightArg(normalizeValueExprHelper(comparison.getRightArg(), constantIndex));
            meetBinaryValueOperator(comparison);
        }
    });

    // Pass 7: rewrite the argument of isLiteral checks.
    result.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {

        @Override
        public void meet(IsLiteral literalCheck) throws VisitorException {
            literalCheck.setArg(normalizeValueExprHelper(literalCheck.getArg(), constantIndex));
            meetUnaryValueOperator(literalCheck);
        }
    });

    // Pass 8: final subject/object rewrite through normalizeNonConstAnonymousHelper.
    result.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {

        @Override
        public void meet(StatementPattern pattern) throws VisitorException {
            pattern.setSubjectVar(normalizeNonConstAnonymousHelper(pattern.getSubjectVar(), constantIndex));
            pattern.setObjectVar(normalizeNonConstAnonymousHelper(pattern.getObjectVar(), constantIndex));
            meetNode(pattern);
        }
    });

    this.setqIDs(qIds);
    this.setpIDs(pIds);
    return result;
}
Also used : ParsedQuery(org.openrdf.query.parser.ParsedQuery) NoURIException(utility.NoURIException) Binding(org.openrdf.query.Binding) BindingSet(org.openrdf.query.BindingSet) ListBindingSet(org.openrdf.query.impl.ListBindingSet) StandardizingSPARQLParser(openrdffork.StandardizingSPARQLParser) Value(org.openrdf.model.Value)

Example 4 with StandardizingSPARQLParser

use of openrdffork.StandardizingSPARQLParser in project QueryAnalysis by Wikidata.

the class OpenRDFQueryHandler method computeNonSimplePropertyPaths.

/**
 * Fills {@code nonSimplePropertyPaths} for the current query.
 *
 * <p>For a query that is not {@code VALID} the field receives the validity status as text.
 * Otherwise it receives the ID string computed from the paths reported by
 * {@link NonSimplePropertyPathVisitor}, {@code "NONE"} if that string is empty, or
 * {@code "INTERNAL_ERROR"} if parsing or visiting fails.
 */
@Override
protected final void computeNonSimplePropertyPaths() {
    final boolean valid = getValidityStatus() == QueryHandler.Validity.VALID;
    if (!valid) {
        this.nonSimplePropertyPaths = getValidityStatus().toString();
        return;
    }
    try {
        StandardizingSPARQLParser parser = new StandardizingSPARQLParser();
        ASTQueryContainer container = parser.getASTQueryContainerPrefixesProcessed(getQueryString(), BASE_URI);
        NonSimplePropertyPathVisitor pathVisitor = new NonSimplePropertyPathVisitor();
        Set<String> foundPaths = pathVisitor.getNonSimplePropertyPaths(container);
        this.nonSimplePropertyPaths = this.computeAnyIDString(foundPaths);
        if (this.nonSimplePropertyPaths.isEmpty()) {
            this.nonSimplePropertyPaths = "NONE";
        }
    } catch (VisitorException | MalformedQueryException e) {
        this.nonSimplePropertyPaths = "INTERNAL_ERROR";
        logger.error("Unexpected error while calculating non-simple property paths.", e);
    }
}
Also used : NonSimplePropertyPathVisitor(query.statistics.NonSimplePropertyPathVisitor) StandardizingSPARQLParser(openrdffork.StandardizingSPARQLParser) MalformedQueryException(org.openrdf.query.MalformedQueryException)

Example 5 with StandardizingSPARQLParser

use of openrdffork.StandardizingSPARQLParser in project QueryAnalysis by Wikidata.

the class OpenRDFQueryHandler method computeSparqlStatistics.

/**
 * Collects the SPARQL statistics and the primary language of the current query.
 *
 * <p>For a query that is not {@code VALID} the statistics are set to an empty map. Otherwise the
 * statistics gathered from the debugged syntax tree are stored first and then extended with the
 * statistics gathered from the tuple expression of {@code this.query}. Any failure is logged and
 * not rethrown.
 */
@Override
protected void computeSparqlStatistics() {
    final boolean valid = getValidityStatus() == QueryHandler.Validity.VALID;
    if (!valid) {
        this.sparqlStatistics = new HashMap<>();
        return;
    }
    try {
        // Statistics taken from the syntax tree.
        StandardizingSPARQLParser parser = new StandardizingSPARQLParser();
        ASTQueryContainer syntaxTree = parser.getDebuggedASTQueryContainer(getQueryString(), BASE_URI);
        QueryContainerSparqlStatisticsCollector syntaxCollector = new QueryContainerSparqlStatisticsCollector();
        syntaxTree.jjtAccept(syntaxCollector, null);
        this.sparqlStatistics = syntaxCollector.getStatistics();

        // Statistics taken from the tuple expression: children first, then the root node itself.
        TupleExprSparqlStatisticsCollector algebraCollector = new TupleExprSparqlStatisticsCollector();
        this.query.getTupleExpr().visitChildren(algebraCollector);
        this.query.getTupleExpr().visit(algebraCollector);
        this.sparqlStatistics.putAll(algebraCollector.getStatistics());
        this.primaryLanguage = algebraCollector.getPrimaryLanguage();
    } catch (TokenMgrError | MalformedQueryException e) {
        logger.error("Failed to parse the query although it was found valid - this is a serious bug.", e);
    } catch (VisitorException e) {
        logger.error("Failed to calculate the SPARQL Keyword Statistics. Error occured while visiting the query.", e);
    } catch (Exception e) {
        logger.error("An unknown error occured while computing the sparql statistics: ", e);
    }
}
Also used : TupleExprSparqlStatisticsCollector(query.statistics.TupleExprSparqlStatisticsCollector) StandardizingSPARQLParser(openrdffork.StandardizingSPARQLParser) QueryContainerSparqlStatisticsCollector(query.statistics.QueryContainerSparqlStatisticsCollector) MalformedQueryException(org.openrdf.query.MalformedQueryException) NoURIException(utility.NoURIException)

Aggregations

StandardizingSPARQLParser (openrdffork.StandardizingSPARQLParser): 5, MalformedQueryException (org.openrdf.query.MalformedQueryException): 4, RenderVisitor (openrdffork.RenderVisitor): 2, ParsedQuery (org.openrdf.query.parser.ParsedQuery): 2, NoURIException (utility.NoURIException): 2, IOException (java.io.IOException): 1, Path (java.nio.file.Path): 1, ArrayList (java.util.ArrayList): 1, Value (org.openrdf.model.Value): 1, Binding (org.openrdf.query.Binding): 1, BindingSet (org.openrdf.query.BindingSet): 1, ListBindingSet (org.openrdf.query.impl.ListBindingSet): 1, OpenRDFQueryHandler (query.OpenRDFQueryHandler): 1, QueryHandler (query.QueryHandler): 1, NonSimplePropertyPathVisitor (query.statistics.NonSimplePropertyPathVisitor): 1, QueryContainerSparqlStatisticsCollector (query.statistics.QueryContainerSparqlStatisticsCollector): 1, TupleExprSparqlStatisticsCollector (query.statistics.TupleExprSparqlStatisticsCollector): 1