Search in sources:

Example 1 with MalformedQueryException

use of org.openrdf.query.MalformedQueryException in project QueryAnalysis by Wikidata.

the class StandardizingSPARQLParser method anonymize.

/**
 * Anonymizes a query in place by:
 * - replacing all variables with var1, var2 ... (a trailing "Label", matched
 *   case-insensitively, is split off before numbering and re-appended as "Label")
 * - replacing strings with string1, string2 ...
 * - rounding the coordinates of geosparql wktLiteral POINT values to integers
 *
 * A string is left untouched if it is shorter than
 * Anonymizer.unanonymizedStringLength, is contained in
 * Anonymizer.whitelistedStrings, is numeric, is the wikibase:language
 * parameter of a wikibase:label service call, or carries a datatype contained
 * in Anonymizer.whitelistedDatatypes.
 *
 * @param queryContainer The query to be anonymized
 * @throws MalformedQueryException if visiting the syntax tree raised a
 *         TokenMgrError or a VisitorException
 */
public static void anonymize(ASTQueryContainer queryContainer) throws MalformedQueryException {
    final Map<String, Integer> variables = new HashMap<>();
    final Map<String, Integer> strings = new HashMap<>();
    try {
        queryContainer.jjtAccept(new ASTVisitorBase() {

            @Override
            public Object visit(ASTVar variable, Object data) throws VisitorException {
                String label = "Label";
                String name = variable.getName();
                if (name.toLowerCase().endsWith(label.toLowerCase())) {
                    // Number only the part in front of the suffix so that ?x and ?xLabel share a number.
                    String baseName = name.substring(0, name.length() - label.length());
                    variable.setName(replacementName(baseName) + label);
                } else {
                    variable.setName(replacementName(name));
                }
                return super.visit(variable, data);
            }

            /** Returns "var" + the 1-based number assigned to this name on its first occurrence. */
            private String replacementName(String variableName) {
                if (!variables.containsKey(variableName)) {
                    variables.put(variableName, variables.size() + 1);
                }
                return "var" + variables.get(variableName);
            }

            @Override
            public Object visit(ASTString string, Object data) throws VisitorException {
                String value = string.getValue();
                if (value.length() < Anonymizer.unanonymizedStringLength) {
                    return super.visit(string, data);
                }
                if (Anonymizer.whitelistedStrings.contains(value)) {
                    return super.visit(string, data);
                }
                if (NumberUtils.isNumber(value)) {
                    return super.visit(string, data);
                }
                if (isLabelServiceLanguage(string)) {
                    return super.visit(string, data);
                }

                // Find the datatype for this string: the second child of the enclosing literal, if it is an IRI.
                String datatype = "";
                Node parent = string.jjtGetParent();
                if (parent instanceof ASTRDFLiteral && parent.jjtGetNumChildren() > 1) {
                    Node sibling = parent.jjtGetChild(1);
                    if (sibling instanceof ASTIRI) {
                        datatype = ((ASTIRI) sibling).getValue();
                        if (Anonymizer.whitelistedDatatypes.contains(datatype)) {
                            return super.visit(string, data);
                        }
                    }
                }

                // Constant-first comparison: stays safe should the IRI node carry no value.
                if ("http://www.opengis.net/ont/geosparql#wktLiteral".equals(datatype)) {
                    Matcher matcher = OpenRDFQueryHandler.POINT_REGEX.matcher(value);
                    if (matcher.find()) {
                        int roundFirst = Math.round(Float.parseFloat(matcher.group(1)));
                        int roundSecond = Math.round(Float.parseFloat(matcher.group(2)));
                        string.setValue("POINT(" + roundFirst + " " + roundSecond + ")");
                        return super.visit(string, data);
                    }
                }

                if (!strings.containsKey(value)) {
                    strings.put(value, strings.size() + 1);
                }
                string.setValue("string" + strings.get(value));
                return super.visit(string, data);
            }

            /**
             * Checks whether the string is the object of
             * "bd:serviceParam wikibase:language ..." as the only triple inside a
             * "SERVICE wikibase:label { ... }" block, by walking up the syntax tree
             * and verifying node type and child count at every level.
             */
            private boolean isLabelServiceLanguage(ASTString string) {
                Node rdfLiteral = string.jjtGetParent();
                if (!hasShape(rdfLiteral, ASTRDFLiteral.class, 1)) {
                    return false;
                }
                Node objectList = rdfLiteral.jjtGetParent();
                if (!hasShape(objectList, ASTObjectList.class, 1)) {
                    return false;
                }
                Node propertyListPath = objectList.jjtGetParent();
                if (!hasShape(propertyListPath, ASTPropertyListPath.class, 2)) {
                    return false;
                }

                // Checking the path towards wikibase:language
                Node pathAlternative = propertyListPath.jjtGetChild(0);
                if (!hasShape(pathAlternative, ASTPathAlternative.class, 1)) {
                    return false;
                }
                Node pathSequence = pathAlternative.jjtGetChild(0);
                if (!hasShape(pathSequence, ASTPathSequence.class, 1)) {
                    return false;
                }
                Node pathElt = pathSequence.jjtGetChild(0);
                if (!hasShape(pathElt, ASTPathElt.class, 1)) {
                    return false;
                }
                if (!isLeafIri(pathElt.jjtGetChild(0), "http://wikiba.se/ontology#language")) {
                    return false;
                }

                // Checking the path towards ServiceGraphPattern
                Node triplesSameSubjectPath = propertyListPath.jjtGetParent();
                if (!hasShape(triplesSameSubjectPath, ASTTriplesSameSubjectPath.class, 2)) {
                    return false;
                }
                // Small branch to serviceParam
                if (!isLeafIri(triplesSameSubjectPath.jjtGetChild(0), "http://www.bigdata.com/rdf#serviceParam")) {
                    return false;
                }
                Node basicGraphPattern = triplesSameSubjectPath.jjtGetParent();
                if (!hasShape(basicGraphPattern, ASTBasicGraphPattern.class, 1)) {
                    return false;
                }
                Node graphPatternGroup = basicGraphPattern.jjtGetParent();
                if (!hasShape(graphPatternGroup, ASTGraphPatternGroup.class, 1)) {
                    return false;
                }
                Node serviceGraphPattern = graphPatternGroup.jjtGetParent();
                if (!hasShape(serviceGraphPattern, ASTServiceGraphPattern.class, 2)) {
                    return false;
                }

                // Final check for wikibase:label
                return isLeafIri(serviceGraphPattern.jjtGetChild(0), "http://wikiba.se/ontology#label");
            }

            /** True if the node is a (non-null) instance of the given type with exactly childCount children. */
            private boolean hasShape(Node node, Class<?> type, int childCount) {
                return type.isInstance(node) && node.jjtGetNumChildren() == childCount;
            }

            /** True if the node is a childless ASTIRI whose value equals the given IRI. */
            private boolean isLeafIri(Node node, String iri) {
                return node instanceof ASTIRI
                        && node.jjtGetNumChildren() == 0
                        && iri.equals(((ASTIRI) node).getValue());
            }
        }, null);
    } catch (TokenMgrError | VisitorException e) {
        throw new MalformedQueryException(e);
    }
}
Also used : HashMap(java.util.HashMap) Matcher(java.util.regex.Matcher) MalformedQueryException(org.openrdf.query.MalformedQueryException)

Example 2 with MalformedQueryException

use of org.openrdf.query.MalformedQueryException in project QueryAnalysis by Wikidata.

the class StandardizingSPARQLParser method normalize.

/**
 * Normalizes a query in place. Each distinct original value is given a
 * 1-based number in order of first visit, counted separately per category:
 * - variables become var1, var2 ...
 * - strings become string1, string2 ...
 * - limits become 1, 2 ...
 * - offsets become 1, 2 ...
 * - numeric literals become 1, 2 ...
 * - language tags of rdfLiterals become language-1, language-2 ...
 *
 * NOTE(review): the language of every visited rdfLiteral is rewritten,
 * whatever getLang() returned for it - confirm this is intended for literals
 * without a language tag.
 *
 * @param queryContainer The query to be normalized
 * @throws MalformedQueryException if visiting the syntax tree raised a
 *         TokenMgrError or a VisitorException
 */
public static void normalize(ASTQueryContainer queryContainer) throws MalformedQueryException {
    final Map<String, Integer> variables = new HashMap<>();
    final Map<String, Integer> strings = new HashMap<>();
    final Map<Long, Long> limits = new HashMap<>();
    final Map<Long, Long> offsets = new HashMap<>();
    final Map<String, Integer> numericLiterals = new HashMap<>();
    final Map<String, Integer> rdfLiterals = new HashMap<>();
    try {
        queryContainer.jjtAccept(new ASTVisitorBase() {

            /** Looks up the number recorded for the key, assigning the next free one on first sight. */
            private Integer numberFor(Map<String, Integer> seen, String key) {
                Integer number = seen.get(key);
                if (number == null) {
                    number = seen.size() + 1;
                    seen.put(key, number);
                }
                return number;
            }

            /** Same as numberFor, for the Long-keyed limit and offset tables. */
            private Long longNumberFor(Map<Long, Long> seen, Long key) {
                Long number = seen.get(key);
                if (number == null) {
                    number = (long) (seen.size() + 1);
                    seen.put(key, number);
                }
                return number;
            }

            @Override
            public Object visit(ASTVar variable, Object data) throws VisitorException {
                Integer number = numberFor(variables, variable.getName());
                variable.setName("var" + number);
                return super.visit(variable, data);
            }

            @Override
            public Object visit(ASTString string, Object data) throws VisitorException {
                Integer number = numberFor(strings, string.getValue());
                string.setValue("string" + number);
                return super.visit(string, data);
            }

            @Override
            public Object visit(ASTLimit limit, Object data) throws VisitorException {
                Long number = longNumberFor(limits, limit.getValue());
                limit.setValue(number);
                return super.visit(limit, data);
            }

            @Override
            public Object visit(ASTOffset offset, Object data) throws VisitorException {
                Long number = longNumberFor(offsets, offset.getValue());
                offset.setValue(number);
                return super.visit(offset, data);
            }

            @Override
            public Object visit(ASTNumericLiteral numericLiteral, Object data) throws VisitorException {
                Integer number = numberFor(numericLiterals, numericLiteral.getValue());
                numericLiteral.setValue(number.toString());
                return super.visit(numericLiteral, data);
            }

            @Override
            public Object visit(ASTRDFLiteral rdfLiteral, Object data) throws VisitorException {
                Integer number = numberFor(rdfLiterals, rdfLiteral.getLang());
                rdfLiteral.setLang("language-" + number.toString());
                return super.visit(rdfLiteral, data);
            }
        }, null);
    } catch (TokenMgrError | VisitorException e) {
        throw new MalformedQueryException(e);
    }
}
Also used : HashMap(java.util.HashMap) MalformedQueryException(org.openrdf.query.MalformedQueryException)

Example 3 with MalformedQueryException

use of org.openrdf.query.MalformedQueryException in project QueryAnalysis by Wikidata.

the class OutputHandlerAnonymizer method writeLine.

/**
 * Anonymizes one query and writes it as an output row.
 *
 * Nothing is written unless the query handler reports the query as VALID.
 * For a valid query the syntax tree is parsed, pre-processed and anonymized,
 * rendered back to a string and re-parsed as a sanity check. If the re-parse
 * fails with a MalformedQueryException, the original query is written to
 * "failedQueriesFolder/" next to the output file and no row is emitted.
 * Otherwise a row of (URL-encoded query, timestamp, "organic"/"robotic",
 * "browser" or the user agent) is written.
 *
 * @param queryToAnalyze the raw query string
 * @param validityStatus validity passed on to the query handler factory
 * @param userAgent      user agent passed on to the query handler factory
 * @param timeStamp      timestamp copied into the output row
 * @param currentLine    line number passed on to the query handler factory
 * @param currentDay     day passed on to the query handler factory
 * @param currentFile    file name passed on to the query handler factory
 */
@Override
public void writeLine(String queryToAnalyze, Validity validityStatus, String userAgent, String timeStamp, long currentLine, int currentDay, String currentFile) {
    QueryHandler queryHandler = queryHandlerFactory.getQueryHandler(validityStatus, currentLine, currentDay, queryToAnalyze, userAgent, currentFile, threadNumber);
    if (!queryHandler.getValidityStatus().equals(QueryHandler.Validity.VALID)) {
        return;
    }

    ASTQueryContainer qc;
    try {
        qc = SyntaxTreeBuilder.parseQuery(queryToAnalyze);
    } catch (TokenMgrError | ParseException e) {
        logger.error("Failed to parse the query although it was found valid - this is a serious bug.", e);
        return;
    }

    try {
        StandardizingSPARQLParser.debug(qc);
        StringEscapesProcessor.process(qc);
        BaseDeclProcessor.process(qc, OpenRDFQueryHandler.BASE_URI);
        StandardizingPrefixDeclProcessor.process(qc);
        StandardizingSPARQLParser.anonymize(qc);
    } catch (MalformedQueryException e) {
        // NOTE(review): processing continues with whatever state the tree is in,
        // so a query whose anonymization failed may still be emitted - confirm this is intended.
        logger.error("Failed to debug or anonymize query. " + queryToAnalyze, e);
    }

    String renderedQueryString;
    try {
        renderedQueryString = qc.jjtAccept(new RenderVisitor(), "").toString();
    } catch (VisitorException e) {
        logger.error("Failed to render the query.", e);
        return;
    }

    try {
        new StandardizingSPARQLParser().parseQuery(renderedQueryString, OpenRDFQueryHandler.BASE_URI);
    } catch (MalformedQueryException e) {
        String queryName = this.threadNumber + "_" + this.failedQueriesNumber + ".query";
        logger.error("Anonymized query was not valid anymore. " + queryName, e);
        // Keep the trailing "/" of the output directory (lastIndexOf + 1): without it the
        // folder name was glued onto the directory name, and an output file without any
        // "/" made substring(0, -1) throw. With no "/" the path is now relative.
        String outputDirectory = this.outputFile.substring(0, this.outputFile.lastIndexOf("/") + 1);
        String failedQueryPath = outputDirectory + "failedQueriesFolder/" + queryName;
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(failedQueryPath))) {
            bw.write(queryToAnalyze);
            this.failedQueriesNumber++;
        } catch (IOException i) {
            logger.error("Could not write the failed query to failed queries folder.", i);
        }
        return;
    } catch (ClassCastException e) {
        // NOTE(review): unlike the MalformedQueryException case this does not return,
        // so the row is still written - confirm this is intended.
        logger.error("Unexpected class cast exception after anonymization.", e);
    }

    String encodedRenderedQueryString;
    try {
        encodedRenderedQueryString = URLEncoder.encode(renderedQueryString, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        logger.error("Apparently this system does not support UTF-8. Please fix this before running the program again.", e);
        return;
    }

    List<Object> line = new ArrayList<>();
    line.add("?query=" + encodedRenderedQueryString);
    line.add(timeStamp);
    if (queryHandler.getSourceCategory().equals(QueryHandler.SourceCategory.USER)) {
        line.add("organic");
    } else {
        line.add("robotic");
    }
    if (QueryHandler.isOrganicUserAgent(queryHandler.getUserAgent())) {
        line.add("browser");
    } else {
        line.add(queryHandler.getUserAgent());
    }
    writer.writeRow(line);
}
Also used : OpenRDFQueryHandler(query.OpenRDFQueryHandler) QueryHandler(query.QueryHandler) ArrayList(java.util.ArrayList) RenderVisitor(openrdffork.RenderVisitor) StandardizingSPARQLParser(openrdffork.StandardizingSPARQLParser) MalformedQueryException(org.openrdf.query.MalformedQueryException)

Example 4 with MalformedQueryException

use of org.openrdf.query.MalformedQueryException in project QueryAnalysis by Wikidata.

the class Test method main.

/**
 * Manual test harness: runs every regular file of a directory through the
 * parse - anonymize - render - re-parse pipeline and prints how many queries
 * survived ("Worked"), broke after anonymization ("Failed") or could not be
 * parsed in the first place ("Failed to Parse").
 *
 * @param args optional; args[0] is the directory containing the example
 *             queries. Without it the previously hard-coded directory is used.
 */
public static void main(String[] args) {
    Main.loadStandardPrefixes();
    Anonymizer.loadWhitelistDatatypes();
    String queryDirectory = "/home/adrian/workspace/java/months/exampleQueries/";
    if (args != null && args.length > 0) {
        queryDirectory = args[0];
    }
    int worked = 0;
    int failed = 0;
    int failedToParse = 0;
    try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream(Paths.get(queryDirectory))) {
        for (Path filePath : directoryStream) {
            if (!Files.isRegularFile(filePath)) {
                continue;
            }
            // NOTE(review): decodes with the platform default charset - confirm the query files match it.
            String queryString = new String(readAllBytes(filePath));
            try {
                new StandardizingSPARQLParser().parseQuery(queryString, OpenRDFQueryHandler.BASE_URI);
            } catch (MalformedQueryException e) {
                failedToParse++;
                continue;
            }
            ASTQueryContainer qc;
            try {
                qc = SyntaxTreeBuilder.parseQuery(queryString);
            } catch (TokenMgrError | ParseException e) {
                // Skipped without being counted in any of the three totals.
                continue;
            }
            try {
                StandardizingSPARQLParser.debug(qc);
                StringEscapesProcessor.process(qc);
                BaseDeclProcessor.process(qc, OpenRDFQueryHandler.BASE_URI);
                StandardizingPrefixDeclProcessor.process(qc);
                StandardizingSPARQLParser.anonymize(qc);
            } catch (MalformedQueryException e) {
                System.out.println("Failed to debug or anonymize query. " + queryString);
            }
            String renderedQueryString;
            try {
                renderedQueryString = qc.jjtAccept(new RenderVisitor(), "").toString();
            } catch (VisitorException e) {
                // Skipped without being counted in any of the three totals.
                continue;
            }
            try {
                new StandardizingSPARQLParser().parseQuery(renderedQueryString, OpenRDFQueryHandler.BASE_URI);
                worked++;
            } catch (MalformedQueryException | ClassCastException e) {
                failed++;
                System.out.println("-----------------------------------");
                System.out.println(filePath);
                System.out.println(queryString);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    System.out.println("Worked: " + worked + " Failed: " + failed + " Failed to Parse: " + failedToParse);
}
Also used : Path(java.nio.file.Path) ParsedQuery(org.openrdf.query.parser.ParsedQuery) RenderVisitor(openrdffork.RenderVisitor) IOException(java.io.IOException) StandardizingSPARQLParser(openrdffork.StandardizingSPARQLParser) MalformedQueryException(org.openrdf.query.MalformedQueryException)

Example 5 with MalformedQueryException

use of org.openrdf.query.MalformedQueryException in project QueryAnalysis by Wikidata.

the class OpenRDFQueryHandler method computeNonSimplePropertyPaths.

/**
 * Fills the nonSimplePropertyPaths field.
 *
 * For a query that is not VALID the field receives the validity status as a
 * string. Otherwise it receives the ID string computed from the non-simple
 * property paths found in the query, "NONE" if that string is empty, or
 * "INTERNAL_ERROR" if parsing or visiting the query failed.
 */
@Override
protected final void computeNonSimplePropertyPaths() {
    QueryHandler.Validity validity = getValidityStatus();
    if (validity != QueryHandler.Validity.VALID) {
        this.nonSimplePropertyPaths = validity.toString();
        return;
    }
    try {
        StandardizingSPARQLParser parser = new StandardizingSPARQLParser();
        ASTQueryContainer queryContainer = parser.getASTQueryContainerPrefixesProcessed(getQueryString(), BASE_URI);
        NonSimplePropertyPathVisitor pathVisitor = new NonSimplePropertyPathVisitor();
        Set<String> foundPaths = pathVisitor.getNonSimplePropertyPaths(queryContainer);
        this.nonSimplePropertyPaths = this.computeAnyIDString(foundPaths);
        if (this.nonSimplePropertyPaths.isEmpty()) {
            this.nonSimplePropertyPaths = "NONE";
        }
    } catch (VisitorException | MalformedQueryException e) {
        this.nonSimplePropertyPaths = "INTERNAL_ERROR";
        logger.error("Unexpected error while calculating non-simple property paths.", e);
    }
}
Also used : NonSimplePropertyPathVisitor(query.statistics.NonSimplePropertyPathVisitor) StandardizingSPARQLParser(openrdffork.StandardizingSPARQLParser) MalformedQueryException(org.openrdf.query.MalformedQueryException)

Aggregations

MalformedQueryException (org.openrdf.query.MalformedQueryException) 49, QueryEvaluationException (org.openrdf.query.QueryEvaluationException) 15, RepositoryException (org.openrdf.repository.RepositoryException) 14, ParsedQuery (org.openrdf.query.parser.ParsedQuery) 12, SPARQLParser (org.openrdf.query.parser.sparql.SPARQLParser) 11, TupleQuery (org.openrdf.query.TupleQuery) 10, SailException (org.openrdf.sail.SailException) 9, IOException (java.io.IOException) 8, PCJStorageException (org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.PCJStorageException) 8, ArrayList (java.util.ArrayList) 7, RyaClientException (org.apache.rya.api.client.RyaClientException) 7, PrecomputedJoinStorage (org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) 7, VisibilityBindingSet (org.apache.rya.api.model.VisibilityBindingSet) 6, RyaDAOException (org.apache.rya.api.persist.RyaDAOException) 6, UnsupportedQueryException (org.apache.rya.indexing.pcj.fluo.app.query.UnsupportedQueryException) 6, PcjException (org.apache.rya.indexing.pcj.storage.PcjException) 6, PcjMetadata (org.apache.rya.indexing.pcj.storage.PcjMetadata) 6, TupleQueryResultHandlerException (org.openrdf.query.TupleQueryResultHandlerException) 6, TupleExpr (org.openrdf.query.algebra.TupleExpr) 6, List (java.util.List) 5