Search in sources :

Example 1 with OpenRDFQueryHandler

use of query.OpenRDFQueryHandler in project QueryAnalysis by Wikidata.

From the class Main, method loadPreBuildQueryTypes.

/**
 * Loads all pre-build query types and tool mappings from the directory
 * {@code preBuildQueryTypeFiles} (resolved against the working directory).
 *
 * <p>Only regular files directly inside that directory are considered:
 * <ul>
 *   <li>{@code *.preBuildQueryType} files are read as SPARQL queries. A query that is not
 *       {@link QueryHandler.Validity#VALID} is logged and skipped. Otherwise the MD5 of the
 *       string dump of its normalized tuple expression is stored in one of the
 *       {@code queryTypes} maps under the file's base name (file name without extension).
 *       A later file with the same digest overwrites the earlier entry; a differing name
 *       is logged as a duplicate.</li>
 *   <li>{@code *.tsv} files are parsed as tool mappings with a header row. Rows with exactly
 *       five columns add an entry {@code (col0, col1) -> (col2, col3)} to
 *       {@code queryTypeToToolMapping}; the fifth column is not used here. Rows of any other
 *       length are logged and ignored.</li>
 * </ul>
 *
 * <p>An {@link IOException} while listing the directory or reading a query file is logged
 * and ends the loading; it is not propagated to the caller.
 */
public static void loadPreBuildQueryTypes() {
    Path preBuildDirectory = Paths.get("preBuildQueryTypeFiles");
    try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream(preBuildDirectory)) {
        for (Path filePath : directoryStream) {
            if (!Files.isRegularFile(filePath)) {
                continue;
            }
            // Use the Path API instead of searching for "/" so that the name is extracted
            // correctly regardless of the platform's path separator.
            String fileName = filePath.getFileName().toString();
            if (fileName.endsWith(".preBuildQueryType")) {
                // Decode explicitly instead of relying on the platform default charset.
                // NOTE(review): assumes the query files are stored as UTF-8 - confirm.
                String queryString = new String(readAllBytes(filePath), java.nio.charset.StandardCharsets.UTF_8);
                OpenRDFQueryHandler queryHandler = new OpenRDFQueryHandler(QueryHandler.Validity.DEFAULT, -1L, -1, queryString, "preBuildQueryTypes", "", -1);
                if (queryHandler.getValidityStatus() != QueryHandler.Validity.VALID) {
                    logger.info("The Pre-build query " + filePath + " is no valid SPARQL");
                    continue;
                }
                ParsedQuery normalizedPreBuildQuery = queryHandler.getNormalizedQuery();
                // The suffix check above guarantees that the file name contains a '.'.
                String queryTypeName = fileName.substring(0, fileName.lastIndexOf('.'));
                if (normalizedPreBuildQuery != null) {
                    String queryDump = normalizedPreBuildQuery.getTupleExpr().toString();
                    byte[] md5 = DigestUtils.md5(queryDump);
                    // floorMod keeps the bucket index non-negative for negative hash codes.
                    int index = Math.floorMod(queryDump.hashCode(), numberOfQueryTypeDiskMaps);
                    if (queryTypes[index].containsKey(md5)) {
                        String existingName = queryTypes[index].get(md5);
                        if (!existingName.equals(queryTypeName)) {
                            logger.info(existingName + " is duplicate of " + queryTypeName);
                        }
                    }
                    queryTypes[index].put(md5, queryTypeName);
                } else {
                    logger.info("Pre-build query " + queryTypeName + " could not be parsed.");
                }
            }
            if (fileName.endsWith(".tsv")) {
                TsvParserSettings parserSettings = new TsvParserSettings();
                parserSettings.setLineSeparatorDetectionEnabled(true);
                parserSettings.setHeaderExtractionEnabled(true);
                parserSettings.setSkipEmptyLines(true);
                parserSettings.setReadInputOnSeparateThread(true);
                ObjectRowProcessor rowProcessor = new ObjectRowProcessor() {

                    @Override
                    public void rowProcessed(Object[] row, ParsingContext parsingContext) {
                        if (row.length <= 1) {
                            logger.warn("Ignoring line without tab while parsing.");
                            return;
                        }
                        if (row.length == 5) {
                            queryTypeToToolMapping.put(new Tuple2<>(row[0].toString(), row[1].toString()), new Tuple2<>(row[2].toString(), row[3].toString()));
                            return;
                        }
                        logger.warn("Line with row length " + row.length + " found. Is the formatting of toolMapping.tsv correct?");
                    }
                };
                parserSettings.setProcessor(rowProcessor);
                TsvParser parser = new TsvParser(parserSettings);
                parser.parse(filePath.toFile());
            }
        }
    } catch (IOException e) {
        // Report the directory that was actually read, not a stale hard-coded path.
        logger.error("Could not read from directory " + preBuildDirectory, e);
    }
}
Also used : Path(java.nio.file.Path) ParsingContext(com.univocity.parsers.common.ParsingContext) ParsedQuery(org.openrdf.query.parser.ParsedQuery) OpenRDFQueryHandler(query.OpenRDFQueryHandler) TsvParserSettings(com.univocity.parsers.tsv.TsvParserSettings) ObjectRowProcessor(com.univocity.parsers.common.processor.ObjectRowProcessor) TsvParser(com.univocity.parsers.tsv.TsvParser)

Example 2 with OpenRDFQueryHandler

use of query.OpenRDFQueryHandler in project QueryAnalysis by Wikidata.

From the class Main, method getExampleQueries.

/**
 * Reads the example queries from https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples.
 *
 * <p>The page is fetched directly first and, if that fails with an {@link IOException},
 * once more through the proxy {@code webproxy.eqiad.wmnet:8080}. If both attempts fail the
 * error is logged and the method returns without loading anything.
 *
 * <p>Every {@code <pre>} element is treated as one query. Its name is taken from the nearest
 * {@code h1}-{@code h6} element found by walking backwards from the element's parent through
 * previous siblings and then up through ancestors. Each named query is stored in
 * {@code exampleQueriesString}; if it is valid SPARQL, its tuple expression is also stored in
 * {@code exampleQueriesTupleExpr}. Queries without a heading are logged and skipped.
 */
private static void getExampleQueries() {
    Document doc;
    Connection connection = Jsoup.connect("http://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples").header("Accept-Encoding", "gzip, deflate").userAgent("github.com/Wikidata/QueryAnalysis").maxBodySize(0);
    try {
        doc = connection.get();
    } catch (IOException e) {
        try {
            // Keep the cause of the direct-connection failure in the log instead of dropping it.
            logger.warn("While trying to download the example queries could not connect directloy to wikidata.org, trying via a proxy now.", e);
            doc = connection.proxy("webproxy.eqiad.wmnet", 8080).get();
        } catch (IOException e2) {
            logger.error("Could not even connect to wikidata.org via the proxy.", e2);
            return;
        }
    }
    // Remove the line numbers so they do not end up in the query text.
    doc.select("span.lineno").remove();
    // Compiled once here rather than on every step of the heading search below.
    java.util.regex.Pattern headingTag = java.util.regex.Pattern.compile("h[1-6]");
    Elements links = doc.select("pre");
    for (Element link : links) {
        String name = null;
        Element previous = link.parent();
        // Walk backwards through previous siblings, then upwards, until a heading is found
        // or the top of the document is reached.
        while (previous != null) {
            if (headingTag.matcher(previous.nodeName()).matches()) {
                // A heading without child elements used to throw IndexOutOfBoundsException
                // in child(0); fall back to the heading's own text in that case.
                name = previous.children().isEmpty() ? previous.text() : previous.child(0).text();
                break;
            }
            Element sibling = previous.previousElementSibling();
            previous = sibling != null ? sibling : previous.parent();
        }
        if (name != null) {
            String query = link.text();
            exampleQueriesString.put(query, name);
            OpenRDFQueryHandler queryHandler = new OpenRDFQueryHandler(QueryHandler.Validity.DEFAULT, -1L, -1, query, "exampleQueries", "", -1);
            if (queryHandler.getValidityStatus() != QueryHandler.Validity.VALID) {
                logger.warn("The example query " + name + " is no valid SPARQL.");
            } else {
                exampleQueriesTupleExpr.put(new TupleExprWrapper(queryHandler.getParsedQuery().getTupleExpr()), name);
            }
        } else {
            logger.error("Could not find header to: " + link.text());
        }
    }
}
Also used : OpenRDFQueryHandler(query.OpenRDFQueryHandler) TupleExprWrapper(openrdffork.TupleExprWrapper) Element(org.jsoup.nodes.Element) Connection(org.jsoup.Connection) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements)

Aggregations

OpenRDFQueryHandler (query.OpenRDFQueryHandler): 2, ParsingContext (com.univocity.parsers.common.ParsingContext): 1, ObjectRowProcessor (com.univocity.parsers.common.processor.ObjectRowProcessor): 1, TsvParser (com.univocity.parsers.tsv.TsvParser): 1, TsvParserSettings (com.univocity.parsers.tsv.TsvParserSettings): 1, Path (java.nio.file.Path): 1, TupleExprWrapper (openrdffork.TupleExprWrapper): 1, Connection (org.jsoup.Connection): 1, Document (org.jsoup.nodes.Document): 1, Element (org.jsoup.nodes.Element): 1, Elements (org.jsoup.select.Elements): 1, ParsedQuery (org.openrdf.query.parser.ParsedQuery): 1