Use of openrdffork.StandardizingSPARQLParser in project QueryAnalysis by Wikidata.
Class OutputHandlerAnonymizer, method writeLine.
@Override
public void writeLine(String queryToAnalyze, Validity validityStatus, String userAgent, String timeStamp, long currentLine, int currentDay, String currentFile) {
  List<Object> line = new ArrayList<>();
  QueryHandler queryHandler = queryHandlerFactory.getQueryHandler(validityStatus, currentLine, currentDay, queryToAnalyze, userAgent, currentFile, threadNumber);
  if (queryHandler.getValidityStatus().equals(QueryHandler.Validity.VALID)) {
    // Build the raw AST of the query.
    ASTQueryContainer qc;
    try {
      qc = SyntaxTreeBuilder.parseQuery(queryToAnalyze);
    } catch (TokenMgrError | ParseException e) {
      logger.error("Failed to parse the query although it was found valid - this is a serious bug.", e);
      return;
    }
    // Standardize and anonymize the AST in place.
    try {
      StandardizingSPARQLParser.debug(qc);
      StringEscapesProcessor.process(qc);
      BaseDeclProcessor.process(qc, OpenRDFQueryHandler.BASE_URI);
      StandardizingPrefixDeclProcessor.process(qc);
      StandardizingSPARQLParser.anonymize(qc);
    } catch (MalformedQueryException e) {
      logger.error("Failed to debug or anonymize query. " + queryToAnalyze);
    }
    // Render the anonymized AST back into a query string.
    String renderedQueryString;
    try {
      renderedQueryString = qc.jjtAccept(new RenderVisitor(), "").toString();
    } catch (VisitorException e) {
      logger.error("Failed to render the query.", e);
      return;
    }
    // Re-parse the rendered query to verify that anonymization kept it valid.
    try {
      new StandardizingSPARQLParser().parseQuery(renderedQueryString, OpenRDFQueryHandler.BASE_URI);
    } catch (MalformedQueryException e) {
      String queryName = this.threadNumber + "_" + this.failedQueriesNumber + ".query";
      logger.error("Anonymized query was not valid anymore. " + queryName, e);
      // Save the offending query under failedQueriesFolder/ in the output file's directory.
      try (BufferedWriter bw = new BufferedWriter(new FileWriter(this.outputFile.substring(0, this.outputFile.lastIndexOf("/") + 1) + "failedQueriesFolder/" + queryName))) {
        bw.write(queryToAnalyze);
        this.failedQueriesNumber++;
      } catch (IOException i) {
        logger.error("Could not write the failed query to failed queries folder.", i);
      }
      return;
    } catch (ClassCastException e) {
      logger.error("Unexpected class cast exception after anonymization.", e);
    }
    String encodedRenderedQueryString;
    try {
      encodedRenderedQueryString = URLEncoder.encode(renderedQueryString, "UTF-8");
    } catch (UnsupportedEncodingException e) {
      logger.error("Apparently this system does not support UTF-8. Please fix this before running the program again.");
      return;
    }
    line.add("?query=" + encodedRenderedQueryString);
    line.add(timeStamp);
    if (queryHandler.getSourceCategory().equals(QueryHandler.SourceCategory.USER)) {
      line.add("organic");
    } else {
      line.add("robotic");
    }
    if (QueryHandler.isOrganicUserAgent(queryHandler.getUserAgent())) {
      line.add("browser");
    } else {
      line.add(queryHandler.getUserAgent());
    }
    writer.writeRow(line);
  }
}
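For reference, a minimal sketch of the same debug/anonymize/render/re-parse round trip outside the output handler. The class name and BASE_URI value are placeholders, the Sesame 2.x import paths are assumptions, and the imports for RenderVisitor and StandardizingPrefixDeclProcessor are omitted because their packages are not visible in this excerpt; the parser and processor calls themselves are taken from the snippet above.

import openrdffork.StandardizingSPARQLParser;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.parser.sparql.BaseDeclProcessor;
import org.openrdf.query.parser.sparql.StringEscapesProcessor;
import org.openrdf.query.parser.sparql.ast.ASTQueryContainer;
import org.openrdf.query.parser.sparql.ast.ParseException;
import org.openrdf.query.parser.sparql.ast.SyntaxTreeBuilder;
import org.openrdf.query.parser.sparql.ast.VisitorException;
// Imports for RenderVisitor and StandardizingPrefixDeclProcessor are omitted;
// their packages are not shown in this excerpt.

public final class AnonymizationRoundTrip {

  // Placeholder for the project's OpenRDFQueryHandler.BASE_URI.
  private static final String BASE_URI = "http://www.wikidata.org/";

  /** Standardizes and anonymizes a query, then re-parses the rendered result. */
  public static String anonymize(String query)
      throws ParseException, MalformedQueryException, VisitorException {
    // Build the raw AST of the query.
    ASTQueryContainer qc = SyntaxTreeBuilder.parseQuery(query);
    // Same processor chain as OutputHandlerAnonymizer.writeLine above.
    StandardizingSPARQLParser.debug(qc);
    StringEscapesProcessor.process(qc);
    BaseDeclProcessor.process(qc, BASE_URI);
    StandardizingPrefixDeclProcessor.process(qc);
    StandardizingSPARQLParser.anonymize(qc);
    // Render the anonymized AST back to a query string.
    String rendered = qc.jjtAccept(new RenderVisitor(), "").toString();
    // Re-parse to confirm that anonymization did not break the query.
    new StandardizingSPARQLParser().parseQuery(rendered, BASE_URI);
    return rendered;
  }
}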
Use of openrdffork.StandardizingSPARQLParser in project QueryAnalysis by Wikidata.
Class Test, method main.
public static void main(String[] args) {
  Main.loadStandardPrefixes();
  Anonymizer.loadWhitelistDatatypes();
  int worked = 0;
  int failed = 0;
  int failedToParse = 0;
  try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream(Paths.get("/home/adrian/workspace/java/months/exampleQueries/"))) {
    for (Path filePath : directoryStream) {
      if (Files.isRegularFile(filePath)) {
        String queryString = new String(readAllBytes(filePath));
        try {
          ParsedQuery parsedQuery = new StandardizingSPARQLParser().parseQuery(queryString, OpenRDFQueryHandler.BASE_URI);
        } catch (MalformedQueryException e) {
          failedToParse++;
          continue;
        }
        ASTQueryContainer qc;
        try {
          qc = SyntaxTreeBuilder.parseQuery(queryString);
        } catch (TokenMgrError | ParseException e) {
          // e.printStackTrace();
          continue;
        }
        try {
          StandardizingSPARQLParser.debug(qc);
          StringEscapesProcessor.process(qc);
          BaseDeclProcessor.process(qc, OpenRDFQueryHandler.BASE_URI);
          StandardizingPrefixDeclProcessor.process(qc);
          StandardizingSPARQLParser.anonymize(qc);
        } catch (MalformedQueryException e) {
          System.out.println("Failed to debug or anonymize query. " + queryString);
        }
        String renderedQueryString;
        try {
          renderedQueryString = qc.jjtAccept(new RenderVisitor(), "").toString();
          // System.out.println(renderedQueryString);
        } catch (VisitorException e) {
          // e.printStackTrace();
          continue;
        }
        try {
          ParsedQuery parsedQuery = new StandardizingSPARQLParser().parseQuery(renderedQueryString, OpenRDFQueryHandler.BASE_URI);
          worked++;
        } catch (MalformedQueryException | ClassCastException e) {
          failed++;
          System.out.println("-----------------------------------");
          System.out.println(filePath);
          System.out.println(queryString);
          continue;
        }
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  System.out.println("Worked: " + worked + " Failed: " + failed + " Failed to Parse: " + failedToParse);
}
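This harness re-parses each rendered query with StandardizingSPARQLParser to catch cases where anonymization produces syntactically invalid SPARQL; the counters printed at the end report how many example queries survived the round trip, how many did not, and how many could not be parsed in the first place.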
Use of openrdffork.StandardizingSPARQLParser in project QueryAnalysis by Wikidata.
Class OpenRDFQueryHandler, method normalize.
/**
 * Normalizes a given query by:
 * - replacing all Wikidata URIs at subject and object positions with sub1, sub2, ... (obj1, obj2, ...).
 *
 * @param queryToNormalize the query to be normalized
 * @return the normalized query
 * @throws MalformedQueryException if the query was malformed (which would be a bug, since the input is an already parsed query)
 * @throws VisitorException if an error occurs during normalization
 */
private ParsedQuery normalize(ParsedQuery queryToNormalize) throws MalformedQueryException, VisitorException {
  ParsedQuery normalizedQuery = new StandardizingSPARQLParser().parseNormalizeQuery(queryToNormalize.getSourceString(), BASE_URI);
  final Map<String, Integer> valueConstants = new HashMap<>();
  final Set<String> subjectsAndObjects = new HashSet<String>();
  final Set<String> predicates = new HashSet<String>();
  final Set<String> predicateVariables = new HashSet<String>();
  // Pass 1: collect the names of explicit (non-constant, non-anonymous) predicate variables; they are excluded from normalization below.
  normalizedQuery.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {
    @Override
    public void meet(StatementPattern statementPattern) throws VisitorException {
      Var predicate = statementPattern.getPredicateVar();
      if (!predicate.isConstant() && !predicate.isAnonymous()) {
        predicateVariables.add(predicate.getName());
      }
      meetNode(statementPattern);
    }
  });
  // Pass 2: normalize constants in extension elements (e.g. BIND expressions), unless they bind a predicate variable.
  normalizedQuery.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {
    @Override
    public void meet(ExtensionElem extensionElem) throws VisitorException {
      if (!predicateVariables.contains(extensionElem.getName())) {
        extensionElem.setExpr(normalizeValueExprHelper(extensionElem.getExpr(), valueConstants));
      }
      meetNode(extensionElem);
    }
  });
  // Pass 3: normalize the values in VALUES clauses, leaving bindings of predicate variables untouched.
  normalizedQuery.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {
    @Override
    public void meet(BindingSetAssignment bindingSetAssignment) throws VisitorException {
      List<BindingSet> bindingSets = new ArrayList<BindingSet>();
      for (BindingSet bindingSet : bindingSetAssignment.getBindingSets()) {
        List<String> names = new ArrayList<String>();
        List<Value> values = new ArrayList<Value>();
        for (Binding binding : bindingSet) {
          String name = binding.getName();
          if (!predicateVariables.contains(name)) {
            names.add(name);
            values.add(normalizeValueHelper(binding.getValue(), valueConstants));
          } else {
            names.add(name);
            values.add(binding.getValue());
          }
        }
        bindingSets.add(new ListBindingSet(names, values));
      }
      bindingSetAssignment.setBindingSets(bindingSets);
      meetNode(bindingSetAssignment);
    }
  });
  // Pass 4: normalize subjects and objects of statement patterns and record the predicate URIs.
  normalizedQuery.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {
    @Override
    public void meet(StatementPattern statementPattern) throws VisitorException {
      statementPattern.setSubjectVar(normalizeSubjectsAndObjectsHelper(statementPattern.getSubjectVar(), valueConstants, subjectsAndObjects));
      statementPattern.setObjectVar(normalizeSubjectsAndObjectsHelper(statementPattern.getObjectVar(), valueConstants, subjectsAndObjects));
      try {
        String uri = getURI(statementPattern.getPredicateVar());
        predicates.add(uri);
      } catch (NoURIException e) {
        // NoURIException is used to notify us that there is no URI in this predicate, so we just don't add it.
      }
      // checkForVariable(statementPattern.getPredicateVar());
      meetNode(statementPattern);
    }
  });
  // Pass 5: normalize subjects and objects of property-path patterns.
  normalizedQuery.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {
    @Override
    public void meet(ArbitraryLengthPath arbitraryLengthPath) throws VisitorException {
      arbitraryLengthPath.setSubjectVar(normalizeSubjectsAndObjectsHelper(arbitraryLengthPath.getSubjectVar(), valueConstants, subjectsAndObjects));
      arbitraryLengthPath.setObjectVar(normalizeSubjectsAndObjectsHelper(arbitraryLengthPath.getObjectVar(), valueConstants, subjectsAndObjects));
      meetNode(arbitraryLengthPath);
    }
  });
  // Pass 6: normalize constants in comparison expressions.
  normalizedQuery.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {
    @Override
    public void meet(Compare compare) throws VisitorException {
      compare.setLeftArg(normalizeValueExprHelper(compare.getLeftArg(), valueConstants));
      compare.setRightArg(normalizeValueExprHelper(compare.getRightArg(), valueConstants));
      meetBinaryValueOperator(compare);
    }
  });
  // Pass 7: normalize the argument of isLiteral expressions.
  normalizedQuery.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {
    @Override
    public void meet(IsLiteral isLiteral) throws VisitorException {
      isLiteral.setArg(normalizeValueExprHelper(isLiteral.getArg(), valueConstants));
      meetUnaryValueOperator(isLiteral);
    }
  });
  // Pass 8: normalize the remaining non-constant anonymous subject and object variables.
  normalizedQuery.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {
    @Override
    public void meet(StatementPattern statementPattern) throws VisitorException {
      statementPattern.setSubjectVar(normalizeNonConstAnonymousHelper(statementPattern.getSubjectVar(), valueConstants));
      statementPattern.setObjectVar(normalizeNonConstAnonymousHelper(statementPattern.getObjectVar(), valueConstants));
      meetNode(statementPattern);
    }
  });
  this.setqIDs(subjectsAndObjects);
  this.setpIDs(predicates);
  return normalizedQuery;
}
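Each pass in normalize follows the same pattern: walk the query's algebra tree with an anonymous QueryModelVisitorBase and rewrite or collect the relevant nodes. As a condensed illustration of that pattern, the sketch below collects the URIs of constant predicates from a parsed query; the class name is hypothetical, the Sesame 2.x import paths are assumptions, and the project's getURI/NoURIException helper is replaced by a direct check on the Var's value.

import java.util.HashSet;
import java.util.Set;
import org.openrdf.query.algebra.StatementPattern;
import org.openrdf.query.algebra.Var;
import org.openrdf.query.algebra.helpers.QueryModelVisitorBase;
import org.openrdf.query.parser.ParsedQuery;
import org.openrdf.query.parser.sparql.ast.VisitorException;

public final class ConstantPredicateCollector {

  /** Collects the string form of every constant predicate in the query's algebra tree. */
  public static Set<String> collect(ParsedQuery query) throws VisitorException {
    final Set<String> predicates = new HashSet<>();
    query.getTupleExpr().visit(new QueryModelVisitorBase<VisitorException>() {
      @Override
      public void meet(StatementPattern statementPattern) throws VisitorException {
        Var predicate = statementPattern.getPredicateVar();
        // Constant predicates carry a concrete Value; record its string form.
        if (predicate.isConstant() && predicate.getValue() != null) {
          predicates.add(predicate.getValue().stringValue());
        }
        meetNode(statementPattern);
      }
    });
    return predicates;
  }
}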
Use of openrdffork.StandardizingSPARQLParser in project QueryAnalysis by Wikidata.
Class OpenRDFQueryHandler, method computeNonSimplePropertyPaths.
@Override
protected final void computeNonSimplePropertyPaths() {
  if (getValidityStatus() != QueryHandler.Validity.VALID) {
    this.nonSimplePropertyPaths = getValidityStatus().toString();
    return;
  }
  try {
    ASTQueryContainer qc = new StandardizingSPARQLParser().getASTQueryContainerPrefixesProcessed(getQueryString(), BASE_URI);
    Set<String> nonSimplePropertyPaths = new NonSimplePropertyPathVisitor().getNonSimplePropertyPaths(qc);
    this.nonSimplePropertyPaths = this.computeAnyIDString(nonSimplePropertyPaths);
    if (this.nonSimplePropertyPaths.equals("")) {
      this.nonSimplePropertyPaths = "NONE";
    }
  } catch (VisitorException | MalformedQueryException e) {
    this.nonSimplePropertyPaths = "INTERNAL_ERROR";
    logger.error("Unexpected error while calculating non-simple property paths.", e);
  }
}
Use of openrdffork.StandardizingSPARQLParser in project QueryAnalysis by Wikidata.
Class OpenRDFQueryHandler, method computeSparqlStatistics.
@Override
protected void computeSparqlStatistics() {
  if (getValidityStatus() != QueryHandler.Validity.VALID) {
    this.sparqlStatistics = new HashMap<>();
    return;
  }
  try {
    ASTQueryContainer queryContainer = new StandardizingSPARQLParser().getDebuggedASTQueryContainer(getQueryString(), BASE_URI);
    QueryContainerSparqlStatisticsCollector queryContainerSparqlStatisticsCollector = new QueryContainerSparqlStatisticsCollector();
    queryContainer.jjtAccept(queryContainerSparqlStatisticsCollector, null);
    this.sparqlStatistics = queryContainerSparqlStatisticsCollector.getStatistics();
    TupleExprSparqlStatisticsCollector tupleExprSparqlStatisticsCollector = new TupleExprSparqlStatisticsCollector();
    this.query.getTupleExpr().visitChildren(tupleExprSparqlStatisticsCollector);
    this.query.getTupleExpr().visit(tupleExprSparqlStatisticsCollector);
    this.sparqlStatistics.putAll(tupleExprSparqlStatisticsCollector.getStatistics());
    this.primaryLanguage = tupleExprSparqlStatisticsCollector.getPrimaryLanguage();
  } catch (TokenMgrError | MalformedQueryException e) {
    logger.error("Failed to parse the query although it was found valid - this is a serious bug.", e);
  } catch (VisitorException e) {
    logger.error("Failed to calculate the SPARQL Keyword Statistics. Error occurred while visiting the query.", e);
  } catch (Exception e) {
    logger.error("An unknown error occurred while computing the SPARQL statistics: ", e);
  }
}