use of org.openrdf.query.MalformedQueryException in project QueryAnalysis by Wikidata.
the class StandardizingSPARQLParser method anonymize.
/**
 * Anonymizes a query in place by:
 * - replacing all variables with var1, var2 ... (a trailing "Label", matched
 *   case-insensitively, is kept, so ?item / ?itemLabel become var1 / var1Label)
 * - replacing all strings with string1, string2 ... unless the string is shorter than
 *   Anonymizer.unanonymizedStringLength, is contained in Anonymizer.whitelistedStrings,
 *   is a number, has a datatype contained in Anonymizer.whitelistedDatatypes, or is the
 *   wikibase:language argument of a wikibase:label service call
 * - rounding the coordinates of geosparql wktLiteral points to whole numbers
 *
 * @param queryContainer The query to be anonymized
 * @throws MalformedQueryException if the query was malformed
 */
public static void anonymize(ASTQueryContainer queryContainer) throws MalformedQueryException {
    final Map<String, Integer> variables = new HashMap<>();
    final Map<String, Integer> strings = new HashMap<>();
    try {
        queryContainer.jjtAccept(new ASTVisitorBase() {
            private static final String LABEL_SUFFIX = "Label";
            private static final String LANGUAGE_IRI = "http://wikiba.se/ontology#language";
            private static final String SERVICE_PARAM_IRI = "http://www.bigdata.com/rdf#serviceParam";
            private static final String LABEL_SERVICE_IRI = "http://wikiba.se/ontology#label";
            private static final String WKT_LITERAL_IRI = "http://www.opengis.net/ont/geosparql#wktLiteral";

            @Override
            public Object visit(ASTVar variable, Object data) throws VisitorException {
                String name = variable.getName();
                int stemLength = name.length() - LABEL_SUFFIX.length();
                // regionMatches(ignoreCase = true) keeps the suffix test independent of the default locale.
                if (stemLength >= 0 && name.regionMatches(true, stemLength, LABEL_SUFFIX, 0, LABEL_SUFFIX.length())) {
                    // Number only the stem so that a variable and its label variable share one number.
                    variable.setName(replacementName(name.substring(0, stemLength)) + LABEL_SUFFIX);
                } else {
                    variable.setName(replacementName(name));
                }
                return super.visit(variable, data);
            }

            /** Returns "var" + the number assigned to the name, assigning the next free number on first use. */
            private String replacementName(String variableName) {
                Integer number = variables.get(variableName);
                if (number == null) {
                    number = variables.size() + 1;
                    variables.put(variableName, number);
                }
                return "var" + number;
            }

            @Override
            public Object visit(ASTString string, Object data) throws VisitorException {
                String value = string.getValue();
                // Strings that stay untouched: short, whitelisted, numeric, or the label service language.
                if (value.length() < Anonymizer.unanonymizedStringLength
                        || Anonymizer.whitelistedStrings.contains(value)
                        || NumberUtils.isNumber(value)
                        || isLabelServiceLanguage(string)) {
                    return super.visit(string, data);
                }

                String datatype = datatypeOf(string);
                if (datatype != null && Anonymizer.whitelistedDatatypes.contains(datatype)) {
                    return super.visit(string, data);
                }

                if (WKT_LITERAL_IRI.equals(datatype)) {
                    Matcher matcher = OpenRDFQueryHandler.POINT_REGEX.matcher(value);
                    if (matcher.find()) {
                        // Keep the point but drop its precision by rounding both coordinates.
                        int roundFirst = Math.round(Float.parseFloat(matcher.group(1)));
                        int roundSecond = Math.round(Float.parseFloat(matcher.group(2)));
                        string.setValue("POINT(" + roundFirst + " " + roundSecond + ")");
                        return super.visit(string, data);
                    }
                }

                Integer number = strings.get(value);
                if (number == null) {
                    number = strings.size() + 1;
                    strings.put(value, number);
                }
                string.setValue("string" + number);
                return super.visit(string, data);
            }

            /**
             * Returns the datatype IRI attached to the string's RDF literal (the literal's second
             * child), or null if the string has no such sibling.
             */
            private String datatypeOf(ASTString string) {
                Node parent = string.jjtGetParent();
                if (parent instanceof ASTRDFLiteral && parent.jjtGetNumChildren() > 1) {
                    Node sibling = parent.jjtGetChild(1);
                    if (sibling instanceof ASTIRI) {
                        return ((ASTIRI) sibling).getValue();
                    }
                }
                return null;
            }

            /**
             * Checks whether the string is the object of the single triple
             * {@code <http://www.bigdata.com/rdf#serviceParam> <http://wikiba.se/ontology#language> "..."}
             * that forms the whole body of a {@code <http://wikiba.se/ontology#label>} service pattern.
             */
            private boolean isLabelServiceLanguage(ASTString string) {
                // Upwards from the string to the property list it belongs to.
                Node rdfLiteral = string.jjtGetParent();
                if (!isNode(rdfLiteral, ASTRDFLiteral.class, 1)) {
                    return false;
                }
                Node objectList = rdfLiteral.jjtGetParent();
                if (!isNode(objectList, ASTObjectList.class, 1)) {
                    return false;
                }
                Node propertyListPath = objectList.jjtGetParent();
                if (!isNode(propertyListPath, ASTPropertyListPath.class, 2)) {
                    return false;
                }

                // Downwards into the predicate: it has to be the plain IRI wikibase:language.
                Node pathAlternative = propertyListPath.jjtGetChild(0);
                if (!isNode(pathAlternative, ASTPathAlternative.class, 1)) {
                    return false;
                }
                Node pathSequence = pathAlternative.jjtGetChild(0);
                if (!isNode(pathSequence, ASTPathSequence.class, 1)) {
                    return false;
                }
                Node pathElt = pathSequence.jjtGetChild(0);
                if (!isNode(pathElt, ASTPathElt.class, 1)) {
                    return false;
                }
                if (!isIri(pathElt.jjtGetChild(0), LANGUAGE_IRI)) {
                    return false;
                }

                // The subject of the triple has to be bd:serviceParam.
                Node triplesSameSubjectPath = propertyListPath.jjtGetParent();
                if (!isNode(triplesSameSubjectPath, ASTTriplesSameSubjectPath.class, 2)) {
                    return false;
                }
                if (!isIri(triplesSameSubjectPath.jjtGetChild(0), SERVICE_PARAM_IRI)) {
                    return false;
                }

                // Further upwards to the enclosing service pattern, which has to be wikibase:label.
                Node basicGraphPattern = triplesSameSubjectPath.jjtGetParent();
                if (!isNode(basicGraphPattern, ASTBasicGraphPattern.class, 1)) {
                    return false;
                }
                Node graphPatternGroup = basicGraphPattern.jjtGetParent();
                if (!isNode(graphPatternGroup, ASTGraphPatternGroup.class, 1)) {
                    return false;
                }
                Node serviceGraphPattern = graphPatternGroup.jjtGetParent();
                if (!isNode(serviceGraphPattern, ASTServiceGraphPattern.class, 2)) {
                    return false;
                }
                return isIri(serviceGraphPattern.jjtGetChild(0), LABEL_SERVICE_IRI);
            }

            /** True if the node is a non-null instance of the given type with exactly childCount children. */
            private boolean isNode(Node node, Class<?> type, int childCount) {
                return type.isInstance(node) && node.jjtGetNumChildren() == childCount;
            }

            /** True if the node is a childless ASTIRI whose value equals the given IRI. */
            private boolean isIri(Node node, String iri) {
                return node instanceof ASTIRI
                        && node.jjtGetNumChildren() == 0
                        && iri.equals(((ASTIRI) node).getValue());
            }
        }, null);
    } catch (TokenMgrError | VisitorException e) {
        throw new MalformedQueryException(e);
    }
}
use of org.openrdf.query.MalformedQueryException in project QueryAnalysis by Wikidata.
the class StandardizingSPARQLParser method normalize.
/**
 * Normalizes a query in place by numbering each distinct value in the order the
 * visitor first encounters it:
 * - variables become var1, var2 ...
 * - strings become string1, string2 ...
 * - limits become 1, 2 ...
 * - offsets become 1, 2 ... (numbered independently of the limits)
 * - numeric literals become 1, 2 ...
 * - language tags of rdfLiterals become language-1, language-2 ...
 *
 * @param queryContainer The query to be normalized
 * @throws MalformedQueryException if the query was malformed
 */
public static void normalize(ASTQueryContainer queryContainer) throws MalformedQueryException {
    final Map<String, Integer> variableNumbers = new HashMap<>();
    final Map<String, Integer> stringNumbers = new HashMap<>();
    final Map<Long, Integer> limitNumbers = new HashMap<>();
    final Map<Long, Integer> offsetNumbers = new HashMap<>();
    final Map<String, Integer> numericLiteralNumbers = new HashMap<>();
    final Map<String, Integer> languageNumbers = new HashMap<>();

    ASTVisitorBase normalizer = new ASTVisitorBase() {
        /** Looks up the number already given to the key, handing out the next free one on first sight. */
        private <K> int numberOf(Map<K, Integer> numbers, K key) {
            Integer number = numbers.get(key);
            if (number == null) {
                number = numbers.size() + 1;
                numbers.put(key, number);
            }
            return number;
        }

        @Override
        public Object visit(ASTVar variable, Object data) throws VisitorException {
            variable.setName("var" + numberOf(variableNumbers, variable.getName()));
            return super.visit(variable, data);
        }

        @Override
        public Object visit(ASTString string, Object data) throws VisitorException {
            string.setValue("string" + numberOf(stringNumbers, string.getValue()));
            return super.visit(string, data);
        }

        @Override
        public Object visit(ASTLimit limit, Object data) throws VisitorException {
            limit.setValue((long) numberOf(limitNumbers, limit.getValue()));
            return super.visit(limit, data);
        }

        @Override
        public Object visit(ASTOffset offset, Object data) throws VisitorException {
            offset.setValue((long) numberOf(offsetNumbers, offset.getValue()));
            return super.visit(offset, data);
        }

        @Override
        public Object visit(ASTNumericLiteral numericLiteral, Object data) throws VisitorException {
            numericLiteral.setValue(String.valueOf(numberOf(numericLiteralNumbers, numericLiteral.getValue())));
            return super.visit(numericLiteral, data);
        }

        @Override
        public Object visit(ASTRDFLiteral rdfLiteral, Object data) throws VisitorException {
            // NOTE(review): getLang() is used as a key without a null check, so if it returns
            // null for literals without a language tag, those literals also receive a
            // language-N tag - confirm this is intended.
            rdfLiteral.setLang("language-" + numberOf(languageNumbers, rdfLiteral.getLang()));
            return super.visit(rdfLiteral, data);
        }
    };

    try {
        queryContainer.jjtAccept(normalizer, null);
    } catch (TokenMgrError | VisitorException e) {
        throw new MalformedQueryException(e);
    }
}
use of org.openrdf.query.MalformedQueryException in project QueryAnalysis by Wikidata.
the class OutputHandlerAnonymizer method writeLine.
/**
 * Anonymizes a single query and writes it as one row to the output.
 *
 * Nothing is written if the query handler does not report the query as VALID, if the
 * query cannot be parsed, anonymized or rendered, or if the anonymized query is no longer
 * parseable. In the last case the original query is saved to the failed queries folder.
 *
 * The written row consists of the URL-encoded anonymized query (prefixed with "?query="),
 * the timestamp, "organic" or "robotic" depending on the source category, and "browser"
 * or the user agent as returned by the query handler.
 *
 * @param queryToAnalyze The query as it was found in the input
 * @param validityStatus The validity passed on to the query handler factory
 * @param userAgent The user agent that sent the query
 * @param timeStamp The timestamp written to the output row
 * @param currentLine The input line of the query, passed on to the query handler factory
 * @param currentDay The day of the query, passed on to the query handler factory
 * @param currentFile The input file of the query, passed on to the query handler factory
 */
@Override
public void writeLine(String queryToAnalyze, Validity validityStatus, String userAgent, String timeStamp, long currentLine, int currentDay, String currentFile) {
    QueryHandler queryHandler = queryHandlerFactory.getQueryHandler(validityStatus, currentLine, currentDay, queryToAnalyze, userAgent, currentFile, threadNumber);
    if (!queryHandler.getValidityStatus().equals(QueryHandler.Validity.VALID)) {
        return;
    }

    ASTQueryContainer qc;
    try {
        qc = SyntaxTreeBuilder.parseQuery(queryToAnalyze);
    } catch (TokenMgrError | ParseException e) {
        logger.error("Failed to parse the query although it was found valid - this is a serious bug.", e);
        return;
    }

    try {
        StandardizingSPARQLParser.debug(qc);
        StringEscapesProcessor.process(qc);
        BaseDeclProcessor.process(qc, OpenRDFQueryHandler.BASE_URI);
        StandardizingPrefixDeclProcessor.process(qc);
        StandardizingSPARQLParser.anonymize(qc);
    } catch (MalformedQueryException e) {
        // Fail closed: the syntax tree may be only partially anonymized at this point, so it
        // must not be rendered and written to the output.
        logger.error("Failed to debug or anonymize query. " + queryToAnalyze, e);
        return;
    }

    String renderedQueryString;
    try {
        renderedQueryString = qc.jjtAccept(new RenderVisitor(), "").toString();
    } catch (VisitorException e) {
        logger.error("Failed to render the query.", e);
        return;
    }

    // Make sure the anonymization did not break the query.
    try {
        new StandardizingSPARQLParser().parseQuery(renderedQueryString, OpenRDFQueryHandler.BASE_URI);
    } catch (MalformedQueryException e) {
        String queryName = this.threadNumber + "_" + this.failedQueriesNumber + ".query";
        logger.error("Anonymized query was not valid anymore. " + queryName, e);
        // NOTE(review): the directory part of outputFile is cut off *before* its last "/" and
        // "failedQueriesFolder/" is appended without a separator ("/a/b/out.tsv" becomes
        // "/a/bfailedQueriesFolder/..."). Confirm this matches where the folder is created.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(this.outputFile.substring(0, this.outputFile.lastIndexOf("/")) + "failedQueriesFolder/" + queryName))) {
            bw.write(queryToAnalyze);
            // Only count queries that were actually saved, so file names stay consecutive.
            this.failedQueriesNumber++;
        } catch (IOException i) {
            logger.error("Could not write the failed query to failed queries folder.", i);
        }
        return;
    } catch (ClassCastException e) {
        // Logged only: the anonymized query is still written to the output in this case.
        logger.error("Unexpected class cast exception after anonymization.", e);
    }

    String encodedRenderedQueryString;
    try {
        encodedRenderedQueryString = URLEncoder.encode(renderedQueryString, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        logger.error("Apparently this system does not support UTF-8. Please fix this before running the program again.");
        return;
    }

    List<Object> line = new ArrayList<>();
    line.add("?query=" + encodedRenderedQueryString);
    line.add(timeStamp);
    if (queryHandler.getSourceCategory().equals(QueryHandler.SourceCategory.USER)) {
        line.add("organic");
    } else {
        line.add("robotic");
    }
    // Browser user agents are collapsed into a single value instead of being written verbatim.
    if (QueryHandler.isOrganicUserAgent(queryHandler.getUserAgent())) {
        line.add("browser");
    } else {
        line.add(queryHandler.getUserAgent());
    }
    writer.writeRow(line);
}
use of org.openrdf.query.MalformedQueryException in project QueryAnalysis by Wikidata.
the class Test method main.
/**
 * Manual check of the anonymization: runs every regular file of the example query
 * directory through parsing, anonymization, rendering and re-parsing, and prints how
 * many queries survived, how many were broken by the anonymization and how many could
 * not be parsed in the first place.
 *
 * @param args not used
 */
public static void main(String[] args) {
    Main.loadStandardPrefixes();
    Anonymizer.loadWhitelistDatatypes();

    int worked = 0;
    int failed = 0;
    int failedToParse = 0;

    try (DirectoryStream<Path> exampleQueries = Files.newDirectoryStream(Paths.get("/home/adrian/workspace/java/months/exampleQueries/"))) {
        for (Path queryFile : exampleQueries) {
            if (!Files.isRegularFile(queryFile)) {
                continue;
            }
            String originalQuery = new String(readAllBytes(queryFile));

            // Queries that are invalid to begin with are only counted.
            try {
                new StandardizingSPARQLParser().parseQuery(originalQuery, OpenRDFQueryHandler.BASE_URI);
            } catch (MalformedQueryException e) {
                failedToParse++;
                continue;
            }

            ASTQueryContainer syntaxTree;
            try {
                syntaxTree = SyntaxTreeBuilder.parseQuery(originalQuery);
            } catch (TokenMgrError | ParseException e) {
                // Skipped without being counted.
                continue;
            }

            try {
                StandardizingSPARQLParser.debug(syntaxTree);
                StringEscapesProcessor.process(syntaxTree);
                BaseDeclProcessor.process(syntaxTree, OpenRDFQueryHandler.BASE_URI);
                StandardizingPrefixDeclProcessor.process(syntaxTree);
                StandardizingSPARQLParser.anonymize(syntaxTree);
            } catch (MalformedQueryException e) {
                // Reported only; the tree is still rendered and re-parsed below.
                System.out.println("Failed to debug or anonymize query. " + originalQuery);
            }

            String anonymizedQuery;
            try {
                anonymizedQuery = syntaxTree.jjtAccept(new RenderVisitor(), "").toString();
            } catch (VisitorException e) {
                // Skipped without being counted.
                continue;
            }

            // The anonymized query has to be parseable again to count as a success.
            try {
                new StandardizingSPARQLParser().parseQuery(anonymizedQuery, OpenRDFQueryHandler.BASE_URI);
                worked++;
            } catch (MalformedQueryException | ClassCastException e) {
                failed++;
                System.out.println("-----------------------------------");
                System.out.println(queryFile);
                System.out.println(originalQuery);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    System.out.println("Worked: " + worked + " Failed: " + failed + " Failed to Parse: " + failedToParse);
}
use of org.openrdf.query.MalformedQueryException in project QueryAnalysis by Wikidata.
the class OpenRDFQueryHandler method computeNonSimplePropertyPaths.
/**
 * Fills nonSimplePropertyPaths with the string computeAnyIDString builds from the
 * non-simple property paths of the query, with "NONE" if that string is empty, with the
 * validity status if the query is not valid, or with "INTERNAL_ERROR" if the query could
 * not be processed.
 */
@Override
protected final void computeNonSimplePropertyPaths() {
    if (getValidityStatus() != QueryHandler.Validity.VALID) {
        // Invalid queries are not analyzed; their validity status is recorded instead.
        this.nonSimplePropertyPaths = getValidityStatus().toString();
        return;
    }

    String result;
    try {
        ASTQueryContainer syntaxTree = new StandardizingSPARQLParser().getASTQueryContainerPrefixesProcessed(getQueryString(), BASE_URI);
        Set<String> foundPaths = new NonSimplePropertyPathVisitor().getNonSimplePropertyPaths(syntaxTree);
        String pathIds = this.computeAnyIDString(foundPaths);
        result = pathIds.isEmpty() ? "NONE" : pathIds;
    } catch (VisitorException | MalformedQueryException e) {
        result = "INTERNAL_ERROR";
        logger.error("Unexpected error while calculating non-simple property paths.", e);
    }
    this.nonSimplePropertyPaths = result;
}
Aggregations