Use of gate.creole.ir.SearchException in project gate-core by GateNLP.
From the class QueryParser, method createTerms.
/**
 * Converts a single query element into the terms to search for.
 *
 * <p>Two shapes of element are recognised:
 * <ul>
 * <li>a braced element such as <code>{AT}</code>, <code>{AT==S}</code>,
 * <code>{AT=="S"}</code>, <code>{AT.f==S}</code> or a comma separated mix
 * of these. Every term produced from one braced element is recorded at
 * the same position, only the first sub-element is flagged as
 * "consider", and the position counter is advanced once at the end;</li>
 * <li>anything else, which is treated as plain text: unescaped double
 * quotes are dropped, the text is split on spaces and every non-empty
 * token becomes a <code>baseTokenAnnotationType + ".string"</code> term
 * with its own position.</li>
 * </ul>
 *
 * <p>As a side effect the method resets <code>areAllTermsTokens</code> to
 * true and clears it as soon as an annotation type different from
 * <code>baseTokenAnnotationType</code> is seen, and it advances the
 * <code>position</code> field.
 *
 * @param elem the query element; surrounding whitespace is ignored. A
 *        blank element yields three empty lists.
 * @return an array of three parallel lists, in this order: the terms, the
 *         Integer position of each term, and the Boolean "consider" flag
 *         of each term
 * @throws gate.creole.ir.SearchException if a braced (sub-)element
 *         contains a feature separator ('.') but no "==" operator
 */
public List<?>[] createTerms(String elem) throws gate.creole.ir.SearchException {
  areAllTermsTokens = true;
  List<Term> terms = new ArrayList<Term>();
  List<Integer> pos = new ArrayList<Integer>();
  List<Boolean> consider = new ArrayList<Boolean>();
  elem = elem.trim();
  // Nothing to parse: charAt(0) below would otherwise throw on an empty string.
  if (elem.length() == 0) {
    return new List<?>[] { terms, pos, consider };
  }
  if (elem.charAt(0) == '{' && elem.charAt(elem.length() - 1) == '}') {
    // possible
    elem = elem.substring(1, elem.length() - 1);
    int index = elem.indexOf("==");
    int index1 = findIndexOf(elem, '.');
    if (index == -1 && index1 == -1) {
      // 3. {AnnotationType}
      // this can be {AnnotationType, AnnotationType...}
      ArrayList<String> fields = splitString(elem, ',', true);
      for (int p = 0; p < fields.size(); p++) {
        String annotType = fields.get(p);
        if (areAllTermsTokens && !annotType.equals(baseTokenAnnotationType)) {
          areAllTermsTokens = false;
        }
        terms.add(new Term(field, norm(annotType), "*"));
        pos.add(position);
        consider.add(p == 0);
      }
      position++;
    } else if (index != -1 && index1 == -1) {
      // 4. {AnnotationType==String}
      // 5. {AnnotationType=="String"}
      ArrayList<String> fields = splitString(elem, ',', false);
      for (int p = 0; p < fields.size(); p++) {
        String part = fields.get(p);
        index = part.indexOf("==");
        // {AnnotationType, AnnotationType=="String"}
        if (index != -1) {
          String annotType = norm(part.substring(0, index).trim());
          String annotText = stripEnclosingQuotes(norm(part.substring(index + 2, part.length()).trim()));
          if (!annotType.trim().equals(baseTokenAnnotationType)) {
            areAllTermsTokens = false;
          }
          terms.add(new Term(field, annotText, annotType + ".string"));
          pos.add(position);
          consider.add(p == 0);
        } else {
          String annotType = norm(part);
          if (!annotType.equals(baseTokenAnnotationType)) {
            areAllTermsTokens = false;
          }
          terms.add(new Term(field, annotType, "*"));
          pos.add(position);
          consider.add(p == 0);
        }
      }
      position++;
    } else if (index == -1 && index1 != -1) {
      // A feature is named but never compared: point at a lone '=' after
      // the '.' if there is one, otherwise at the end of the element.
      int singleEquals = elem.indexOf("=", index1);
      throw new SearchException("missing operator", "an equal operator (==) is missing", elem, (singleEquals != -1) ? singleEquals : elem.length());
    } else if (index != -1 && index1 != -1) {
      // it can be {AT, AT.f==S, AT=="S"}
      int index2 = findIndexOf(elem, ',');
      String[] subElems;
      if (index2 == -1) {
        subElems = new String[] { elem };
      } else {
        ArrayList<String> list = splitString(elem, ',', false);
        subElems = list.toArray(new String[list.size()]);
      }
      // Offset of the current sub-element within elem (each sub-element is
      // followed by one separator), used to locate errors.
      int lengthTravelledSoFar = 0;
      for (int j = 0; j < subElems.length; j++) {
        String subElem = subElems[j];
        // 7. {AnnotationType.feature==string}
        // 8. {AnnotationType.feature=="string"}
        index = subElem.indexOf("==");
        index1 = findIndexOf(subElem, '.');
        if (index == -1 && index1 == -1) {
          // this is {AT}
          String annotType = norm(subElem.trim());
          if (!annotType.equals(baseTokenAnnotationType)) {
            areAllTermsTokens = false;
          }
          terms.add(new Term(field, annotType, "*"));
          pos.add(position);
          consider.add(j == 0);
        } else if (index != -1 && index1 == -1) {
          // this is {AT=="String"}
          String annotType = norm(subElem.substring(0, index).trim());
          // Length-guarded like the other branches: an empty value or a
          // lone quote must not be indexed/sliced.
          String annotText = stripEnclosingQuotes(norm(subElem.substring(index + 2, subElem.length()).trim()));
          if (!annotType.trim().equals(baseTokenAnnotationType)) {
            areAllTermsTokens = false;
          }
          terms.add(new Term(field, annotText, annotType + ".string"));
          pos.add(position);
          consider.add(j == 0);
        } else if (index == -1 && index1 != -1) {
          int singleEquals = elem.indexOf("=", lengthTravelledSoFar);
          throw new SearchException("missing operator", "an equal operator (==) is missing", elem, (singleEquals != -1) ? singleEquals : elem.length());
        } else {
          // this is {AT.f == "s"}
          String annotType = norm(subElem.substring(0, index1).trim());
          String featureType = norm(subElem.substring(index1 + 1, index).trim());
          String featureText = stripEnclosingQuotes(norm(subElem.substring(index + 2, subElem.length()).trim()));
          if (!annotType.trim().equals(baseTokenAnnotationType)) {
            areAllTermsTokens = false;
          }
          terms.add(new Term(field, featureText, annotType + "." + featureType));
          pos.add(position);
          consider.add(j == 0);
        }
        lengthTravelledSoFar += subElem.length() + 1;
      }
      position++;
    }
  } else {
    // possible
    // remove all the inverted commas that are not escaped with a backslash
    StringBuilder newString = new StringBuilder();
    char prev = ' ', ch = ' ';
    for (int i = 0; i < elem.length(); i++) {
      prev = ch;
      ch = elem.charAt(i);
      if (ch == '\"' && prev != '\\') {
        continue;
      }
      newString.append(ch);
    }
    // there can be many tokens
    String[] subTokens = norm(newString.toString()).split("( )+");
    for (int k = 0; k < subTokens.length; k++) {
      if (subTokens[k].trim().length() > 0) {
        terms.add(new Term(field, norm(subTokens[k]), baseTokenAnnotationType + ".string"));
        pos.add(position);
        consider.add(Boolean.TRUE);
        position++;
      }
    }
  }
  return new List<?>[] { terms, pos, consider };
}

/**
 * Removes one pair of enclosing double quotes from the given text.
 * The text is returned unchanged when it is not quoted or when it is
 * shorter than three characters, so an empty value, a lone quote or a
 * bare pair of quotes is never sliced.
 */
private String stripEnclosingQuotes(String text) {
  if (text.length() > 2 && text.charAt(0) == '\"' && text.charAt(text.length() - 1) == '\"') {
    return text.substring(1, text.length() - 1);
  }
  return text;
}
Use of gate.creole.ir.SearchException in project gate-core by GateNLP.
From the class SubQueryParser, method parseQuery.
/**
 * Expands a query into its OR-normalized alternatives.
 * <p>
 * The query is wrapped in an outer pair of brackets, passed through
 * extractWildcards and then expanded bracket group by bracket group:
 * a group holding several OR tokens multiplies the queries built so far,
 * any other text is appended to all of them. For example
 * <code>({A}|{B}){C}</code> is expanded to
 * <ol>
 * <li>{A}{C}</li>
 * <li>{B}{C}</li>
 * </ol>
 *
 * @param q1 the query to expand
 * @return the expanded queries, trimmed, without blank entries and
 *         without duplicates, in the order they were produced
 * @throws SearchException if an opening bracket has no closing bracket
 *         or a closing bracket has no opening bracket
 */
public static List<String> parseQuery(String q1) throws SearchException {
  // working list: queries still to be expanded plus the ones built so far
  List<String> queries = new ArrayList<String>();
  // wrap the trimmed query in explicit brackets before wildcard handling
  queries.add(extractWildcards("( " + q1.trim() + " )"));

  int cursor = 0;
  while (cursor < queries.size()) {
    // the query expanded in this pass
    String query = queries.get(cursor);
    // current and previous character; the previous one is needed to
    // recognise brackets escaped with '\'
    char current = ' ';
    char previous = ' ';
    // true once the text read so far has been copied into new queries,
    // e.g. {A}({B}|{C}) needs {A} copied once per alternative
    boolean expanded = false;
    // number of trailing queries that receive the text that follows
    int copies = 0;
    // text read before the first bracket group, not yet copied anywhere
    StringBuilder pending = new StringBuilder();

    for (int at = 0; at < query.length(); at++) {
      previous = current;
      current = query.charAt(at);

      if (!isOpenBracket(current, previous)) {
        if (isClosingBracket(current, previous)) {
          throw new SearchException("unbalanced brackets", "a opening bracket (() is missing for this closing bracket", query, at);
        }
        // ordinary character: append to the copies, or keep buffering
        if (expanded) {
          queries = writeCharInAll(current, copies, queries);
        } else {
          pending.append(current);
        }
        continue;
      }

      // an unescaped opening bracket: locate its partner
      int closeAt = findBracketClosingPosition(at + 1, query);
      if (closeAt == -1) {
        throw new SearchException("unbalanced brackets", "a closing bracket ()) is missing for this opening bracket", query, at);
      }
      // alternatives separated by the | operator inside the group
      ArrayList<String> orTokens = findOrTokens(query.substring(at + 1, closeAt));

      if (orTokens.size() > 1) {
        // text that has to be repeated in front of every alternative
        String prefix;
        if (!expanded && pending.length() > 0) {
          prefix = pending.toString();
          pending.setLength(0);
        } else {
          prefix = (cursor == queries.size() - 1) ? "" : queries.get(queries.size() - 1);
        }
        // e.g. "ABC" ({B} | {C}): first two copies of "ABC" are made ...
        queries = duplicate(queries, prefix, copies, orTokens.size());
        // ... then they become "ABC" {B} and "ABC" {C}
        queries = writeTokens(orTokens, queries, copies);
        expanded = true;
        // every alternative multiplies the number of live copies
        copies = (copies == 0 ? 1 : copies) * orTokens.size();
      } else {
        if (copies == 0) {
          // nothing copied yet: start a single new query from the
          // buffered text (possibly empty) and treat it as the one copy
          queries.add(pending.toString());
          expanded = true;
          pending.setLength(0);
          copies = 1;
        }
        // the whole group content goes into every live copy
        queries = writeStringInAll(query.substring(at + 1, closeAt), copies, queries);
      }
      // resume after the closing bracket
      at = closeAt;
    }

    // a query that contained an OR or a bracket is dropped; the next
    // query then sits at the same index, otherwise move on
    if (scanQueryForOrOrBracket(query)) {
      queries.remove(cursor);
    } else {
      cursor++;
    }
  }

  // trim, drop blanks and duplicates while keeping the order
  ArrayList<String> queriesToReturn = new ArrayList<String>();
  for (String candidate : queries) {
    String trimmed = candidate.trim();
    if (trimmed.length() != 0 && !queriesToReturn.contains(trimmed)) {
      queriesToReturn.add(trimmed);
    }
  }
  return queriesToReturn;
}
Aggregations