Search in sources :

Example 1 with SearchException

use of gate.creole.ir.SearchException in project gate-core by GateNLP.

the class QueryParser method createTerms.

/**
 * Converts one query element into index terms.
 * <p>
 * Two kinds of element are recognised:
 * <ul>
 * <li>an annotation pattern wrapped in curly braces, e.g. {AT}, {AT, AT},
 * {AT==String}, {AT=="String"}, {AT.f==String}, {AT.f=="String"} or a
 * comma separated mix of those;</li>
 * <li>anything else, which is treated as plain text: unescaped double
 * quotes are dropped and the text is split on spaces into tokens.</li>
 * </ul>
 * Side effects visible in this method: {@code areAllTermsTokens} is reset
 * to {@code true} and cleared as soon as an annotation type other than
 * {@code baseTokenAnnotationType} is seen; {@code position} is advanced
 * once per brace group and once per plain-text token.
 * <p>
 * NOTE(review): the exact behaviour of {@code norm}, {@code splitString}
 * and {@code findIndexOf} is defined elsewhere; the comments below only
 * describe how their results are used here.
 *
 * @param elem the query element to convert; surrounding whitespace is
 *          ignored
 * @return an array of three parallel lists: the terms, the position
 *         recorded for each term, and a flag per term that is
 *         {@code true} for the first alternative of a brace group and
 *         for every plain-text token
 * @throws gate.creole.ir.SearchException if a brace group contains a
 *           '.' (feature access) without an accompanying "=="
 */
public List<?>[] createTerms(String elem) throws gate.creole.ir.SearchException {
    areAllTermsTokens = true;
    List<Term> terms = new ArrayList<Term>();
    List<Integer> pos = new ArrayList<Integer>();
    List<Boolean> consider = new ArrayList<Boolean>();
    elem = elem.trim();
    // The length guard keeps an empty/blank element from failing on
    // charAt(0); such an element falls through to the plain-text branch.
    if (elem.length() >= 2 && elem.charAt(0) == '{' && elem.charAt(elem.length() - 1) == '}') {
        // annotation pattern: drop the surrounding braces
        elem = elem.substring(1, elem.length() - 1);
        int index = elem.indexOf("==");
        int index1 = findIndexOf(elem, '.');
        if (index == -1 && index1 == -1) {
            // 3. {AnnotationType}
            // this can be {AnnotationType, AnnotationType...}
            ArrayList<String> fields = splitString(elem, ',', true);
            for (int p = 0; p < fields.size(); p++) {
                if (areAllTermsTokens && !fields.get(p).equals(baseTokenAnnotationType))
                    areAllTermsTokens = false;
                terms.add(new Term(field, norm(fields.get(p)), "*"));
                pos.add(position);
                // only the first alternative of the group is flagged
                consider.add(p == 0);
            }
            position++;
        } else if (index != -1 && index1 == -1) {
            // 4. {AnnotationType==String}
            // 5. {AnnotationType=="String"}
            ArrayList<String> fields = splitString(elem, ',', false);
            for (int p = 0; p < fields.size(); p++) {
                index = fields.get(p).indexOf("==");
                // {AnnotationType, AnnotationType=="String"}
                if (index != -1) {
                    String annotType = norm(fields.get(p).substring(0, index).trim());
                    String annotText = norm(fields.get(p).substring(index + 2, fields.get(p).length()).trim());
                    if (annotText.length() > 2 && annotText.charAt(0) == '\"' && annotText.charAt(annotText.length() - 1) == '\"') {
                        annotText = annotText.substring(1, annotText.length() - 1);
                    }
                    if (!annotType.trim().equals(baseTokenAnnotationType))
                        areAllTermsTokens = false;
                    terms.add(new Term(field, annotText, annotType + ".string"));
                    pos.add(position);
                    consider.add(p == 0);
                } else {
                    if (!(norm(fields.get(p))).equals(baseTokenAnnotationType))
                        areAllTermsTokens = false;
                    terms.add(new Term(field, norm(fields.get(p)), "*"));
                    pos.add(position);
                    consider.add(p == 0);
                }
            }
            position++;
        } else if (index == -1 && index1 != -1) {
            // a feature access without "==": report the position of a lone
            // '=' after the '.', or the end of the element if there is none
            throw new SearchException("missing operator", "an equal operator (==) is missing", elem, (elem.indexOf("=", index1) != -1) ? elem.indexOf("=", index1) : elem.length());
        } else if (index != -1 && index1 != -1) {
            // it can be {AT, AT.f==S, AT=="S"}
            int index2 = findIndexOf(elem, ',');
            String[] subElems = null;
            if (index2 == -1) {
                subElems = new String[] { elem };
            } else {
                ArrayList<String> list = splitString(elem, ',', false);
                subElems = list.toArray(new String[list.size()]);
            }
            // offset into elem of the sub element being processed; only
            // used to locate the error position reported below
            int lengthTravelledSoFar = 0;
            for (int j = 0; j < subElems.length; j++) {
                // 7. {AnnotationType.feature==string}
                // 8. {AnnotationType.feature=="string"}
                index = subElems[j].indexOf("==");
                index1 = findIndexOf(subElems[j], '.');
                if (index == -1 && index1 == -1) {
                    // this is {AT}
                    if (!norm(subElems[j].trim()).equals(baseTokenAnnotationType))
                        areAllTermsTokens = false;
                    terms.add(new Term(field, norm(subElems[j].trim()), "*"));
                    pos.add(position);
                    consider.add(j == 0);
                } else if (index != -1 && index1 == -1) {
                    // this is {AT=="String"}
                    String annotType = norm(subElems[j].substring(0, index).trim());
                    String annotText = norm(subElems[j].substring(index + 2, subElems[j].length()).trim());
                    // Guard the quote stripping: an empty value or a lone
                    // quote character must not reach charAt/substring.
                    if (annotText.length() >= 2 && annotText.charAt(0) == '\"' && annotText.charAt(annotText.length() - 1) == '\"') {
                        annotText = annotText.substring(1, annotText.length() - 1);
                    }
                    if (!annotType.trim().equals(baseTokenAnnotationType))
                        areAllTermsTokens = false;
                    terms.add(new Term(field, annotText, annotType + ".string"));
                    pos.add(position);
                    consider.add(j == 0);
                } else if (index == -1 && index1 != -1) {
                    throw new SearchException("missing operator", "an equal operator (==) is missing", elem, (elem.indexOf("=", lengthTravelledSoFar) != -1) ? elem.indexOf("=", lengthTravelledSoFar) : elem.length());
                } else {
                    // this is {AT.f == "s"}
                    String annotType = norm(subElems[j].substring(0, index1).trim());
                    String featureType = norm(subElems[j].substring(index1 + 1, index).trim());
                    String featureText = norm(subElems[j].substring(index + 2, subElems[j].length()).trim());
                    if (featureText.length() > 2 && featureText.charAt(0) == '\"' && featureText.charAt(featureText.length() - 1) == '\"')
                        featureText = featureText.substring(1, featureText.length() - 1);
                    if (!annotType.trim().equals(baseTokenAnnotationType))
                        areAllTermsTokens = false;
                    terms.add(new Term(field, featureText, annotType + "." + featureType));
                    pos.add(position);
                    consider.add(j == 0);
                }
                // +1 accounts for the separator following this sub element
                lengthTravelledSoFar += subElems[j].length() + 1;
            }
            position++;
        }
    } else {
        // plain text
        // remove all the inverted commas that are not escaped with '\'
        StringBuilder newString = new StringBuilder();
        char prev = ' ', ch = ' ';
        for (int i = 0; i < elem.length(); i++) {
            prev = ch;
            ch = elem.charAt(i);
            if (ch == '\"' && prev != '\\') {
                continue;
            } else {
                newString.append(ch);
            }
        }
        // there can be many tokens; each one gets its own position
        String[] subTokens = norm(newString.toString()).split("( )+");
        for (int k = 0; k < subTokens.length; k++) {
            if (subTokens[k].trim().length() > 0) {
                terms.add(new Term(field, norm(subTokens[k]), baseTokenAnnotationType + ".string"));
                pos.add(position);
                consider.add(Boolean.TRUE);
                position++;
            }
        }
    }
    return new List<?>[] { terms, pos, consider };
}
Also used : ArrayList(java.util.ArrayList) SearchException(gate.creole.ir.SearchException) Term(gate.creole.annic.apache.lucene.index.Term) List(java.util.List) ArrayList(java.util.ArrayList)

Example 2 with SearchException

use of gate.creole.ir.SearchException in project gate-core by GateNLP.

the class SubQueryParser method parseQuery.

/**
 * Parses a query and expands it into its OR normalized alternatives.
 * <p>
 * For example ({A}|{B}){C} is normalized to ({A}{C}) | ({B}{C}), so the
 * returned list consists of
 * <ol>
 * <li>{A}{C}</li>
 * <li>{B}{C}</li>
 * </ol>
 * The query is trimmed, wrapped in an explicit pair of brackets and passed
 * through {@code extractWildcards} before expansion starts. Expansion uses
 * a work list: every entry is scanned in turn, bracketed groups are
 * expanded into new entries appended to the list, and an entry is dropped
 * once {@code scanQueryForOrOrBracket} reports {@code true} for it.
 * <p>
 * NOTE(review): the bracket/OR helpers called here are defined elsewhere;
 * their exact contracts should be confirmed against their own
 * documentation.
 *
 * @param q1 the query to normalize
 * @return the distinct, trimmed, non-empty queries left in the work list,
 *         in the order they were produced
 * @throws SearchException if an opening bracket has no closing bracket or
 *           a closing bracket has no opening bracket
 */
public static List<String> parseQuery(String q1) throws SearchException {
    // work list - will end up holding all the OR normalized queries
    List<String> queries = new ArrayList<String>();
    // trim the query, add the outer brackets explicitly, handle wildcards
    q1 = extractWildcards("( " + q1.trim() + " )");
    // the main query is the first entry to be processed
    queries.add(q1);
    for (int index = 0; index < queries.size(); index++) {
        String query = queries.get(index);
        char current = ' ';
        // When an OR group follows some text, that text has to be repeated
        // once per alternative, e.g. {A}({B}|{C}) becomes {A}{B} and
        // {A}{C}. These three variables track that repetition.
        boolean duplicated = false;
        int dupliSize = 0;
        // characters read so far that have not been copied into any query
        String pending = "";
        for (int i = 0; i < query.length(); i++) {
            final char previous = current;
            current = query.charAt(i);
            if (isOpenBracket(current, previous)) {
                // locate the matching closing bracket
                int closeAt = findBracketClosingPosition(i + 1, query);
                if (closeAt == -1) {
                    throw new SearchException("unbalanced brackets", "a closing bracket ()) is missing for this opening bracket", query, i);
                }
                String inner = query.substring(i + 1, closeAt);
                ArrayList<String> orTokens = findOrTokens(inner);
                if (orTokens.size() > 1) {
                    // the group contains alternatives: choose the text
                    // that must precede each of them
                    final String prefix;
                    if (!duplicated && !pending.isEmpty()) {
                        prefix = pending;
                        pending = "";
                    } else if (index == queries.size() - 1) {
                        prefix = "";
                    } else {
                        prefix = queries.get(queries.size() - 1);
                    }
                    // e.g. for "ABC" ({B} | {C}) the prefix "ABC" is
                    // repeated twice and then completed to
                    // "ABC" {B} and "ABC" {C}
                    queries = duplicate(queries, prefix, dupliSize, orTokens.size());
                    queries = writeTokens(orTokens, queries, dupliSize);
                    duplicated = true;
                    // remember how many copies are being written to now
                    dupliSize = (dupliSize == 0 ? 1 : dupliSize) * orTokens.size();
                } else {
                    if (dupliSize == 0) {
                        // nothing duplicated yet: start a single new query
                        // from the text read so far (possibly empty) and
                        // treat it as one duplicate
                        queries.add(pending);
                        duplicated = true;
                        pending = "";
                        dupliSize = 1;
                    }
                    // the bracket content goes into every query that was
                    // duplicated last
                    queries = writeStringInAll(inner, dupliSize, queries);
                }
                // resume scanning after the closing bracket
                i = closeAt;
                continue;
            }
            if (isClosingBracket(current, previous)) {
                throw new SearchException("unbalanced brackets", "a opening bracket (() is missing for this closing bracket", query, i);
            }
            if (duplicated) {
                queries = writeCharInAll(current, dupliSize, queries);
            } else {
                pending += current;
            }
        }
        // drop the entry just processed when the scan says so, and step
        // back so that the entry sliding into its slot is not skipped
        if (scanQueryForOrOrBracket(query)) {
            queries.remove(index);
            index--;
        }
    }
    // keep each non-empty query once, trimmed, preserving order
    List<String> queriesToReturn = new ArrayList<String>();
    for (String candidate : queries) {
        String trimmed = candidate.trim();
        if (!trimmed.isEmpty() && !queriesToReturn.contains(trimmed)) {
            queriesToReturn.add(trimmed);
        }
    }
    return queriesToReturn;
}
Also used : SearchException(gate.creole.ir.SearchException)

Aggregations

SearchException (gate.creole.ir.SearchException): 2, Term (gate.creole.annic.apache.lucene.index.Term): 1, ArrayList (java.util.ArrayList): 1, List (java.util.List): 1