Search in sources:

Example 1 with Query

Use of gate.creole.annic.apache.lucene.search.Query in project gate-core by GateNLP.

The class QueryParser, method parse.

/**
 * Given a query, this method parses it into one or more Lucene queries.
 *
 * @throws gate.creole.ir.SearchException
 */
public Query[] parse(String field, String query, String baseTokenAnnotationType, String corpusID, String annotationSetToSearchIn) throws gate.creole.ir.SearchException {
    this.field = field;
    this.baseTokenAnnotationType = baseTokenAnnotationType;
    this.position = 0;
    // At the moment this supports only the | operator.
    // It also supports the Kleene operators * and +.
    // The implicit operator is &.
    // It supports simple String queries.
    // It supports the following kinds of tokens:
    // 1. String (without quotes)
    // 2. "String" (with quotes)
    // 3. {AnnotationType}
    // 4. {AnnotationType==String}
    // 5. {AnnotationType=="String"}
    // 6. {AnnotationType.feature==string}
    // 7. {AnnotationType.feature=="string"}
    // Steps:
    // The query is searched from left to right.
    // SubQueryParser splits it into sub-queries; each sub-query is
    // converted into a phrase query below.
    queries = SubQueryParser.parseQuery(query);
    Query[] q = new Query[queries.size()];
    for (int i = 0; i < queries.size(); i++) {
        Query phraseQuery = createPhraseQuery(queries.get(i));
        // wrap the phrase query in a boolean query that applies the corpus
        // and annotation-set restrictions; in this bundled Lucene fork,
        // BooleanQuery.add(query, required, prohibited) takes explicit flags
        if (corpusID == null && annotationSetToSearchIn == null) {
            BooleanQuery booleanQuery = new BooleanQuery();
            Term t = new Term(Constants.ANNOTATION_SET_ID, Constants.COMBINED_SET);
            TermQuery tQuery = new TermQuery(t);
            // prohibited clause: exclude hits from the combined annotation set
            booleanQuery.add(tQuery, false, true);
            // required clause: the phrase query itself must match
            booleanQuery.add(phraseQuery, true, false);
            q[i] = booleanQuery;
        } else {
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.add(phraseQuery, true, false);
            if (corpusID != null) {
                // restrict the search to the given corpus
                Term t = new Term(Constants.CORPUS_ID, corpusID);
                TermQuery tQuery = new TermQuery(t);
                booleanQuery.add(tQuery, true, false);
            }
            if (annotationSetToSearchIn != null) {
                // restrict the search to the given annotation set
                Term t = new Term(Constants.ANNOTATION_SET_ID, annotationSetToSearchIn);
                TermQuery tQuery = new TermQuery(t);
                booleanQuery.add(tQuery, true, false);
            } else {
                // no set given: exclude the combined annotation set here too
                Term t = new Term(Constants.ANNOTATION_SET_ID, Constants.COMBINED_SET);
                TermQuery tQuery = new TermQuery(t);
                booleanQuery.add(tQuery, false, true);
            }
            q[i] = booleanQuery;
        }
    }
    return q;
}
Also used: BooleanQuery(gate.creole.annic.apache.lucene.search.BooleanQuery) TermQuery(gate.creole.annic.apache.lucene.search.TermQuery) PhraseQuery(gate.creole.annic.apache.lucene.search.PhraseQuery) Query(gate.creole.annic.apache.lucene.search.Query) Term(gate.creole.annic.apache.lucene.index.Term)
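
For context, a minimal usage sketch (not part of the GATE sources) of how this parse method might be called. The QueryParser import path, the "Token" base annotation type and the example ANNIC query are assumptions; "contents" is the field name used by LuceneSearchThread below.

import gate.creole.annic.apache.lucene.search.Query;
// assumed package for the ANNIC QueryParser shown above
import gate.creole.annic.lucene.QueryParser;

public class QueryParserSketch {

    public static void main(String[] args) throws gate.creole.ir.SearchException {
        QueryParser parser = new QueryParser();
        // an ANNIC-style query: a Person annotation followed by the token "said"
        Query[] queries = parser.parse(
                "contents",           // index field to search
                "{Person} \"said\"",  // ANNIC query string (illustrative)
                "Token",              // base token annotation type (assumed)
                null,                 // corpus ID: null = no corpus restriction
                null);                // annotation set: null = no set restriction
        System.out.println("Generated " + queries.length + " Lucene query(ies)");
    }
}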

Example 2 with Query

Use of gate.creole.annic.apache.lucene.search.Query in project gate-core by GateNLP.

The class LuceneSearchThread, method search.

/**
 * This method collects the necessary information from Lucene and uses
 * it when the next method is called.
 *
 * @param query query supplied by the user
 * @param patternWindow number of tokens of left and right context to
 *          consider
 * @param indexLocation location of the index the searcher should
 *          search in
 * @param corpusToSearchIn ID of the corpus to restrict the search to,
 *          or null for no restriction
 * @param annotationSetToSearchIn name of the annotation set to restrict
 *          the search to, or null for no restriction
 * @param luceneSearcher the LuceneSearcher instance from which this
 *          SearchThread is invoked
 * @return true if the search was successful, false otherwise
 */
@SuppressWarnings("unchecked")
public boolean search(String query, int patternWindow, String indexLocation, String corpusToSearchIn, String annotationSetToSearchIn, LuceneSearcher luceneSearcher) throws SearchException {
    this.query = query;
    this.contextWindow = patternWindow;
    this.indexLocation = indexLocation;
    this.queryParser = new QueryParser();
    this.luceneSearcher = luceneSearcher;
    /*
     * Reset all the parameters that track where we are in the search;
     * they are mostly used to remember where to start fetching the
     * next results from.
     */
    searchResultInfoMap = new HashMap<String, List<QueryItem>>();
    serializedFileIDIndex = 0;
    queryItemIndex = 0;
    serializedFilesIDsList = new ArrayList<String>();
    ftpIndex = -1;
    success = false;
    fwdIterationEnded = false;
    try {
        // normalise the index location: replace Windows-style backslashes
        // in the path with forward slashes
        indexLocation = indexLocation.replace('\\', '/');
        /*
         * Each index location can have a different baseTokenAnnotationType.
         * Every index stores its index definition file under the index
         * directory, so first check that the given location is a valid
         * directory.
         */
        File locationFile = new File(indexLocation);
        if (!locationFile.isDirectory()) {
            System.out.println("Skipping the invalid Index Location :" + indexLocation);
            return false;
        }
        if (!indexLocation.endsWith("/")) {
            indexLocation += "/";
        }
        // otherwise let us read the index definition file
        locationFile = new File(indexLocation + "LuceneIndexDefinition.xml");
        // check if this file is available
        if (!locationFile.exists()) {
            System.out.println("Index Definition file not found - Skipping the invalid Index Location :" + indexLocation + "LuceneIndexDefinition.xml");
            return false;
        }
        Map<String, Object> indexInformation = null;
        // otherwise read this file
        XStream xstream = new XStream(new StaxDriver());
        try (FileReader fileReader = new FileReader(indexLocation + "LuceneIndexDefinition.xml")) {
            // Saving was accomplished by using XML serialization of the map.
            indexInformation = (Map<String, Object>) xstream.fromXML(fileReader);
        }
        // find out if the current index was indexed by annicIndexPR
        String indexedWithANNICIndexPR = (String) indexInformation.get(Constants.CORPUS_INDEX_FEATURE);
        if (indexedWithANNICIndexPR == null || !indexedWithANNICIndexPR.equals(Constants.CORPUS_INDEX_FEATURE_VALUE)) {
            System.out.println("This corpus was not indexed by Annic Index PR - Skipping the invalid Index");
            return false;
        }
        // find out the baseTokenAnnotationType name
        baseTokenAnnotationType = ((String) indexInformation.get(Constants.BASE_TOKEN_ANNOTATION_TYPE)).trim();
        int separatorIndex = baseTokenAnnotationType.lastIndexOf('.');
        if (separatorIndex >= 0) {
            baseTokenAnnotationType = baseTokenAnnotationType.substring(separatorIndex + 1);
        }
        // create various Queries from the user's query
        Query[] luceneQueries = queryParser.parse("contents", query, baseTokenAnnotationType, corpusToSearchIn, annotationSetToSearchIn);
        if (queryParser.needValidation()) {
            if (DEBUG)
                System.out.println("Validation enabled!");
        } else {
            if (DEBUG)
                System.out.println("Validation disabled!");
        }
        // create an instance of Index Searcher
        LuceneIndexSearcher searcher = new LuceneIndexSearcher(indexLocation);
        try {
            // we need to iterate through one query at a time
            for (int luceneQueryIndex = 0; luceneQueryIndex < luceneQueries.length; luceneQueryIndex++) {
                /*
                 * this call reinitializes the first term position lists
                 * which are used to store the results
                 */
                searcher.initializeTermPositions();
                /*
                 * now execute the query; its result will be stored in hits
                 */
                Hits hits = searcher.search(luceneQueries[luceneQueryIndex]);
                /*
                 * now find out the positions of the first terms in the
                 * returned results; the first term position is the position
                 * of the first term in the found pattern
                 */
                List<?>[] firstTermPositions = searcher.getFirstTermPositions();
                // if no results are available for this query, skip it
                if (firstTermPositions[0].size() == 0) {
                    continue;
                }
                // iterate over the hits and collect the pattern information
                // needed for each of them
                for (int hitIndex = 0; hitIndex < hits.length(); hitIndex++) {
                    int index = firstTermPositions[0].indexOf(Integer.valueOf(hits.id(hitIndex)));
                    // fetch all the first term positions for the issued query
                    List<?> ftp = (List<?>) firstTermPositions[1].get(index);
                    /*
                     * pattern length (the total number of annotations
                     * following one another)
                     */
                    int patLen = ((Integer) firstTermPositions[2].get(index)).intValue();
                    /*
                     * and the type of query (whether it contains a single
                     * annotation or multiple terms following one another)
                     */
                    int qType = ((Integer) firstTermPositions[3].get(index)).intValue();
                    // find out the documentID
                    String serializedFileID = hits.doc(hitIndex).get(Constants.DOCUMENT_ID_FOR_SERIALIZED_FILE);
                    QueryItem queryItem = new QueryItem();
                    queryItem.annotationSetName = hits.doc(hitIndex).get(Constants.ANNOTATION_SET_ID).intern();
                    queryItem.id = hits.id(hitIndex);
                    queryItem.documentID = hits.doc(hitIndex).get(Constants.DOCUMENT_ID).intern();
                    queryItem.ftp = ftp;
                    queryItem.patLen = patLen;
                    queryItem.qType = qType;
                    queryItem.query = luceneQueries[luceneQueryIndex];
                    queryItem.queryString = queryParser.getQueryString(luceneQueryIndex).intern();
                    /*
                     * All this information goes into the top-level map: we
                     * create a separate list for each individual document,
                     * where each element provides information about a
                     * different query issued over it.
                     */
                    List<QueryItem> queryItemsList = searchResultInfoMap.get(serializedFileID);
                    if (queryItemsList == null) {
                        queryItemsList = new ArrayList<QueryItem>();
                        queryItemsList.add(queryItem);
                        searchResultInfoMap.put(serializedFileID, queryItemsList);
                        serializedFilesIDsList.add(serializedFileID);
                    } else {
                        // before inserting we could check whether it has
                        // already been added:
                        // if (!doesAlreadyExist(queryItem, queryItemsList)) {
                        queryItemsList.add(queryItem);
                        // }
                    }
                }
            }
        } finally {
            searcher.close();
        }
        // if any results were found, mark the search as successful
        success = !searchResultInfoMap.isEmpty();
    } catch (IOException | gate.creole.ir.SearchException e) {
        throw new SearchException(e);
    }
    return success;
}
Also used: Hits(gate.creole.annic.apache.lucene.search.Hits) Query(gate.creole.annic.apache.lucene.search.Query) SearchException(gate.creole.annic.SearchException) StaxDriver(com.thoughtworks.xstream.io.xml.StaxDriver) ArrayList(java.util.ArrayList) List(java.util.List) FileReader(java.io.FileReader) XStream(com.thoughtworks.xstream.XStream) IOException(java.io.IOException) File(java.io.File)
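
A minimal sketch (not part of the GATE sources) of how this search method might be driven directly. The LuceneSearchThread package and no-argument constructor, the index path and the null LuceneSearcher are assumptions; in gate-core the thread is normally created by the LuceneSearcher implementation itself, which passes itself as the last argument.

import gate.creole.annic.SearchException;
// assumed package for the ANNIC class shown above
import gate.creole.annic.lucene.LuceneSearchThread;

public class LuceneSearchThreadSketch {

    public static void main(String[] args) throws SearchException {
        LuceneSearchThread thread = new LuceneSearchThread();
        boolean found = thread.search(
                "{Person}",              // ANNIC query (illustrative)
                5,                       // context window: tokens of left/right context
                "/path/to/annic-index",  // directory holding LuceneIndexDefinition.xml
                null,                    // corpus ID: null = no corpus restriction
                null,                    // annotation set: null = no set restriction
                null);                   // LuceneSearcher; a real instance is needed
                                         // before results can actually be retrieved
        System.out.println(found ? "hits found" : "no hits");
    }
}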

Aggregations

Query (gate.creole.annic.apache.lucene.search.Query)2 XStream (com.thoughtworks.xstream.XStream)1 StaxDriver (com.thoughtworks.xstream.io.xml.StaxDriver)1 SearchException (gate.creole.annic.SearchException)1 Term (gate.creole.annic.apache.lucene.index.Term)1 BooleanQuery (gate.creole.annic.apache.lucene.search.BooleanQuery)1 Hits (gate.creole.annic.apache.lucene.search.Hits)1 PhraseQuery (gate.creole.annic.apache.lucene.search.PhraseQuery)1 TermQuery (gate.creole.annic.apache.lucene.search.TermQuery)1 File (java.io.File)1 FileReader (java.io.FileReader)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1