Search in sources :

Example 1 with Hit

use of gate.creole.annic.Hit in project gate-core by GateNLP.

the class LuceneSearcher method next.

/**
 * Returns the next batch of hits for the most recent search.
 *
 * <p>If the last search was not successful, or a previous call already
 * exhausted all search threads, the result of {@code getHits()} over a freshly
 * emptied pattern list is returned. If the last query was a delete query, one
 * {@link Hit} is returned per distinct (document ID, annotation set ID) pair
 * found in the Lucene hits, in first-seen order. Otherwise the search threads
 * are consulted in order, resuming from the thread reached by the previous
 * call, until the requested number of patterns has been collected.
 *
 * @param numberOfHits the number of hits wanted; -1 indicates all
 * @return the hits collected by this call
 * @throws SearchException wrapping any exception raised while collecting hits
 */
@Override
public Hit[] next(int numberOfHits) throws SearchException {
    // Every call starts from an empty buffer so that getHits() only reflects
    // the patterns gathered by this invocation.
    annicPatterns = new ArrayList<Pattern>();
    if (!success || fwdIterationEnded) {
        return getHits();
    }
    try {
        if (wasDeleteQuery) {
            List<String> docIDs = new ArrayList<String>();
            List<String> setNames = new ArrayList<String>();
            // Tracks every (document ID, annotation set ID) pair already
            // emitted. A two-element list is used as the key because its
            // equals/hashCode are element-wise and tolerate null elements.
            // Fully-qualified names keep this block independent of the
            // file's import section.
            java.util.Set<List<String>> seenPairs = new java.util.HashSet<List<String>>();
            for (int i = 0; i < luceneHits.length(); i++) {
                Document luceneDoc = luceneHits.doc(i);
                String documentID = luceneDoc.get(Constants.DOCUMENT_ID);
                String annotationSetID = luceneDoc.get(Constants.ANNOTATION_SET_ID);
                // add() returns false when the pair was already present.
                if (seenPairs.add(java.util.Arrays.asList(documentID, annotationSetID))) {
                    docIDs.add(documentID);
                    setNames.add(annotationSetID);
                }
            }
            Hit[] toReturn = new Hit[docIDs.size()];
            for (int i = 0; i < toReturn.length; i++) {
                toReturn[i] = new Hit(docIDs.get(i), setNames.get(i), 0, 0, "");
            }
            return toReturn;
        }
        // luceneSearchThreadIndex is not reset here, so iteration resumes at
        // the thread that was being read when the previous call returned.
        for (; luceneSearchThreadIndex < luceneSearchThreads.size(); luceneSearchThreadIndex++) {
            LuceneSearchThread lst = luceneSearchThreads.get(luceneSearchThreadIndex);
            List<Pattern> results = lst.next(numberOfHits);
            if (results != null) {
                // -1 means "no limit", so the remaining quota is only
                // tracked for bounded requests.
                if (numberOfHits != -1) {
                    numberOfHits -= results.size();
                }
                this.annicPatterns.addAll(results);
                if (numberOfHits == 0) {
                    return getHits();
                }
            }
        }
        // All search threads are exhausted without filling the request.
        // Record that forward iteration has ended so that subsequent calls
        // return immediately through the guard at the top of this method.
        fwdIterationEnded = true;
        return getHits();
    } catch (Exception e) {
        throw new SearchException(e);
    }
}
Also used : Pattern(gate.creole.annic.Pattern) ArrayList(java.util.ArrayList) SearchException(gate.creole.annic.SearchException) Document(gate.creole.annic.apache.lucene.document.Document) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException) SearchException(gate.creole.annic.SearchException) Hit(gate.creole.annic.Hit)

Example 2 with Hit

use of gate.creole.annic.Hit in project gate-core by GateNLP.

the class StatsCalculator method freqForAllValues.

/**
 * Counts how often each value of the given annotation feature occurs in the
 * supplied hits.
 *
 * <p>Depending on the two flags, annotations are taken from the matched span
 * (start offset to end offset), from the surrounding context (left context
 * start to start offset, plus end offset to right context end), or from the
 * whole pattern when both flags are set.
 *
 * @param patternsToSearchIn hits to inspect; every element is cast to {@link Pattern}
 * @param annotationType annotation type used to filter the annotations
 * @param feature feature whose values are counted
 * @param inMatchedSpan whether annotations in the matched span are considered
 * @param inContext whether annotations in the left and right context are considered
 * @return a map from each feature value to the number of times it was seen;
 *         empty when {@code patternsToSearchIn} is null or empty
 * @throws SearchException if both {@code inMatchedSpan} and {@code inContext} are false
 */
public static Map<String, Integer> freqForAllValues(List<Hit> patternsToSearchIn, String annotationType, String feature, boolean inMatchedSpan, boolean inContext) throws SearchException {
    Map<String, Integer> frequencies = new HashMap<String, Integer>();
    // The emptiness check deliberately precedes flag validation: an empty
    // input yields an empty map even when both flags are false.
    if (patternsToSearchIn == null || patternsToSearchIn.isEmpty()) {
        return frequencies;
    }
    if (!(inMatchedSpan || inContext)) {
        throw new SearchException("Both inMatchedSpan and inContext cannot be set to false");
    }
    for (Hit hit : patternsToSearchIn) {
        Pattern pattern = (Pattern) hit;
        List<PatternAnnotation> candidates;
        if (inMatchedSpan && inContext) {
            // Span and context together: every annotation of the pattern.
            candidates = Arrays.asList(pattern.getPatternAnnotations());
        } else if (inMatchedSpan) {
            candidates = pattern.getPatternAnnotations(pattern.getStartOffset(), pattern.getEndOffset());
        } else {
            // Context only: left context followed by right context.
            candidates = pattern.getPatternAnnotations(pattern.getLeftContextStartOffset(), pattern.getStartOffset());
            candidates.addAll(pattern.getPatternAnnotations(pattern.getEndOffset(), pattern.getRightContextEndOffset()));
        }
        if (!candidates.isEmpty()) {
            for (PatternAnnotation annotation : getPatternAnnotations(candidates, annotationType, feature)) {
                String value = annotation.getFeatures().get(feature);
                Integer previous = frequencies.get(value);
                frequencies.put(value, previous == null ? 1 : previous + 1);
            }
        }
    }
    return frequencies;
}
Also used : Pattern(gate.creole.annic.Pattern) Hit(gate.creole.annic.Hit) HashMap(java.util.HashMap) PatternAnnotation(gate.creole.annic.PatternAnnotation) ArrayList(java.util.ArrayList) SearchException(gate.creole.annic.SearchException)

Example 3 with Hit

use of gate.creole.annic.Hit in project gate-core by GateNLP.

the class TestAnnic method testSearcher.

/**
 * Tests the ANNIC searching functionality.
 *
 * <p>Runs three queries against the index located at {@code indexURL} with a
 * context window of 5: the first is expected to yield 12 hits, the other two
 * are expected to yield none. The hits of the last query are then serialised
 * with {@code Parser.toXML}, read back with {@code Parser.fromXML} and
 * serialised again; the round trip is executed but its result is not asserted.
 *
 * @throws Exception if searching or the XML round trip fails
 */
public void testSearcher() throws Exception {
    LuceneSearcher searcher = new LuceneSearcher();
    Map<String, Object> parameters = new HashMap<String, Object>();
    List<String> indexLocations = new ArrayList<String>();
    indexLocations.add(indexURL.getAbsolutePath());
    parameters.put(Constants.INDEX_LOCATIONS, indexLocations);
    // Integer.valueOf replaces the deprecated Integer(int) constructor.
    parameters.put(Constants.CONTEXT_WINDOW, Integer.valueOf(5));
    String query = "{Person}";
    @SuppressWarnings("unused") boolean success = searcher.search(query, parameters);
    int noOfHits = searcher.next(-1).length;
    assertEquals(12, noOfHits);
    query = "{Organization}({Token})*3{Person}";
    success = searcher.search(query, parameters);
    noOfHits = searcher.next(-1).length;
    // Expected value goes first so failure messages report the right numbers.
    assertEquals(0, noOfHits);
    query = "{Organization}({Token})*3 (\"up\" | \"down\") ({Token})*3 ({Money} | {Percent})";
    success = searcher.search(query, parameters);
    Hit[] toExport = searcher.next(-1);
    assertEquals(0, toExport.length);
    String xmlRepresentation = Parser.toXML(toExport);
    // and then read it back
    toExport = Parser.fromXML(xmlRepresentation);
    xmlRepresentation = Parser.toXML(toExport);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LuceneSearcher(gate.creole.annic.lucene.LuceneSearcher) Hit(gate.creole.annic.Hit)

Example 4 with Hit

use of gate.creole.annic.Hit in project gate-core by GateNLP.

the class StatsCalculator method freq.

/**
 * Counts the annotations of the given type found in the supplied hits.
 *
 * <p>Depending on the two flags, annotations are taken from the matched span
 * (start offset to end offset), from the surrounding context (left context
 * start to start offset, plus end offset to right context end), or from the
 * whole pattern when both flags are set.
 *
 * @param patternsToSearchIn hits to inspect; every element is cast to {@link Pattern}
 * @param annotationType annotation type used to filter the annotations
 * @param inMatchedSpan whether annotations in the matched span are considered
 * @param inContext whether annotations in the left and right context are considered
 * @return the total number of matching annotations; 0 when
 *         {@code patternsToSearchIn} is null or empty
 * @throws SearchException if both {@code inMatchedSpan} and {@code inContext} are false
 * @see #freq(List, String, String, String, boolean, boolean)
 */
public static int freq(List<Hit> patternsToSearchIn, String annotationType, boolean inMatchedSpan, boolean inContext) throws SearchException {
    // The emptiness check deliberately precedes flag validation: an empty
    // input yields 0 even when both flags are false.
    if (patternsToSearchIn == null || patternsToSearchIn.isEmpty()) {
        return 0;
    }
    if (!(inMatchedSpan || inContext)) {
        throw new SearchException("Both inMatchedSpan and inContext cannot be set to false");
    }
    int total = 0;
    for (Hit hit : patternsToSearchIn) {
        Pattern pattern = (Pattern) hit;
        List<PatternAnnotation> candidates;
        if (inMatchedSpan && inContext) {
            // Span and context together: every annotation of the pattern.
            candidates = Arrays.asList(pattern.getPatternAnnotations());
        } else if (inMatchedSpan) {
            candidates = pattern.getPatternAnnotations(pattern.getStartOffset(), pattern.getEndOffset());
        } else {
            // Context only: left context followed by right context.
            candidates = pattern.getPatternAnnotations(pattern.getLeftContextStartOffset(), pattern.getStartOffset());
            candidates.addAll(pattern.getPatternAnnotations(pattern.getEndOffset(), pattern.getRightContextEndOffset()));
        }
        if (!candidates.isEmpty()) {
            total += getPatternAnnotations(candidates, annotationType).size();
        }
    }
    return total;
}
Also used : Pattern(gate.creole.annic.Pattern) Hit(gate.creole.annic.Hit) PatternAnnotation(gate.creole.annic.PatternAnnotation) ArrayList(java.util.ArrayList) SearchException(gate.creole.annic.SearchException)

Aggregations

Hit (gate.creole.annic.Hit)4 ArrayList (java.util.ArrayList)4 Pattern (gate.creole.annic.Pattern)3 SearchException (gate.creole.annic.SearchException)3 PatternAnnotation (gate.creole.annic.PatternAnnotation)2 HashMap (java.util.HashMap)2 Document (gate.creole.annic.apache.lucene.document.Document)1 LuceneSearcher (gate.creole.annic.lucene.LuceneSearcher)1 IOException (java.io.IOException)1 URISyntaxException (java.net.URISyntaxException)1