use of gate.creole.annic.Hit in project gate-core by GateNLP.
the class LuceneSearcher method next.
/**
 * Returns the next batch of hits for the most recent search.
 *
 * <p>Every call starts from a fresh, empty pattern list. If the last search
 * was unsuccessful, or a previous call already exhausted all search threads,
 * the result of {@code getHits()} is returned straight away.
 *
 * <p>When the last query was a delete query, one {@link Hit} is returned per
 * distinct (document ID, annotation set ID) pair found in the Lucene hits;
 * the remaining {@code Hit} constructor arguments are 0, 0 and "".
 *
 * @param numberOfHits the number of hits wanted; -1 indicates all
 * @return the hits gathered by this call
 * @throws SearchException wrapping any exception raised while gathering hits
 */
@Override
public Hit[] next(int numberOfHits) throws SearchException {
  annicPatterns = new ArrayList<Pattern>();
  // Nothing to iterate over: either the search failed or we already ran dry.
  if (!success || fwdIterationEnded) {
    return getHits();
  }
  try {
    if (wasDeleteQuery) {
      // Parallel lists: docIDs.get(i) and setNames.get(i) together form one pair.
      List<String> docIDs = new ArrayList<String>();
      List<String> setNames = new ArrayList<String>();
      for (int i = 0; i < luceneHits.length(); i++) {
        Document luceneDoc = luceneHits.doc(i);
        String documentID = luceneDoc.get(Constants.DOCUMENT_ID);
        String annotationSetID = luceneDoc.get(Constants.ANNOTATION_SET_ID);
        // Compare against every recorded pair, not only the first entry for
        // this document; otherwise a document with several annotation sets
        // gets the same (document, set) pair reported more than once.
        boolean alreadyListed = false;
        for (int j = 0; j < docIDs.size() && !alreadyListed; j++) {
          alreadyListed = sameOrBothNull(docIDs.get(j), documentID)
              && sameOrBothNull(setNames.get(j), annotationSetID);
        }
        if (!alreadyListed) {
          docIDs.add(documentID);
          setNames.add(annotationSetID);
        }
      }
      Hit[] toReturn = new Hit[docIDs.size()];
      for (int i = 0; i < toReturn.length; i++) {
        toReturn[i] = new Hit(docIDs.get(i), setNames.get(i), 0, 0, "");
      }
      return toReturn;
    }
    // Remember whether the caller asked for a bounded number of hits, so that
    // the running count can never be confused with the -1 "all" sentinel.
    final boolean limited = numberOfHits >= 0;
    for (; luceneSearchThreadIndex < luceneSearchThreads.size(); luceneSearchThreadIndex++) {
      LuceneSearchThread lst = luceneSearchThreads.get(luceneSearchThreadIndex);
      List<Pattern> results = lst.next(numberOfHits);
      if (results != null) {
        if (numberOfHits != -1) {
          numberOfHits -= results.size();
        }
        this.annicPatterns.addAll(results);
        // "<= 0" rather than "== 0": defensive in case a thread ever returns
        // more patterns than requested (not verifiable from this method).
        // The thread index is deliberately not advanced here, so the next
        // call resumes with the same search thread.
        if (limited && numberOfHits <= 0) {
          return getHits();
        }
      }
    }
    // If we are here, there were not enough patterns available to satisfy
    // the request. Flag the forward iteration as ended so that the next call
    // returns immediately via the guard at the top of this method.
    fwdIterationEnded = true;
    return getHits();
  } catch (Exception e) {
    throw new SearchException(e);
  }
}

/** Null-safe string equality: true when both are null or both are equal. */
private static boolean sameOrBothNull(String a, String b) {
  return a == null ? b == null : a.equals(b);
}
use of gate.creole.annic.Hit in project gate-core by GateNLP.
the class StatsCalculator method freqForAllValues.
/**
 * Builds a frequency table of the values taken by {@code annotationType.feature}
 * in the supplied hits.
 *
 * <p>Each hit is cast to {@link Pattern}. Depending on the two flags, the
 * annotations considered are those of the matched span only, of the left and
 * right context only, or all annotations of the pattern. The candidates are
 * narrowed with {@code getPatternAnnotations(annots, annotationType, feature)}
 * and the value stored under {@code feature} in each remaining annotation's
 * feature map is counted.
 *
 * @param patternsToSearchIn hits to inspect; null or empty yields an empty map
 * @param annotationType annotation type passed to the narrowing helper
 * @param feature feature name whose values are counted
 * @param inMatchedSpan whether annotations inside the matched span are considered
 * @param inContext whether annotations in the surrounding context are considered
 * @return a map from each encountered feature value to the number of times it occurred
 * @throws SearchException if the list is non-empty and both flags are false
 */
public static Map<String, Integer> freqForAllValues(List<Hit> patternsToSearchIn, String annotationType, String feature, boolean inMatchedSpan, boolean inContext) throws SearchException {
  Map<String, Integer> frequencies = new HashMap<String, Integer>();
  if (patternsToSearchIn == null || patternsToSearchIn.isEmpty()) {
    return frequencies;
  }
  if (!(inMatchedSpan || inContext)) {
    throw new SearchException("Both inMatchedSpan and inContext cannot be set to false");
  }
  for (Hit hit : patternsToSearchIn) {
    Pattern match = (Pattern) hit;
    // At least one flag is true here, so exactly one of three scopes applies.
    List<PatternAnnotation> candidates;
    if (inMatchedSpan && inContext) {
      // matched span and context together: every annotation of the pattern
      candidates = Arrays.asList(match.getPatternAnnotations());
    } else if (inMatchedSpan) {
      candidates = match.getPatternAnnotations(match.getStartOffset(), match.getEndOffset());
    } else {
      // context only: left context followed by right context
      candidates = match.getPatternAnnotations(match.getLeftContextStartOffset(), match.getStartOffset());
      candidates.addAll(match.getPatternAnnotations(match.getEndOffset(), match.getRightContextEndOffset()));
    }
    if (!candidates.isEmpty()) {
      for (PatternAnnotation annotation : getPatternAnnotations(candidates, annotationType, feature)) {
        String value = annotation.getFeatures().get(feature);
        Integer seenSoFar = frequencies.get(value);
        frequencies.put(value, seenSoFar == null ? 1 : seenSoFar.intValue() + 1);
      }
    }
  }
  return frequencies;
}
use of gate.creole.annic.Hit in project gate-core by GateNLP.
the class TestAnnic method testSearcher.
/**
 * Tests the ANNIC searching functionality.
 *
 * <p>Runs three queries against the index at {@code indexURL} with a context
 * window of 5, checks the number of hits returned for each, and finally pushes
 * the (empty) result of the last query through an XML export/import round trip
 * as a smoke test.
 *
 * @throws Exception if searching or the XML round trip fails
 */
public void testSearcher() throws Exception {
  LuceneSearcher searcher = new LuceneSearcher();
  Map<String, Object> parameters = new HashMap<String, Object>();
  List<String> indexLocations = new ArrayList<String>();
  indexLocations.add(indexURL.getAbsolutePath());
  parameters.put(Constants.INDEX_LOCATIONS, indexLocations);
  // Integer.valueOf instead of the deprecated Integer(int) constructor.
  parameters.put(Constants.CONTEXT_WINDOW, Integer.valueOf(5));

  // The boolean returned by search(...) is intentionally not checked here;
  // only the hit counts are asserted.
  String query = "{Person}";
  searcher.search(query, parameters);
  int noOfHits = searcher.next(-1).length;
  assertEquals(12, noOfHits);

  // Expected value goes first so that failure messages read correctly.
  query = "{Organization}({Token})*3{Person}";
  searcher.search(query, parameters);
  noOfHits = searcher.next(-1).length;
  assertEquals(0, noOfHits);

  query = "{Organization}({Token})*3 (\"up\" | \"down\") ({Token})*3 ({Money} | {Percent})";
  searcher.search(query, parameters);
  Hit[] toExport = searcher.next(-1);
  assertEquals(0, toExport.length);

  // Export to XML, read it back, and export again; this only verifies that
  // the round trip completes without throwing.
  String xmlRepresentation = Parser.toXML(toExport);
  toExport = Parser.fromXML(xmlRepresentation);
  Parser.toXML(toExport);
}
use of gate.creole.annic.Hit in project gate-core by GateNLP.
the class StatsCalculator method freq.
/**
 * Counts annotations of the given type across the supplied hits.
 *
 * <p>Each hit is cast to {@link Pattern}. Depending on the two flags, the
 * annotations considered are those of the matched span only, of the left and
 * right context only, or all annotations of the pattern. The candidates are
 * narrowed with {@code getPatternAnnotations(annots, annotationType)} and the
 * sizes of the resulting lists are summed.
 *
 * @param patternsToSearchIn hits to inspect; null or empty yields 0
 * @param annotationType annotation type passed to the narrowing helper
 * @param inMatchedSpan whether annotations inside the matched span are considered
 * @param inContext whether annotations in the surrounding context are considered
 * @return the total number of annotations counted over all hits
 * @throws SearchException if the list is non-empty and both flags are false
 * @see #freq(List, String, String, String, boolean, boolean)
 */
public static int freq(List<Hit> patternsToSearchIn, String annotationType, boolean inMatchedSpan, boolean inContext) throws SearchException {
  if (patternsToSearchIn == null || patternsToSearchIn.isEmpty()) {
    return 0;
  }
  if (!(inMatchedSpan || inContext)) {
    throw new SearchException("Both inMatchedSpan and inContext cannot be set to false");
  }
  int total = 0;
  for (Hit hit : patternsToSearchIn) {
    Pattern match = (Pattern) hit;
    // At least one flag is true here, so exactly one of three scopes applies.
    List<PatternAnnotation> candidates;
    if (inMatchedSpan && inContext) {
      // matched span and context together: every annotation of the pattern
      candidates = Arrays.asList(match.getPatternAnnotations());
    } else if (inMatchedSpan) {
      candidates = match.getPatternAnnotations(match.getStartOffset(), match.getEndOffset());
    } else {
      // context only: left context followed by right context
      candidates = match.getPatternAnnotations(match.getLeftContextStartOffset(), match.getStartOffset());
      candidates.addAll(match.getPatternAnnotations(match.getEndOffset(), match.getRightContextEndOffset()));
    }
    if (!candidates.isEmpty()) {
      total += getPatternAnnotations(candidates, annotationType).size();
    }
  }
  return total;
}
Aggregations