use of gate.creole.annic.Pattern in project gate-core by GateNLP.
the class LuceneSearcher method next.
/**
 * Returns the next batch of hits for the most recent search.
 *
 * <p>The pattern list is reset at the start of every call, so {@code getHits()} only sees the
 * patterns gathered by this call. If the last search was unsuccessful, or forward iteration has
 * already ended, {@code getHits()} is invoked on the empty pattern list.
 *
 * <p>When the last search was a delete query, the Lucene hits are reduced to one {@link Hit} per
 * distinct (document ID, annotation set ID) pair, in order of first appearance, with zero offsets
 * and an empty query string.
 *
 * @param numberOfHits number of hits to gather; -1 indicates all
 * @return the hits gathered by this call
 * @throws SearchException wrapping any exception raised while the hits are gathered
 */
@Override
public Hit[] next(int numberOfHits) throws SearchException {
  // every call starts from an empty pattern list
  annicPatterns = new ArrayList<Pattern>();
  if (!success || fwdIterationEnded) {
    return getHits();
  }
  try {
    if (wasDeleteQuery) {
      List<String> docIDs = new ArrayList<String>();
      List<String> setNames = new ArrayList<String>();
      // All set names already reported per document. Looking only at the first entry
      // recorded for a document would re-add a pair whenever that document has more than
      // one annotation set, so every seen set name is tracked.
      Map<String, List<String>> seenSetsByDoc = new HashMap<String, List<String>>();
      for (int i = 0; i < luceneHits.length(); i++) {
        Document luceneDoc = luceneHits.doc(i);
        String documentID = luceneDoc.get(Constants.DOCUMENT_ID);
        String annotationSetID = luceneDoc.get(Constants.ANNOTATION_SET_ID);
        List<String> seenSets = seenSetsByDoc.get(documentID);
        if (seenSets == null) {
          seenSets = new ArrayList<String>();
          seenSetsByDoc.put(documentID, seenSets);
        }
        // List.contains compares null-safely, so a missing set ID does not fail here
        if (!seenSets.contains(annotationSetID)) {
          seenSets.add(annotationSetID);
          docIDs.add(documentID);
          setNames.add(annotationSetID);
        }
      }
      Hit[] toReturn = new Hit[docIDs.size()];
      for (int i = 0; i < toReturn.length; i++) {
        toReturn[i] = new Hit(docIDs.get(i), setNames.get(i), 0, 0, "");
      }
      return toReturn;
    }
    // resume from the search thread reached by the previous call
    for (; luceneSearchThreadIndex < luceneSearchThreads.size(); luceneSearchThreadIndex++) {
      LuceneSearchThread lst = luceneSearchThreads.get(luceneSearchThreadIndex);
      List<Pattern> results = lst.next(numberOfHits);
      if (results != null) {
        // NOTE(review): assumes lst.next never returns more than numberOfHits patterns;
        // otherwise the remaining count would go negative - confirm.
        if (numberOfHits != -1) {
          numberOfHits -= results.size();
        }
        this.annicPatterns.addAll(results);
        if (numberOfHits == 0) {
          // requested amount reached; stay on this thread for the next call
          return getHits();
        }
      }
    }
    // If we are here, there were not enough patterns available. Mark forward iteration
    // as ended so that the next call returns straight away with an empty pattern list.
    fwdIterationEnded = true;
    return getHits();
  } catch (Exception e) {
    throw new SearchException(e);
  }
}
use of gate.creole.annic.Pattern in project gate-core by GateNLP.
the class StatsCalculator method freqForAllValues.
/**
 * Counts how often each value of the given annotation type's feature occurs in the supplied
 * hits.
 *
 * @param patternsToSearchIn hits to inspect; every element is cast to {@link Pattern}. A
 *        {@code null} or empty list yields an empty map.
 * @param annotationType annotation type handed to the annotation filter
 * @param feature feature whose values are counted
 * @param inMatchedSpan whether annotations inside the matched span are considered
 * @param inContext whether annotations in the left and right context are considered
 * @return a map where the key is a value of AT.feature and the value is the number of times it
 *         was seen
 * @throws SearchException if the list is non-empty and both {@code inMatchedSpan} and
 *         {@code inContext} are false
 */
public static Map<String, Integer> freqForAllValues(List<Hit> patternsToSearchIn, String annotationType, String feature, boolean inMatchedSpan, boolean inContext) throws SearchException {
  Map<String, Integer> frequencies = new HashMap<String, Integer>();
  if (patternsToSearchIn == null || patternsToSearchIn.isEmpty()) {
    return frequencies;
  }
  if (!(inMatchedSpan || inContext)) {
    throw new SearchException("Both inMatchedSpan and inContext cannot be set to false");
  }
  for (Hit hit : patternsToSearchIn) {
    Pattern pattern = (Pattern) hit;
    List<PatternAnnotation> candidates;
    if (inMatchedSpan && inContext) {
      // matched span and context together: take every annotation of the pattern
      candidates = Arrays.asList(pattern.getPatternAnnotations());
    } else if (inMatchedSpan) {
      // matched span only
      candidates = pattern.getPatternAnnotations(pattern.getStartOffset(), pattern.getEndOffset());
    } else {
      // context only: left context followed by right context
      candidates = pattern.getPatternAnnotations(pattern.getLeftContextStartOffset(), pattern.getStartOffset());
      candidates.addAll(pattern.getPatternAnnotations(pattern.getEndOffset(), pattern.getRightContextEndOffset()));
    }
    if (candidates.isEmpty()) {
      continue;
    }
    // presumably narrows the candidates to the requested type/feature - see the helper
    for (PatternAnnotation annotation : getPatternAnnotations(candidates, annotationType, feature)) {
      String value = annotation.getFeatures().get(feature);
      Integer seenSoFar = frequencies.get(value);
      frequencies.put(value, seenSoFar == null ? 1 : seenSoFar.intValue() + 1);
    }
  }
  return frequencies;
}
use of gate.creole.annic.Pattern in project gate-core by GateNLP.
the class LuceneDataStoreSearchGUI method updateStackView.
/**
 * Updates the annotation stack in the central view.
 *
 * <p>When no row is selected in the results table, the central panel is cleared and shows a
 * short status message instead. Otherwise the stack is rebuilt from the selected result: one
 * row per configured stack row that is not hidden, filled with the result's annotations of
 * that row's type.
 */
protected void updateStackView() {
  if (resultTable.getSelectedRow() == -1) {
    // no result is selected in the result table: show a message only
    GridBagConstraints constraints = new GridBagConstraints();
    constraints.gridx = 0;
    constraints.gridy = 0;
    constraints.fill = GridBagConstraints.BOTH;
    centerPanel.removeAll();
    String message;
    if (resultTable.getRowCount() > 0) {
      message = "Select a row in the results table below.";
    } else if (numberOfResultsSlider.getValue() > (numberOfResultsSlider.getMaximum() - 100)) {
      message = "Retrieving all results...";
    } else {
      message = "Retrieving " + numberOfResultsSlider.getValue() + " results...";
    }
    centerPanel.add(new JLabel(message), constraints);
    centerPanel.validate();
    centerPanel.repaint();
    return;
  }

  // the result behind the lead-selected row of the results table
  int selectedModelRow = resultTable.rowViewToModel(resultTable.getSelectionModel().getLeadSelectionIndex());
  Pattern selected = (Pattern) results.get(selectedModelRow);

  // set up the annotation stack for that result
  centerPanel.setText(selected.getPatternText());
  centerPanel.setExpressionStartOffset(selected.getStartOffset());
  centerPanel.setExpressionEndOffset(selected.getEndOffset());
  centerPanel.setContextBeforeSize(selected.getStartOffset() - selected.getLeftContextStartOffset());
  centerPanel.setContextAfterSize(selected.getRightContextEndOffset() - selected.getEndOffset());
  centerPanel.setLastRowButton(configureStackViewButton);
  centerPanel.setTextMouseListener(new TextMouseListener());
  centerPanel.setHeaderMouseListener(new HeaderMouseListener());
  centerPanel.setAnnotationMouseListener(new AnnotationMouseListener());
  centerPanel.clearAllRows();

  // one stack row per configured row that is not hidden
  for (int row = 0; row < numStackRows; row++) {
    if (stackRows[row][DISPLAY].equals("false")) {
      continue;
    }
    final String type = stackRows[row][ANNOTATION_TYPE];
    final String feature = stackRows[row][FEATURE];
    String shortcut = stackRows[row][SHORTCUT];

    // button at the end of the row that hides it again
    JButton hideRowButton = new ButtonBorder(new Color(250, 250, 250), new Insets(0, 3, 0, 3), true);
    hideRowButton.setIcon(MainFrame.getIcon("Remove"));
    hideRowButton.setToolTipText("Hide this row.");
    hideRowButton.addActionListener(new ActionListener() {
      @Override
      public void actionPerformed(ActionEvent event) {
        // look the row up again by type/feature at click time
        int rowToHide = findStackRow(ANNOTATION_TYPE, type, FEATURE, feature);
        if (rowToHide >= 0) {
          stackRows[rowToHide][DISPLAY] = "false";
          saveStackViewConfiguration();
        }
        updateStackView();
      }
    });

    // anything other than the two explicit settings crops in the middle
    String cropSetting = stackRows[row][CROP];
    int crop = cropSetting.equals("Crop start")
        ? AnnotationStack.CROP_START
        : (cropSetting.equals("Crop end") ? AnnotationStack.CROP_END : AnnotationStack.CROP_MIDDLE);
    centerPanel.addRow(null, type, feature, hideRowButton, shortcut, crop);

    // annotations of this row's type, each with its own copy of the features
    PatternAnnotation[] rowAnnotations = selected.getPatternAnnotations(type);
    if (rowAnnotations != null) {
      for (PatternAnnotation rowAnnotation : rowAnnotations) {
        FeatureMap featureCopy = Factory.newFeatureMap();
        featureCopy.putAll(rowAnnotation.getFeatures());
        centerPanel.addAnnotation(rowAnnotation.getStartOffset(), rowAnnotation.getEndOffset(), rowAnnotation.getType(), featureCopy);
      }
    }
  }

  // draw the annotation stack
  centerPanel.drawStack();
}
use of gate.creole.annic.Pattern in project gate-core by GateNLP.
the class LuceneSearchThread method locatePatterns.
/**
 * Locates the valid patterns in the token stream and discards the invalid first term
 * positions returned by the lucene searcher.
 *
 * <p>For each non-empty annotation list a text buffer spanning the smallest start offset to the
 * highest end offset is built from the annotations' text (gaps stay blank). A candidate is kept
 * only if some annotation sits at the corresponding first term position and the resulting span
 * lies inside the buffer. As a side effect, the searcher's annotation type map is extended with
 * every type and feature name seen on annotations that carry text.
 *
 * @param docID document ID passed to each created pattern
 * @param annotationSetName annotation set name passed to each created pattern
 * @param gateAnnotations one annotation list per candidate pattern
 * @param firstTermPositions per candidate, the position (an {@code Integer}) of its first term
 * @param patternLength per candidate, the value used as the pattern's end offset
 * @param queryString query string passed to each created pattern
 * @return the patterns that passed the checks, in candidate order
 */
private List<Pattern> locatePatterns(String docID, String annotationSetName, List<List<PatternAnnotation>> gateAnnotations, List<?> firstTermPositions, List<Integer> patternLength, String queryString) {
  List<Pattern> located = new ArrayList<Pattern>();
  for (int i = 0; i < gateAnnotations.size(); i++) {
    // each element of the token stream is one candidate pattern
    List<PatternAnnotation> annotations = gateAnnotations.get(i);
    if (annotations.isEmpty()) {
      continue;
    }

    // extent of the text covered by the annotations
    int textStart = Integer.MAX_VALUE;
    int textEnd = -1;
    for (PatternAnnotation annotation : annotations) {
      textStart = Math.min(textStart, annotation.getStartOffset());
      textEnd = Math.max(textEnd, annotation.getEndOffset());
    }

    // blank buffer; annotation text is written over it below
    char[] text = new char[textEnd - textStart];
    for (int pos = 0; pos < text.length; pos++) {
      text[pos] = ' ';
    }

    for (PatternAnnotation annotation : annotations) {
      String annotationText = annotation.getText();
      if (annotationText == null) {
        // this is to avoid annotations such as split
        continue;
      }
      int base = annotation.getStartOffset() - textStart;
      for (int m = 0; m < annotationText.length() && base + m < text.length; m++) {
        text[base + m] = annotationText.charAt(m);
      }

      // keep the searcher's annotation type -> feature names map up to date
      String type = annotation.getType();
      Map<String, String> features = annotation.getFeatures();
      List<String> featureNames;
      if (luceneSearcher.annotationTypesMap.containsKey(type)) {
        featureNames = luceneSearcher.annotationTypesMap.get(type);
        if (features != null) {
          for (String featureName : features.keySet()) {
            if (!featureNames.contains(featureName)) {
              featureNames.add(featureName);
            }
          }
        }
      } else {
        // first time this type is seen: its list starts with the "All" entry
        featureNames = new ArrayList<String>();
        featureNames.add("All");
        if (features != null) {
          featureNames.addAll(features.keySet());
        }
      }
      luceneSearcher.annotationTypesMap.put(type, featureNames);
    }

    // the pattern starts at the smallest start offset among the annotations
    // that sit at the first term position
    int firstTermPosition = ((Integer) firstTermPositions.get(i)).intValue();
    int matchEnd = patternLength.get(i).intValue();
    int matchStart = Integer.MAX_VALUE;
    for (PatternAnnotation annotation : annotations) {
      if (annotation.getPosition() == firstTermPosition && annotation.getStartOffset() < matchStart) {
        matchStart = annotation.getStartOffset();
      }
    }

    // discard when nothing sits at the first term position or the span leaves the buffer
    boolean nothingAtFirstTerm = matchStart == Integer.MAX_VALUE;
    if (nothingAtFirstTerm || matchStart < textStart || matchEnd > textEnd) {
      continue;
    }

    located.add(new Pattern(docID, annotationSetName, new String(text), matchStart, matchEnd, textStart, textEnd, annotations, queryString));
  }
  return located;
}
use of gate.creole.annic.Pattern in project gate-core by GateNLP.
the class LuceneSearcher method search.
/**
 * Runs a search and returns true/false indicating whether results were found or not.
 *
 * <p>All state from a previous search is reset first. Two kinds of request are handled:
 * <ul>
 * <li>If {@code parameters} has exactly two entries, one of them a non-null
 * {@code Constants.INDEX_LOCATION_URL}, and a {@code Constants.CORPUS_ID} is present, only the
 * documents stored under that corpus are looked up (flagged as a delete query).</li>
 * <li>Otherwise one {@link LuceneSearchThread} is run per index location; if
 * {@code Constants.INDEX_LOCATIONS} is missing, it is filled in (in the caller's map) from the
 * datastore indexer's index location URL.</li>
 * </ul>
 *
 * @param query the query to run; must not be {@code null} for a regular search
 * @param parameters search parameters; must not be {@code null}. A regular search requires a
 *        positive {@code Integer} under {@code Constants.CONTEXT_WINDOW}.
 * @return {@code true} if the corpus lookup produced at least one hit, or at least one search
 *         thread reported success
 * @throws SearchException if a required parameter is missing or invalid, or reading the index
 *         fails
 */
@SuppressWarnings("unchecked")
@Override
public boolean search(String query, Map<String, Object> parameters) throws SearchException {
  // reset everything left over from a previous search
  luceneHits = null;
  annicPatterns = new ArrayList<Pattern>();
  annotationTypesMap = new HashMap<String, List<String>>();
  luceneSearchThreads = new ArrayList<LuceneSearchThread>();
  luceneSearchThreadIndex = 0;
  success = false;
  fwdIterationEnded = false;
  wasDeleteQuery = false;
  if (parameters == null) {
    throw new SearchException("Parameters cannot be null");
  }
  this.parameters = parameters;

  /*
   * Let's first check if the query is to search the document names. This is
   * used when we only want to search for documents stored under the specific
   * corpus.
   */
  if (parameters.size() == 2 && parameters.get(Constants.INDEX_LOCATION_URL) != null) {
    String corpusID = (String) parameters.get(Constants.CORPUS_ID);
    String indexLocation = toIndexLocationPath((URL) parameters.get(Constants.INDEX_LOCATION_URL));
    if (corpusID != null) {
      wasDeleteQuery = true;
      TermQuery tq = new TermQuery(new Term(Constants.CORPUS_ID, corpusID));
      try {
        // NOTE(review): the searcher is intentionally left open here; next() reads
        // documents from luceneHits later and presumably needs it - confirm before
        // adding a close().
        gate.creole.annic.apache.lucene.search.Searcher searcher = new IndexSearcher(indexLocation);
        // execute the query; the result is stored in luceneHits
        luceneHits = searcher.search(tq);
        success = luceneHits.length() > 0;
        return success;
      } catch (IOException ioe) {
        // the cause travels with the SearchException, so no separate stack trace print
        throw new SearchException(ioe);
      }
    }
  }

  // fall back to the datastore's own index location when none is given
  if (parameters.get(Constants.INDEX_LOCATIONS) == null) {
    String indexLocation = toIndexLocationPath((URL) datastore.getIndexer().getParameters().get(Constants.INDEX_LOCATION_URL));
    ArrayList<String> defaultLocations = new ArrayList<String>();
    defaultLocations.add(indexLocation);
    parameters.put(Constants.INDEX_LOCATIONS, defaultLocations);
  }
  indexLocations = new ArrayList<String>((List<? extends String>) parameters.get(Constants.INDEX_LOCATIONS));
  if (indexLocations.size() == 0) {
    throw new SearchException("Corpus is not initialized");
  }

  // check for valid context window
  if (parameters.get(Constants.CONTEXT_WINDOW) == null) {
    throw new SearchException("Parameter " + Constants.CONTEXT_WINDOW + " is not provided!");
  }
  contextWindow = ((Integer) parameters.get(Constants.CONTEXT_WINDOW)).intValue();
  if (getContextWindow().intValue() <= 0) {
    throw new SearchException("Context Window must be atleast 1 or > 1");
  }
  if (query == null) {
    throw new SearchException("Query is not initialized");
  }
  this.query = query;
  this.corpusToSearchIn = (String) parameters.get(Constants.CORPUS_ID);
  this.annotationSetToSearchIn = (String) parameters.get(Constants.ANNOTATION_SET_ID);

  // TODO: is this really useful or used to have several indexLocations ?
  for (int indexCounter = 0; indexCounter < indexLocations.size(); indexCounter++) {
    String location = indexLocations.get(indexCounter);
    // a separate LuceneSearchThread per index; only successful ones are kept
    LuceneSearchThread lst = new LuceneSearchThread();
    if (lst.search(query, contextWindow, location, corpusToSearchIn, annotationSetToSearchIn, this)) {
      luceneSearchThreads.add(lst);
    }
  }
  success = luceneSearchThreads.size() > 0;
  return success;
}

/**
 * Converts an index location URL into an absolute file system path, falling back to the URL's
 * raw file part when the URL cannot be converted to a URI.
 */
private static String toIndexLocationPath(URL indexLocationUrl) {
  try {
    return new File(indexLocationUrl.toURI()).getAbsolutePath();
  } catch (URISyntaxException use) {
    return new File(indexLocationUrl.getFile()).getAbsolutePath();
  }
}
Aggregations