use of gate.creole.annic.PatternAnnotation in project gate-core by GateNLP.
the class StatsCalculator method freqForAllValues.
/**
 * Counts, over all supplied hits, how many times each distinct value of
 * {@code annotationType.feature} occurs.
 * <p>
 * The two flags select which annotations of every hit are inspected: only
 * those between the start and end offset of the match, only those between the
 * context boundaries and the match, or all annotations of the hit when both
 * flags are set.
 *
 * @param patternsToSearchIn hits to inspect; each element is cast to {@code Pattern}
 * @param annotationType annotation type whose feature values are counted
 * @param feature name of the feature whose values become the keys of the result
 * @param inMatchedSpan {@code true} to look at annotations within the matched span
 * @param inContext {@code true} to look at annotations in the left and right context
 * @return a map from each feature value encountered to the number of times it
 *         was seen; an empty map when {@code patternsToSearchIn} is
 *         {@code null} or empty
 * @throws SearchException if both {@code inMatchedSpan} and {@code inContext}
 *         are {@code false}
 */
public static Map<String, Integer> freqForAllValues(List<Hit> patternsToSearchIn, String annotationType, String feature, boolean inMatchedSpan, boolean inContext) throws SearchException {
  Map<String, Integer> frequencies = new HashMap<String, Integer>();
  if (patternsToSearchIn == null || patternsToSearchIn.isEmpty()) {
    return frequencies;
  }
  if (!(inMatchedSpan || inContext)) {
    throw new SearchException("Both inMatchedSpan and inContext cannot be set to false");
  }
  for (Hit hit : patternsToSearchIn) {
    Pattern pattern = (Pattern) hit;
    List<PatternAnnotation> candidates;
    if (inMatchedSpan && inContext) {
      // span and context together: every annotation of the hit
      candidates = Arrays.asList(pattern.getPatternAnnotations());
    } else if (inMatchedSpan) {
      candidates = pattern.getPatternAnnotations(pattern.getStartOffset(), pattern.getEndOffset());
    } else {
      // context only: left context first, then the right context appended
      candidates = pattern.getPatternAnnotations(pattern.getLeftContextStartOffset(), pattern.getStartOffset());
      candidates.addAll(pattern.getPatternAnnotations(pattern.getEndOffset(), pattern.getRightContextEndOffset()));
    }
    if (!candidates.isEmpty()) {
      for (PatternAnnotation match : getPatternAnnotations(candidates, annotationType, feature)) {
        String value = match.getFeatures().get(feature);
        Integer seen = frequencies.get(value);
        frequencies.put(value, seen == null ? 1 : seen + 1);
      }
    }
  }
  return frequencies;
}
use of gate.creole.annic.PatternAnnotation in project gate-core by GateNLP.
the class LuceneDataStoreSearchGUI method updateStackView.
/**
 * Refreshes the annotation stack shown in the central panel.
 * <p>
 * When no row is selected in the results table the panel is emptied and a
 * single status label is shown instead. Otherwise the stack is rebuilt from
 * the selected result: one row per entry of {@code stackRows} that is not
 * hidden, each with a button that hides the row again, followed by the
 * annotations of the row's type.
 */
protected void updateStackView() {
  if (resultTable.getSelectedRow() == -1) {
    // nothing selected: show a status message in place of the stack
    GridBagConstraints constraints = new GridBagConstraints();
    constraints.gridx = 0;
    constraints.gridy = 0;
    constraints.fill = GridBagConstraints.BOTH;
    centerPanel.removeAll();
    String message;
    if (resultTable.getRowCount() > 0) {
      message = "Select a row in the results table below.";
    } else if (numberOfResultsSlider.getValue() > (numberOfResultsSlider.getMaximum() - 100)) {
      message = "Retrieving all results...";
    } else {
      message = "Retrieving " + numberOfResultsSlider.getValue() + " results...";
    }
    centerPanel.add(new JLabel(message), constraints);
    centerPanel.validate();
    centerPanel.repaint();
    return;
  }

  // look up the result behind the selected (view) row
  int modelRow = resultTable.rowViewToModel(resultTable.getSelectionModel().getLeadSelectionIndex());
  Pattern selected = (Pattern) results.get(modelRow);

  // reset the stack with the text and offsets of the selected result
  centerPanel.setText(selected.getPatternText());
  centerPanel.setExpressionStartOffset(selected.getStartOffset());
  centerPanel.setExpressionEndOffset(selected.getEndOffset());
  centerPanel.setContextBeforeSize(selected.getStartOffset() - selected.getLeftContextStartOffset());
  centerPanel.setContextAfterSize(selected.getRightContextEndOffset() - selected.getEndOffset());
  centerPanel.setLastRowButton(configureStackViewButton);
  centerPanel.setTextMouseListener(new TextMouseListener());
  centerPanel.setHeaderMouseListener(new HeaderMouseListener());
  centerPanel.setAnnotationMouseListener(new AnnotationMouseListener());
  centerPanel.clearAllRows();

  for (int row = 0; row < numStackRows; row++) {
    String[] rowConfig = stackRows[row];
    if (rowConfig[DISPLAY].equals("false")) {
      // row was hidden by the user
      continue;
    }
    final String type = rowConfig[ANNOTATION_TYPE];
    final String feature = rowConfig[FEATURE];
    String shortcut = rowConfig[SHORTCUT];

    // button at the end of the row that hides it and redraws the stack
    JButton hideButton = new ButtonBorder(new Color(250, 250, 250), new Insets(0, 3, 0, 3), true);
    hideButton.setIcon(MainFrame.getIcon("Remove"));
    hideButton.setToolTipText("Hide this row.");
    hideButton.addActionListener(new ActionListener() {
      @Override
      public void actionPerformed(ActionEvent event) {
        int index = findStackRow(ANNOTATION_TYPE, type, FEATURE, feature);
        if (index >= 0) {
          stackRows[index][DISPLAY] = "false";
          saveStackViewConfiguration();
        }
        updateStackView();
      }
    });

    // anything other than the two explicit settings crops in the middle
    String cropSetting = rowConfig[CROP];
    int crop = cropSetting.equals("Crop start")
        ? AnnotationStack.CROP_START
        : cropSetting.equals("Crop end") ? AnnotationStack.CROP_END : AnnotationStack.CROP_MIDDLE;
    centerPanel.addRow(null, type, feature, hideButton, shortcut, crop);

    // fill the row with the result's annotations of this type
    PatternAnnotation[] rowAnnotations = selected.getPatternAnnotations(type);
    if (rowAnnotations != null) {
      for (PatternAnnotation rowAnnotation : rowAnnotations) {
        FeatureMap copy = Factory.newFeatureMap();
        copy.putAll(rowAnnotation.getFeatures());
        centerPanel.addAnnotation(rowAnnotation.getStartOffset(), rowAnnotation.getEndOffset(), rowAnnotation.getType(), copy);
      }
    }
  }

  // finally render the stack
  centerPanel.drawStack();
}
use of gate.creole.annic.PatternAnnotation in project gate-core by GateNLP.
the class LuceneSearchThread method getPatternResult.
/**
* Walks the first-term positions held in {@code ftp}, starting just after
* the current {@code ftpIndex}, and for each position rebuilds the
* annotations of the matched pattern together with its left and right
* context from the token stream.
* <p>
* Token conventions relied on below: a token of type {@code "*"} starts a
* new annotation whose type name is the token's term text; a token of type
* {@code "**"} is skipped; any other token is treated as a feature, the
* feature name being the part of the token type after the first
* {@code "."} and the feature value being the term text.
* <p>
* Side effects visible in this method: {@code ftpIndex} is advanced, the
* positions whose pattern fails validation are removed from {@code ftp},
* and parsed query tokens are cached on {@code luceneSearcher}.
*
* @param subTokens token stream to search in
* @param annotationSetName copied unchanged into the returned result
* @param patLen number of base tokens that make up the pattern; only used
* when the query parser reports that no validation is needed
* @param patWindow number of base tokens collected as context on each side
* of the pattern
* @param query query string used to obtain the query tokens for validation
* @param baseTokenAnnotationType term text identifying base tokens (tokens
* of type {@code "*"} with this term text are counted)
* @param noOfResultsToFetch maximum number of results to produce, or
* {@code -1} for no limit
* @return a {@code PatternResult} holding, per located pattern, its list of
* annotations, its first-term position and the end offset recorded
* for it
*/
@SuppressWarnings({ "rawtypes", "unchecked" })
private PatternResult getPatternResult(List<gate.creole.annic.apache.lucene.analysis.Token> subTokens, String annotationSetName, int patLen, int patWindow, String query, String baseTokenAnnotationType, int noOfResultsToFetch) {
List<List<PatternAnnotation>> tokens = new ArrayList<List<PatternAnnotation>>();
List<Integer> patLens = new ArrayList<Integer>();
// resume with the position following the one handled last
ftpIndex++;
// Phrase Query
// consider only one pattern at a time
// first term position index at the beginning (used at the end to map
// results back to their entries in ftp)
int ftpIndexATB = ftpIndex;
// loop until ftp is exhausted or the requested number of results is reached
mainForLoop: for (; ftpIndex < ftp.size() && (noOfResultsToFetch == -1 || noOfResultsToFetch > 0); ftpIndex++) {
// find out the position of the first term
int pos = ((Integer) ftp.get(ftpIndex)).intValue();
// find the index j of the first token with that position
// (j == subTokens.size() when there is none)
int j = 0;
for (; j < subTokens.size(); j++) {
gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(j);
if (token.getPosition() == pos) {
break;
}
}
int counter = 0;
int leftstart = -1;
/*
* ok so we need to go back to find out the first token of the
* left context: walk backwards counting base tokens until patWindow
* of them were seen or the stream start is reached
*/
int k = j - 1;
for (; k >= 0; k--) {
gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(k);
if (token.getPosition() < pos && token.termText().equals(baseTokenAnnotationType) && token.type().equals("*")) {
counter++;
leftstart = token.startOffset();
j = k;
}
if (counter == patWindow) {
break;
}
}
// j holds the start of the left context
// now we want to search for the end of left context, i.e. the last
// token whose position is pos - 1
pos--;
k = j;
if (leftstart > -1) {
boolean breakNow = false;
for (; k < subTokens.size(); k++) {
gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(k);
if (token.getPosition() == pos) {
breakNow = true;
} else {
if (breakNow) {
break;
}
}
}
}
// now k holds the beginning of the pattern
// leftEnd holds the index of the last token in left context, or -1
// when there is no left context
int leftEnd = leftstart == -1 ? -1 : k - 1;
/*
* we need to validate this pattern. As a result of query, we get
* the positions of the first term. We need to locate the full
* pattern along with all its other annotations. This is done by
* using the PatternValidator class. This class provides a method,
* which takes as arguments the query Tokens, the position in the
* tokenStream from where to start searching and returns the end
* offset of the last annotation in the found pattern. We then
* search for this endoffset in our current tokenStream to
* retrieve the wanted annotations.
*/
// upto: end offset of the pattern; tempPos: position of its last token
int upto = -1;
int tempPos = 0;
if (this.queryParser.needValidation()) {
try {
// reuse the parsed query tokens if already cached for this query
List<String> queryTokens = luceneSearcher.getQueryTokens(query);
if (queryTokens == null) {
queryTokens = new QueryParser().findTokens(query);
luceneSearcher.addQueryTokens(query, queryTokens);
}
/*
* validate method returns the endoffset of the last token of
* the middle pattern returns -1 if pattern could not be
* located at that location
*/
PatternValidator vp = new PatternValidator();
// here k is the position where the first token should occur
upto = vp.validate(queryTokens, subTokens, k, new QueryParser());
if (upto == -1) {
/*
* if the PatternValidator class could not find the valid
* pattern it returns -1 and therefore we should remove the
* position of the invalid pattern; ftpIndex is stepped back so
* that the loop increment lands on the element that moved into
* the removed slot
*/
ftp.remove(ftpIndex);
ftpIndex--;
continue mainForLoop;
} else {
/*
* now we need to locate the token whose endPosition is upto
*/
int jj = leftEnd + 1;
boolean breaknow = false;
tempPos = subTokens.get(jj).getPosition();
for (; jj < subTokens.size(); jj++) {
gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(jj);
if (token.endOffset() == upto) {
tempPos = token.getPosition();
breaknow = true;
} else if (breaknow) {
break;
}
}
// we send the endoffset to our GUI class
patLens.add(upto);
/*
* k holds the position of the first token in right context
*/
k = jj;
}
} catch (Exception e) {
// NOTE(review): the exception is only printed and processing of this
// position continues; if it was thrown before patLens.add(upto) no
// entry is added to patLens while tokens still gets one further
// down, so the two lists may end up misaligned - confirm whether
// this position should be skipped instead
e.printStackTrace();
}
} else {
/*
* the query contains all tokens, which is already validated at
* the time of creating query the pointer k points to the
* beginning of our pattern we need to travel patLen into the
* right direction to obtain the pattern
*/
for (counter = 0; counter < patLen && k < subTokens.size(); k++) {
gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(k);
if (token.termText().equals(baseTokenAnnotationType) && token.type().equals("*")) {
counter++;
upto = token.endOffset();
tempPos = token.getPosition();
}
}
patLens.add(upto);
k++;
}
// largest end offset an annotation may have to be part of the result;
// extended below to the end of the right context
int maxEndOffset = upto;
/*
* so now search for the token with the position == tempPos + 1 in
* other words search for the first term of the right context
*/
for (; k < subTokens.size(); k++) {
gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(k);
if (token.getPosition() == tempPos + 1) {
break;
}
}
// and now we need to locate the right context pattern: count base
// tokens starting at or after the pattern end until patWindow is reached
counter = 0;
for (; k < subTokens.size(); k++) {
gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(k);
if (token.startOffset() >= upto && token.termText().equals(baseTokenAnnotationType) && token.type().equals("*")) {
counter++;
maxEndOffset = token.endOffset();
}
if (counter == patWindow) {
break;
}
}
// if there are any sub-tokens left
if (k < subTokens.size()) {
/*
* now we advance over all following tokens that share the position
* of the last context token, so that they are included as well
*/
tempPos = subTokens.get(k).getPosition();
for (; k < subTokens.size(); k++) {
gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(k);
if (token.getPosition() != tempPos) {
break;
}
}
}
if (k >= subTokens.size()) {
// we used all sub-tokens - set k to the last valid index
k = subTokens.size() - 1;
}
/*
* so k is the position until where we need to search for each
* annotation and every feature in it at the time of creating
* index were converted into separate tokens we need to convert
* them back into annotations
*/
List<PatternAnnotation> patternGateAnnotations = new ArrayList<PatternAnnotation>();
// annotation currently being assembled; feature tokens are attached to it
PatternAnnotation ga = null;
for (int m = j; m <= k; m++) {
gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(m);
String text = token.termText();
int st = token.startOffset();
int end = token.endOffset();
String type = token.type();
int position = token.getPosition();
// if this is a new annotation Type
if (type.equals("*")) {
ga = new PatternAnnotation();
ga.setType(text);
ga.setStOffset(st);
ga.setEnOffset(end);
ga.setPosition(position);
// annotations ending beyond the right context are built but not kept
if (ga.getEndOffset() <= maxEndOffset) {
patternGateAnnotations.add(ga);
}
continue;
} else if (type.equals("**")) {
continue;
}
// and from here all are the features
int index = type.indexOf(".");
String feature = type.substring(index + 1, type.length());
/*
* the "string" feature supplies the text of the annotation
* NOTE(review): ga is dereferenced without a null check, so this
* presumably relies on a "*" token always preceding the feature
* tokens in the scanned range - confirm
*/
if (feature.equals("string")) {
ga.setText(text);
}
ga.addFeature(feature, text);
}
tokens.add(patternGateAnnotations);
if (noOfResultsToFetch != -1)
noOfResultsToFetch--;
}
// the loop increment already moved past the last processed position; step
// back so that the ftpIndex++ at the top of the next call resumes with the
// first unprocessed one
if (noOfResultsToFetch == 0 && ftpIndex < ftp.size())
ftpIndex--;
// finally create an instance of PatternResult
PatternResult pr = new PatternResult();
pr.annotationSetName = annotationSetName;
pr.gateAnnotations = tokens;
// first-term positions of the located patterns, read from ftp starting at
// the index this call began with
pr.firstTermPositions = new ArrayList();
for (int i = 0; i < pr.gateAnnotations.size(); i++) {
pr.firstTermPositions.add(ftp.get(i + ftpIndexATB));
}
pr.patternLegths = patLens;
pr.numberOfPatterns = pr.gateAnnotations.size();
return pr;
}
use of gate.creole.annic.PatternAnnotation in project gate-core by GateNLP.
the class LuceneSearchThread method locatePatterns.
/**
 * Turns the per-pattern annotation lists into {@code Pattern} objects.
 * <p>
 * For every non-empty annotation list the covered text is reconstructed from
 * the annotations' own text, the annotation types and feature names seen are
 * recorded in {@code luceneSearcher.annotationTypesMap}, and the start of the
 * pattern is taken from the annotations located at the corresponding first
 * term position. Entries for which no such annotation exists, or whose
 * pattern would fall outside the reconstructed text, are skipped.
 *
 * @param docID identifier passed on to every created pattern
 * @param annotationSetName annotation set name passed on to every created pattern
 * @param gateAnnotations one list of annotations per candidate pattern
 * @param firstTermPositions position of the first term per candidate, as {@code Integer}
 * @param patternLength end offset of the pattern per candidate
 * @param queryString query passed on to every created pattern
 * @return the patterns that could be located, in input order
 */
private List<Pattern> locatePatterns(String docID, String annotationSetName, List<List<PatternAnnotation>> gateAnnotations, List<?> firstTermPositions, List<Integer> patternLength, String queryString) {
  List<Pattern> located = new ArrayList<Pattern>();
  for (int candidate = 0; candidate < gateAnnotations.size(); candidate++) {
    List<PatternAnnotation> annotations = gateAnnotations.get(candidate);
    if (annotations.isEmpty()) {
      continue;
    }

    // text span covered by all annotations of this candidate
    int smallest = Integer.MAX_VALUE;
    int highest = -1;
    for (PatternAnnotation annotation : annotations) {
      smallest = Math.min(smallest, annotation.getStartOffset());
      highest = Math.max(highest, annotation.getEndOffset());
    }

    // start from blanks and overlay the text of each annotation
    char[] patternText = new char[highest - smallest];
    java.util.Arrays.fill(patternText, ' ');
    for (PatternAnnotation annotation : annotations) {
      String text = annotation.getText();
      if (text == null) {
        // annotations without text (such as split) contribute nothing
        continue;
      }
      int target = annotation.getStartOffset() - smallest;
      for (int source = 0; source < text.length() && target < patternText.length; source++, target++) {
        patternText[target] = text.charAt(source);
      }

      // remember the annotation type and its feature names
      String type = annotation.getType();
      Map<String, String> features = annotation.getFeatures();
      List<String> knownFeatures;
      if (luceneSearcher.annotationTypesMap.containsKey(type)) {
        knownFeatures = luceneSearcher.annotationTypesMap.get(type);
        if (features != null) {
          for (String featureName : features.keySet()) {
            if (!knownFeatures.contains(featureName)) {
              knownFeatures.add(featureName);
            }
          }
        }
      } else {
        knownFeatures = new ArrayList<String>();
        knownFeatures.add("All");
        if (features != null) {
          knownFeatures.addAll(features.keySet());
        }
      }
      luceneSearcher.annotationTypesMap.put(type, knownFeatures);
    }

    // smallest/highest delimit the text; the pattern itself starts at the
    // lowest start offset among the annotations at the first term position
    int firstTermPosition = ((Integer) firstTermPositions.get(candidate)).intValue();
    int patternEnd = patternLength.get(candidate).intValue();
    int patternStart = Integer.MAX_VALUE;
    for (PatternAnnotation annotation : annotations) {
      if (annotation.getPosition() == firstTermPosition && annotation.getStartOffset() < patternStart) {
        patternStart = annotation.getStartOffset();
      }
    }

    boolean startMissing = patternStart == Integer.MAX_VALUE;
    if (startMissing || patternStart < smallest || patternEnd > highest) {
      continue;
    }
    located.add(new Pattern(docID, annotationSetName, new String(patternText), patternStart, patternEnd, smallest, highest, annotations, queryString));
  }
  return located;
}
use of gate.creole.annic.PatternAnnotation in project gate-core by GateNLP.
the class StatsCalculator method freq.
/**
 * Counts the annotations of the given type over all supplied hits.
 * <p>
 * The two flags select which annotations of every hit are inspected: only
 * those between the start and end offset of the match, only those between the
 * context boundaries and the match, or all annotations of the hit when both
 * flags are set.
 *
 * @param patternsToSearchIn hits to inspect; each element is cast to {@code Pattern}
 * @param annotationType annotation type to count
 * @param inMatchedSpan {@code true} to look at annotations within the matched span
 * @param inContext {@code true} to look at annotations in the left and right context
 * @return the total number of matching annotations; {@code 0} when
 *         {@code patternsToSearchIn} is {@code null} or empty
 * @throws SearchException if both {@code inMatchedSpan} and {@code inContext}
 *         are {@code false}
 * @see #freq(List, String, String, String, boolean, boolean)
 */
public static int freq(List<Hit> patternsToSearchIn, String annotationType, boolean inMatchedSpan, boolean inContext) throws SearchException {
  if (patternsToSearchIn == null || patternsToSearchIn.isEmpty()) {
    return 0;
  }
  if (!(inMatchedSpan || inContext)) {
    throw new SearchException("Both inMatchedSpan and inContext cannot be set to false");
  }
  int total = 0;
  for (Hit hit : patternsToSearchIn) {
    Pattern pattern = (Pattern) hit;
    List<PatternAnnotation> candidates;
    if (inMatchedSpan && inContext) {
      // span and context together: every annotation of the hit
      candidates = Arrays.asList(pattern.getPatternAnnotations());
    } else if (inMatchedSpan) {
      candidates = pattern.getPatternAnnotations(pattern.getStartOffset(), pattern.getEndOffset());
    } else {
      // context only: left context first, then the right context appended
      candidates = pattern.getPatternAnnotations(pattern.getLeftContextStartOffset(), pattern.getStartOffset());
      candidates.addAll(pattern.getPatternAnnotations(pattern.getEndOffset(), pattern.getRightContextEndOffset()));
    }
    if (!candidates.isEmpty()) {
      total += getPatternAnnotations(candidates, annotationType).size();
    }
  }
  return total;
}
Aggregations