Search in sources :

Example 1 with PatternAnnotation

use of gate.creole.annic.PatternAnnotation in project gate-core by GateNLP.

the class StatsCalculator method freqForAllValues.

/**
 * Builds a frequency table of the values taken by {@code annotationType.feature}
 * across the supplied hits.
 *
 * @param patternsToSearchIn hits to inspect; every element is cast to {@link Pattern}
 * @param annotationType annotation type passed to the annotation filter
 * @param feature feature whose values are used as keys of the result
 * @param inMatchedSpan whether annotations between the pattern's start and end offsets are considered
 * @param inContext whether annotations in the left and right context are considered
 * @return a map from each feature value to the number of times it was seen;
 *         empty when {@code patternsToSearchIn} is null or empty
 * @throws SearchException if both {@code inMatchedSpan} and {@code inContext} are false
 *         (only checked when there is at least one hit)
 */
public static Map<String, Integer> freqForAllValues(List<Hit> patternsToSearchIn, String annotationType, String feature, boolean inMatchedSpan, boolean inContext) throws SearchException {
    Map<String, Integer> frequencies = new HashMap<String, Integer>();
    if (patternsToSearchIn == null || patternsToSearchIn.isEmpty()) {
        return frequencies;
    }
    if (!(inMatchedSpan || inContext)) {
        throw new SearchException("Both inMatchedSpan and inContext cannot be set to false");
    }
    for (Hit hit : patternsToSearchIn) {
        Pattern pattern = (Pattern) hit;
        List<PatternAnnotation> candidates;
        if (inMatchedSpan && inContext) {
            // matched span plus context: every annotation of the pattern
            candidates = Arrays.asList(pattern.getPatternAnnotations());
        } else if (inMatchedSpan) {
            // matched span only
            candidates = pattern.getPatternAnnotations(pattern.getStartOffset(), pattern.getEndOffset());
        } else {
            // context only: left context followed by right context
            candidates = pattern.getPatternAnnotations(pattern.getLeftContextStartOffset(), pattern.getStartOffset());
            candidates.addAll(pattern.getPatternAnnotations(pattern.getEndOffset(), pattern.getRightContextEndOffset()));
        }
        if (!candidates.isEmpty()) {
            for (PatternAnnotation match : getPatternAnnotations(candidates, annotationType, feature)) {
                String value = match.getFeatures().get(feature);
                Integer seen = frequencies.get(value);
                frequencies.put(value, seen == null ? 1 : seen.intValue() + 1);
            }
        }
    }
    return frequencies;
}
Also used : Pattern(gate.creole.annic.Pattern) Hit(gate.creole.annic.Hit) HashMap(java.util.HashMap) PatternAnnotation(gate.creole.annic.PatternAnnotation) ArrayList(java.util.ArrayList) SearchException(gate.creole.annic.SearchException)

Example 2 with PatternAnnotation

use of gate.creole.annic.PatternAnnotation in project gate-core by GateNLP.

the class LuceneDataStoreSearchGUI method updateStackView.

/**
 * Refreshes the annotation stack shown in the central panel.
 * <p>
 * When no row is selected in the results table the panel is emptied and a
 * short hint or progress label is shown instead. Otherwise the stack is
 * rebuilt from the selected result: one row per configured stack row that
 * is not hidden, each populated with the result's annotations of that
 * row's type.
 */
protected void updateStackView() {
    GridBagConstraints constraints = new GridBagConstraints();
    constraints.gridx = 0;
    constraints.gridy = 0;
    constraints.fill = GridBagConstraints.BOTH;
    if (resultTable.getSelectedRow() == -1) {
        // nothing is selected: show a placeholder label instead of the stack
        centerPanel.removeAll();
        String message;
        if (resultTable.getRowCount() > 0) {
            message = "Select a row in the results table below.";
        } else if (numberOfResultsSlider.getValue() > (numberOfResultsSlider.getMaximum() - 100)) {
            message = "Retrieving all results...";
        } else {
            message = "Retrieving " + numberOfResultsSlider.getValue() + " results...";
        }
        centerPanel.add(new JLabel(message), constraints);
        centerPanel.validate();
        centerPanel.repaint();
        return;
    }
    // look up the result behind the lead-selected row (view index -> model index)
    int modelRow = resultTable.rowViewToModel(resultTable.getSelectionModel().getLeadSelectionIndex());
    Pattern result = (Pattern) results.get(modelRow);
    // configure the annotation stack for this result
    centerPanel.setText(result.getPatternText());
    centerPanel.setExpressionStartOffset(result.getStartOffset());
    centerPanel.setExpressionEndOffset(result.getEndOffset());
    centerPanel.setContextBeforeSize(result.getStartOffset() - result.getLeftContextStartOffset());
    centerPanel.setContextAfterSize(result.getRightContextEndOffset() - result.getEndOffset());
    centerPanel.setLastRowButton(configureStackViewButton);
    centerPanel.setTextMouseListener(new TextMouseListener());
    centerPanel.setHeaderMouseListener(new HeaderMouseListener());
    centerPanel.setAnnotationMouseListener(new AnnotationMouseListener());
    centerPanel.clearAllRows();
    // one stack row per configured row that is still displayed
    for (int row = 0; row < numStackRows; row++) {
        if (stackRows[row][DISPLAY].equals("false")) {
            continue;
        }
        final String type = stackRows[row][ANNOTATION_TYPE];
        final String feature = stackRows[row][FEATURE];
        String shortcut = stackRows[row][SHORTCUT];
        // button at the end of the row that hides it again
        JButton hideButton = new ButtonBorder(new Color(250, 250, 250), new Insets(0, 3, 0, 3), true);
        hideButton.setIcon(MainFrame.getIcon("Remove"));
        hideButton.setToolTipText("Hide this row.");
        hideButton.addActionListener(new ActionListener() {

            @Override
            public void actionPerformed(ActionEvent event) {
                // the configuration may have changed since the button was built,
                // so the row is looked up again by type and feature
                int hiddenRow = findStackRow(ANNOTATION_TYPE, type, FEATURE, feature);
                if (hiddenRow >= 0) {
                    stackRows[hiddenRow][DISPLAY] = "false";
                    saveStackViewConfiguration();
                }
                updateStackView();
            }
        });
        // anything other than "Crop start" / "Crop end" crops in the middle
        String cropSetting = stackRows[row][CROP];
        int crop = cropSetting.equals("Crop start")
            ? AnnotationStack.CROP_START
            : (stackRows[row][CROP].equals("Crop end") ? AnnotationStack.CROP_END : AnnotationStack.CROP_MIDDLE);
        centerPanel.addRow(null, type, feature, hideButton, shortcut, crop);
        // fill the row with the result's annotations of this type
        PatternAnnotation[] rowAnnotations = result.getPatternAnnotations(type);
        if (rowAnnotations != null) {
            for (PatternAnnotation annotation : rowAnnotations) {
                FeatureMap copy = Factory.newFeatureMap();
                copy.putAll(annotation.getFeatures());
                centerPanel.addAnnotation(annotation.getStartOffset(), annotation.getEndOffset(), annotation.getType(), copy);
            }
        }
    }
    // finally render the stack
    centerPanel.drawStack();
}
Also used : Pattern(gate.creole.annic.Pattern) GridBagConstraints(java.awt.GridBagConstraints) Insets(java.awt.Insets) ActionEvent(java.awt.event.ActionEvent) PatternAnnotation(gate.creole.annic.PatternAnnotation) Color(java.awt.Color) JButton(javax.swing.JButton) JLabel(javax.swing.JLabel) FeatureMap(gate.FeatureMap) ActionListener(java.awt.event.ActionListener)

Example 3 with PatternAnnotation

use of gate.creole.annic.PatternAnnotation in project gate-core by GateNLP.

the class LuceneSearchThread method getPatternResult.

/**
 * Collects, for each remaining first-term position in {@code ftp}, the
 * annotations of the located pattern together with its left and right
 * context, and returns them wrapped in a {@link PatternResult}.
 * <p>
 * Side effects visible in this method: {@code ftpIndex} is advanced as
 * positions are consumed, and positions whose pattern fails validation are
 * removed from {@code ftp}.
 * <p>
 * Token types are interpreted as follows: a token of type {@code "*"}
 * starts a new annotation whose type is the token's term text, a token of
 * type {@code "**"} is skipped, and any other token is treated as a feature
 * of the most recently started annotation (the feature name is the part of
 * the token type after the first {@code '.'}).
 *
 * @param subTokens the token stream to search in
 * @param annotationSetName stored unchanged in the returned result
 * @param patLen number of base tokens making up the pattern; only used when
 *          the query parser reports that no validation is needed
 * @param patWindow maximum number of base tokens collected as context on
 *          each side of the pattern
 * @param query the query string, used to obtain (and cache) the query
 *          tokens when validation is needed
 * @param baseTokenAnnotationType term text that, combined with token type
 *          {@code "*"}, identifies a base token
 * @param noOfResultsToFetch maximum number of patterns to collect, or
 *          {@code -1} for no limit
 * @return a result holding one annotation list per collected pattern, the
 *         matching first-term positions and the pattern end offsets
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private PatternResult getPatternResult(List<gate.creole.annic.apache.lucene.analysis.Token> subTokens, String annotationSetName, int patLen, int patWindow, String query, String baseTokenAnnotationType, int noOfResultsToFetch) {
    List<List<PatternAnnotation>> tokens = new ArrayList<List<PatternAnnotation>>();
    List<Integer> patLens = new ArrayList<Integer>();
    ftpIndex++;
    // Phrase Query
    // consider only one pattern at a time
    // first term position index at the beginning (ATB) of this call; used at
    // the end to read back the positions of the collected patterns
    int ftpIndexATB = ftpIndex;
    mainForLoop: for (; ftpIndex < ftp.size() && (noOfResultsToFetch == -1 || noOfResultsToFetch > 0); ftpIndex++) {
        // find out the position of the first term
        int pos = ((Integer) ftp.get(ftpIndex)).intValue();
        // find the index of the first token with that position
        // (j ends up as subTokens.size() when no token has it)
        int j = 0;
        for (; j < subTokens.size(); j++) {
            gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(j);
            if (token.getPosition() == pos) {
                break;
            }
        }
        int counter = 0;
        int leftstart = -1;
        /*
         * ok so we need to go back to find out the first token of the
         * left context: walk backwards and stop once patWindow base tokens
         * have been seen
         */
        int k = j - 1;
        for (; k >= 0; k--) {
            gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(k);
            if (token.getPosition() < pos && token.termText().equals(baseTokenAnnotationType) && token.type().equals("*")) {
                counter++;
                leftstart = token.startOffset();
                j = k;
            }
            if (counter == patWindow) {
                break;
            }
        }
        // j holds the start of the left context
        // now we want to search for the end of left context, i.e. the first
        // token after the run of tokens at position pos - 1
        pos--;
        k = j;
        if (leftstart > -1) {
            boolean breakNow = false;
            for (; k < subTokens.size(); k++) {
                gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(k);
                if (token.getPosition() == pos) {
                    breakNow = true;
                } else {
                    if (breakNow) {
                        break;
                    }
                }
            }
        }
        // now k holds the beginning of the pattern
        // leftEnd holds the index of the last token in left context,
        // or -1 when there is no left context
        int leftEnd = leftstart == -1 ? -1 : k - 1;
        /*
         * we need to validate this pattern. As a result of query, we get
         * the positions of the first term. We need to locate the full
         * pattern along with all its other annotations. This is done by
         * using the PatternValidator class. This class provides a method,
         * which takes as arguments the query Tokens, the position in the
         * tokenStream from where to start searching and returns the end
         * offset of the last annotation in the found pattern. We then
         * search for this endoffset in our current tokenStream to
         * retrieve the wanted annotations.
         */
        // upto: end offset of the pattern; tempPos: position of its last token
        int upto = -1;
        int tempPos = 0;
        if (this.queryParser.needValidation()) {
            try {
                // query tokens are cached per query string in luceneSearcher
                List<String> queryTokens = luceneSearcher.getQueryTokens(query);
                if (queryTokens == null) {
                    queryTokens = new QueryParser().findTokens(query);
                    luceneSearcher.addQueryTokens(query, queryTokens);
                }
                /*
                 * validate method returns the endoffset of the last token of
                 * the middle pattern; returns -1 if pattern could not be
                 * located at that location
                 */
                PatternValidator vp = new PatternValidator();
                // here k is the position where the first token should occur
                upto = vp.validate(queryTokens, subTokens, k, new QueryParser());
                if (upto == -1) {
                    /*
                     * if the PatternValidator class could not find the valid
                     * pattern it returns -1 and therefore we should remove the
                     * position of the invalid pattern; ftpIndex is stepped
                     * back so the loop increment lands on the next position
                     */
                    ftp.remove(ftpIndex);
                    ftpIndex--;
                    continue mainForLoop;
                } else {
                    /*
                     * now we need to locate the token whose endPosition is upto
                     */
                    int jj = leftEnd + 1;
                    boolean breaknow = false;
                    tempPos = subTokens.get(jj).getPosition();
                    for (; jj < subTokens.size(); jj++) {
                        gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(jj);
                        if (token.endOffset() == upto) {
                            tempPos = token.getPosition();
                            breaknow = true;
                        } else if (breaknow) {
                            break;
                        }
                    }
                    // we send the endoffset to our GUI class
                    patLens.add(upto);
                    /*
                     * k holds the position of the first token in right context
                     */
                    k = jj;
                }
            } catch (Exception e) {
                // NOTE(review): the exception is only printed and processing
                // continues; if it was thrown before patLens.add(upto) above,
                // this iteration still adds an entry to tokens below but none
                // to patLens - confirm whether that misalignment is intended
                e.printStackTrace();
            }
        } else {
            /*
             * the query contains all tokens, which is already validated at
             * the time of creating query. The pointer k points to the
             * beginning of our pattern; we need to travel patLen base tokens
             * in the right direction to obtain the pattern
             */
            for (counter = 0; counter < patLen && k < subTokens.size(); k++) {
                gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(k);
                if (token.termText().equals(baseTokenAnnotationType) && token.type().equals("*")) {
                    counter++;
                    upto = token.endOffset();
                    tempPos = token.getPosition();
                }
            }
            patLens.add(upto);
            k++;
        }
        // largest end offset an annotation may have to be kept; extended
        // below as right-context base tokens are found
        int maxEndOffset = upto;
        /*
         * so now search for the token with the position == tempPos + 1, in
         * other words search for the first term of the right context
         */
        for (; k < subTokens.size(); k++) {
            gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(k);
            if (token.getPosition() == tempPos + 1) {
                break;
            }
        }
        // and now we need to locate the right context pattern: advance until
        // patWindow base tokens starting at or after upto have been seen
        counter = 0;
        for (; k < subTokens.size(); k++) {
            gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(k);
            if (token.startOffset() >= upto && token.termText().equals(baseTokenAnnotationType) && token.type().equals("*")) {
                counter++;
                maxEndOffset = token.endOffset();
            }
            if (counter == patWindow) {
                break;
            }
        }
        // if there are any sub-tokens left
        if (k < subTokens.size()) {
            /*
             * now we skip forward over all tokens that share the position
             * of the last right-context token
             */
            tempPos = subTokens.get(k).getPosition();
            for (; k < subTokens.size(); k++) {
                gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(k);
                if (token.getPosition() != tempPos) {
                    break;
                }
            }
        }
        if (k >= subTokens.size()) {
            // we used all sub-tokens - set k to the last valid index
            k = subTokens.size() - 1;
        }
        /*
         * so k is the index until where we need to search. Each
         * annotation and every feature in it were converted into separate
         * tokens at the time of creating the index; we need to convert
         * them back into annotations
         */
        List<PatternAnnotation> patternGateAnnotations = new ArrayList<PatternAnnotation>();
        PatternAnnotation ga = null;
        for (int m = j; m <= k; m++) {
            gate.creole.annic.apache.lucene.analysis.Token token = subTokens.get(m);
            String text = token.termText();
            int st = token.startOffset();
            int end = token.endOffset();
            String type = token.type();
            int position = token.getPosition();
            // if this is a new annotation Type
            if (type.equals("*")) {
                ga = new PatternAnnotation();
                ga.setType(text);
                ga.setStOffset(st);
                ga.setEnOffset(end);
                ga.setPosition(position);
                // annotations ending beyond the right context are not kept,
                // but ga still receives the feature tokens that follow
                if (ga.getEndOffset() <= maxEndOffset) {
                    patternGateAnnotations.add(ga);
                }
                continue;
            } else if (type.equals("**")) {
                continue;
            }
            // and from here all are the features: the feature name is
            // whatever follows the first '.' in the token type
            int index = type.indexOf(".");
            String feature = type.substring(index + 1, type.length());
            /*
             * each annotation has a "string" feature in the index, so the
             * annotation text is expected to be initialized here
             */
            // NOTE(review): ga is still null if a feature token is met before
            // any "*" token in [j, k]; presumably the index guarantees an
            // annotation token comes first - confirm
            if (feature.equals("string")) {
                ga.setText(text);
            }
            ga.addFeature(feature, text);
        }
        tokens.add(patternGateAnnotations);
        if (noOfResultsToFetch != -1)
            noOfResultsToFetch--;
    }
    // the loop increment already moved ftpIndex past the last consumed
    // position; step back, presumably so that the ftpIndex++ at the start of
    // the next call resumes at the first unconsumed position
    if (noOfResultsToFetch == 0 && ftpIndex < ftp.size())
        ftpIndex--;
    // finally create an instance of PatternResult
    PatternResult pr = new PatternResult();
    pr.annotationSetName = annotationSetName;
    pr.gateAnnotations = tokens;
    pr.firstTermPositions = new ArrayList();
    // one first-term position per collected pattern, read from ftp starting
    // at the index this call began with
    for (int i = 0; i < pr.gateAnnotations.size(); i++) {
        pr.firstTermPositions.add(ftp.get(i + ftpIndexATB));
    }
    pr.patternLegths = patLens;
    pr.numberOfPatterns = pr.gateAnnotations.size();
    return pr;
}
Also used : PatternAnnotation(gate.creole.annic.PatternAnnotation) ArrayList(java.util.ArrayList) IOException(java.io.IOException) SearchException(gate.creole.annic.SearchException) ArrayList(java.util.ArrayList) List(java.util.List)

Example 4 with PatternAnnotation

use of gate.creole.annic.PatternAnnotation in project gate-core by GateNLP.

the class LuceneSearchThread method locatePatterns.

/**
 * Turns the per-pattern annotation lists into {@link Pattern} objects.
 * <p>
 * For each non-empty annotation list the covered text is rebuilt (gaps are
 * filled with spaces), the annotation types and feature names seen are
 * recorded in {@code luceneSearcher.annotationTypesMap}, and the pattern's
 * start offset is taken from the annotations sitting at the corresponding
 * first-term position. Entries for which no such annotation exists, or
 * whose pattern span falls outside the covered text, are skipped.
 *
 * @param docID passed through to each created pattern
 * @param annotationSetName passed through to each created pattern
 * @param gateAnnotations one annotation list per candidate pattern
 * @param firstTermPositions first-term position ({@code Integer}) per candidate, index-aligned with {@code gateAnnotations}
 * @param patternLength pattern end offset per candidate, index-aligned with {@code gateAnnotations}
 * @param queryString passed through to each created pattern
 * @return the patterns that could be located
 */
private List<Pattern> locatePatterns(String docID, String annotationSetName, List<List<PatternAnnotation>> gateAnnotations, List<?> firstTermPositions, List<Integer> patternLength, String queryString) {
    List<Pattern> located = new ArrayList<Pattern>();
    for (int i = 0; i < gateAnnotations.size(); i++) {
        // every entry of gateAnnotations describes one candidate pattern
        List<PatternAnnotation> annotations = gateAnnotations.get(i);
        if (annotations.size() == 0) {
            continue;
        }
        // text span covered by all annotations of this candidate
        int smallest = Integer.MAX_VALUE;
        int highest = -1;
        for (PatternAnnotation annotation : annotations) {
            smallest = Math.min(smallest, annotation.getStartOffset());
            highest = Math.max(highest, annotation.getEndOffset());
        }
        // start from an all-blank buffer and paste each annotation's text in
        char[] patternText = new char[highest - smallest];
        for (int c = 0; c < patternText.length; c++) {
            patternText[c] = ' ';
        }
        for (PatternAnnotation annotation : annotations) {
            String text = annotation.getText();
            if (text == null) {
                // annotations without text (e.g. split) contribute nothing
                continue;
            }
            int target = annotation.getStartOffset() - smallest;
            for (int source = 0; source < text.length() && target < patternText.length; source++, target++) {
                patternText[target] = text.charAt(source);
            }
            // remember the type and its feature names for the annotation types map
            String type = annotation.getType();
            Map<String, String> features = annotation.getFeatures();
            List<String> knownFeatures;
            if (luceneSearcher.annotationTypesMap.keySet().contains(type)) {
                knownFeatures = luceneSearcher.annotationTypesMap.get(type);
                if (features != null) {
                    for (String featureName : features.keySet()) {
                        if (!knownFeatures.contains(featureName)) {
                            knownFeatures.add(featureName);
                        }
                    }
                }
            } else {
                // first time this type is seen: the list starts with "All"
                knownFeatures = new ArrayList<String>();
                knownFeatures.add("All");
                if (features != null) {
                    knownFeatures.addAll(features.keySet());
                }
            }
            luceneSearcher.annotationTypesMap.put(type, knownFeatures);
        }
        // smallest / highest are the text start and end offsets; the pattern
        // start is the lowest start offset among annotations at the first
        // term position
        int stPos = ((Integer) firstTermPositions.get(i)).intValue();
        int endOffset = patternLength.get(i).intValue();
        int patStart = Integer.MAX_VALUE;
        for (PatternAnnotation annotation : annotations) {
            if (annotation.getPosition() == stPos) {
                patStart = Math.min(patStart, annotation.getStartOffset());
            }
        }
        // skip when nothing sits at the first term position or the pattern
        // span is not contained in the covered text
        if (patStart == Integer.MAX_VALUE || patStart < smallest || endOffset > highest) {
            continue;
        }
        located.add(new Pattern(docID, annotationSetName, new String(patternText), patStart, endOffset, smallest, highest, annotations, queryString));
    }
    return located;
}
Also used : Pattern(gate.creole.annic.Pattern) PatternAnnotation(gate.creole.annic.PatternAnnotation) ArrayList(java.util.ArrayList)

Example 5 with PatternAnnotation

use of gate.creole.annic.PatternAnnotation in project gate-core by GateNLP.

the class StatsCalculator method freq.

/**
 * Counts, across the supplied hits, the annotations selected for
 * {@code annotationType} by the two-argument {@code getPatternAnnotations}
 * filter, restricted to the matched span, the context, or both.
 *
 * @param patternsToSearchIn hits to inspect; every element is cast to {@link Pattern}
 * @param annotationType annotation type passed to the annotation filter
 * @param inMatchedSpan whether annotations between the pattern's start and end offsets are considered
 * @param inContext whether annotations in the left and right context are considered
 * @return the total count, or 0 when {@code patternsToSearchIn} is null or empty
 * @throws SearchException if both {@code inMatchedSpan} and {@code inContext} are false
 *         (only checked when there is at least one hit)
 * @see #freq(List, String, String, String, boolean, boolean)
 */
public static int freq(List<Hit> patternsToSearchIn, String annotationType, boolean inMatchedSpan, boolean inContext) throws SearchException {
    if (patternsToSearchIn == null || patternsToSearchIn.isEmpty()) {
        return 0;
    }
    if (!(inMatchedSpan || inContext)) {
        throw new SearchException("Both inMatchedSpan and inContext cannot be set to false");
    }
    int total = 0;
    for (Hit hit : patternsToSearchIn) {
        Pattern pattern = (Pattern) hit;
        List<PatternAnnotation> candidates;
        if (inMatchedSpan && inContext) {
            // matched span plus context: every annotation of the pattern
            candidates = Arrays.asList(pattern.getPatternAnnotations());
        } else if (inMatchedSpan) {
            // matched span only
            candidates = pattern.getPatternAnnotations(pattern.getStartOffset(), pattern.getEndOffset());
        } else {
            // context only: left context followed by right context
            candidates = pattern.getPatternAnnotations(pattern.getLeftContextStartOffset(), pattern.getStartOffset());
            candidates.addAll(pattern.getPatternAnnotations(pattern.getEndOffset(), pattern.getRightContextEndOffset()));
        }
        if (!candidates.isEmpty()) {
            total += getPatternAnnotations(candidates, annotationType).size();
        }
    }
    return total;
}
Also used : Pattern(gate.creole.annic.Pattern) Hit(gate.creole.annic.Hit) PatternAnnotation(gate.creole.annic.PatternAnnotation) ArrayList(java.util.ArrayList) SearchException(gate.creole.annic.SearchException)

Aggregations

PatternAnnotation (gate.creole.annic.PatternAnnotation): 5, Pattern (gate.creole.annic.Pattern): 4, ArrayList (java.util.ArrayList): 4, SearchException (gate.creole.annic.SearchException): 3, Hit (gate.creole.annic.Hit): 2, FeatureMap (gate.FeatureMap): 1, Color (java.awt.Color): 1, GridBagConstraints (java.awt.GridBagConstraints): 1, Insets (java.awt.Insets): 1, ActionEvent (java.awt.event.ActionEvent): 1, ActionListener (java.awt.event.ActionListener): 1, IOException (java.io.IOException): 1, HashMap (java.util.HashMap): 1, List (java.util.List): 1, JButton (javax.swing.JButton): 1, JLabel (javax.swing.JLabel): 1