Search in sources:

Example 86 with StringReader

use of java.io.StringReader in project CoreNLP by stanfordnlp.

the class DcorefBenchmarkSlowITest method getCorefResults.

/**
 * Parses scorer output text into a counter of named result values.
 *
 * <p>The text is read line by line. Each line is tested, as a whole-line
 * match, against every one of the result patterns; when a pattern matches,
 * its capture group(s) are parsed as doubles and stored under the
 * corresponding key(s). A later matching line overwrites the value stored
 * by an earlier one, and lines matching no pattern are ignored.
 *
 * @param resultsString the complete results text to scan
 * @return a counter holding one entry per key whose pattern matched
 * @throws IOException if reading from or closing the underlying reader fails
 * @throws NumberFormatException if a captured group is not a parsable double
 */
public static Counter<String> getCorefResults(String resultsString) throws IOException {
    Counter<String> results = new ClassicCounter<>();
    // try-with-resources guarantees the reader is closed, including when a
    // captured group fails to parse and an exception propagates out.
    try (BufferedReader r = new BufferedReader(new StringReader(resultsString))) {
        for (String line; (line = r.readLine()) != null; ) {
            Matcher m1 = MENTION_PATTERN.matcher(line);
            if (m1.matches()) {
                results.setCount(MENTION_TP, Double.parseDouble(m1.group(1)));
                results.setCount(MENTION_F1, Double.parseDouble(m1.group(2)));
            }
            Matcher m2 = MUC_PATTERN.matcher(line);
            if (m2.matches()) {
                results.setCount(MUC_TP, Double.parseDouble(m2.group(1)));
                results.setCount(MUC_F1, Double.parseDouble(m2.group(2)));
            }
            Matcher m3 = BCUBED_PATTERN.matcher(line);
            if (m3.matches()) {
                results.setCount(BCUBED_TP, Double.parseDouble(m3.group(1)));
                results.setCount(BCUBED_F1, Double.parseDouble(m3.group(2)));
            }
            Matcher m4 = CEAFM_PATTERN.matcher(line);
            if (m4.matches()) {
                results.setCount(CEAFM_TP, Double.parseDouble(m4.group(1)));
                results.setCount(CEAFM_F1, Double.parseDouble(m4.group(2)));
            }
            Matcher m5 = CEAFE_PATTERN.matcher(line);
            if (m5.matches()) {
                results.setCount(CEAFE_TP, Double.parseDouble(m5.group(1)));
                results.setCount(CEAFE_F1, Double.parseDouble(m5.group(2)));
            }
            // The last two patterns expose a single captured value each.
            Matcher m6 = BLANC_PATTERN.matcher(line);
            if (m6.matches()) {
                results.setCount(BLANC_F1, Double.parseDouble(m6.group(1)));
            }
            Matcher m7 = CONLL_PATTERN.matcher(line);
            if (m7.matches()) {
                results.setCount(CONLL_SCORE, Double.parseDouble(m7.group(1)));
            }
        }
    }
    return results;
}
Also used : Matcher(java.util.regex.Matcher) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) BufferedReader(java.io.BufferedReader) StringReader(java.io.StringReader)

Example 87 with StringReader

use of java.io.StringReader in project CoreNLP by stanfordnlp.

the class SpanishTokenizerITest method testOffsetsSpacing.

/**
 * Tokenizes a Spanish sentence containing irregular whitespace with the
 * "splitAll=true" option and checks the token count plus the original text,
 * normalized text and character offsets of selected tokens.
 */
public void testOffsetsSpacing() {
    // guide                 1         2         3         4          5         6         7           8         9         0         1         2         3
    // guide       0123456789012345678901234567890123456789012345678 90123456789012345678901234567 8 901234567890123456789012345678901234567890123456789012345
    String text = "  La   combinación consonántica ss es ajena a la\tortografía    castellana:   \n\n traigámosela, mandémoselos, escribámosela, comprémoselo.";
    final TokenizerFactory<CoreLabel> factory = SpanishTokenizer.coreLabelFactory();
    factory.setOptions("");
    factory.setOptions("splitAll=true");
    List<CoreLabel> tokens = factory.getTokenizer(new StringReader(text)).tokenize();
    System.err.println(tokens);
    assertEquals(27, tokens.size());

    // First token: "La", preceded by two leading spaces.
    CoreLabel first = tokens.get(0);
    // assertEquals("  ", tokens.get(0).get(CoreAnnotations.BeforeAnnotation.class));
    // assertEquals("\t", tokens.get(8).get(CoreAnnotations.AfterAnnotation.class));
    assertEquals("Begin char offset", 2, (int) first.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
    assertEquals("End char offset", 4, (int) first.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
    assertEquals("La", first.get(CoreAnnotations.OriginalTextAnnotation.class));
    // note: after(x) and before(x+1) are the same
    // assertEquals("   ", tokens.get(0).get(CoreAnnotations.AfterAnnotation.class));
    // assertEquals("   ", tokens.get(1).get(CoreAnnotations.BeforeAnnotation.class));

    // Tokens 19-21: the verb stem and the two pieces split off after it.
    CoreLabel verb = tokens.get(19);
    assertEquals("escribámo", verb.get(CoreAnnotations.OriginalTextAnnotation.class));
    assertEquals("escribamos", verb.get(CoreAnnotations.TextAnnotation.class));
    assertEquals("Begin char offset", 108, (int) verb.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
    assertEquals("End char offset", 117, (int) verb.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));

    CoreLabel firstClitic = tokens.get(20);
    assertEquals("se", firstClitic.get(CoreAnnotations.OriginalTextAnnotation.class));
    assertEquals("se", firstClitic.get(CoreAnnotations.TextAnnotation.class));
    assertEquals("Begin char offset", 117, (int) firstClitic.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
    assertEquals("End char offset", 119, (int) firstClitic.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));

    CoreLabel secondClitic = tokens.get(21);
    assertEquals("la", secondClitic.get(CoreAnnotations.OriginalTextAnnotation.class));
    assertEquals("la", secondClitic.get(CoreAnnotations.TextAnnotation.class));
    assertEquals("Begin char offset", 119, (int) secondClitic.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
    assertEquals("End char offset", 121, (int) secondClitic.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));

    // Token 22: the comma immediately following the split word.
    CoreLabel comma = tokens.get(22);
    assertEquals(",", comma.get(CoreAnnotations.OriginalTextAnnotation.class));
    assertEquals(",", comma.get(CoreAnnotations.TextAnnotation.class));
    assertEquals("Begin char offset", 121, (int) comma.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
    assertEquals("End char offset", 122, (int) comma.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) StringReader(java.io.StringReader) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations)

Example 88 with StringReader

use of java.io.StringReader in project CoreNLP by stanfordnlp.

the class Morphology method stem.

/**
 * Feeds a single word to this instance's lexer, started in the
 * {@code Morpha.any} state, and returns the first token it produces.
 *
 * @param word the word to process
 * @return the lexer's first token, or {@code word} unchanged if the lexer
 *     raised an {@link IOException} (a warning is logged in that case)
 */
public String stem(String word) {
    final StringReader input = new StringReader(word);
    try {
        lexer.yyreset(input);
        lexer.yybegin(Morpha.any);
        return lexer.next();
    } catch (IOException ioe) {
        // Best effort: report the problem and hand the input back untouched.
        log.warning("Morphology.stem() had error on word " + word);
        return word;
    }
}
Also used : StringReader(java.io.StringReader) IOException(java.io.IOException)

Example 89 with StringReader

use of java.io.StringReader in project CoreNLP by stanfordnlp.

the class Morphology method lemmatize.

/**
 * Lemmatize the word, being sensitive to the tag, using the passed in lexer.
 *
 * <p>The lexer is fed the string {@code word + '_' + tag}. Because '_' is the
 * separator in that string, any underscore, space or newline inside the word
 * is first swapped for a placeholder character and swapped back in the result.
 *
 * @param word the word to lemmatize
 * @param tag the tag appended to the word before lexing
 * @param lexer the lexer to run; it is reset and reconfigured by this call
 * @param lowercase If this is true, words other than proper nouns will
 *     be changed to all lowercase.
 * @return the first token produced by the lexer, with placeholder characters
 *     restored, or {@code word} unchanged if the lexer raised an
 *     {@link IOException}
 */
private static String lemmatize(String word, String tag, Morpha lexer, boolean lowercase) {
    boolean wordHasForbiddenChar = word.indexOf('_') >= 0 || word.indexOf(' ') >= 0 || word.indexOf('\n') >= 0;
    String quotedWord = word;
    if (wordHasForbiddenChar) {
        // choose something unlikely. Classical Vedic!
        // Plain character substitution: no regex semantics are needed here.
        quotedWord = quotedWord.replace('_', 'ᳰ').replace(' ', 'ᳱ').replace('\n', 'ᳲ');
    }
    String wordtag = quotedWord + '_' + tag;
    if (DEBUG) {
        log.info("Trying to normalize |" + wordtag + '|');
    }
    try {
        lexer.setOption(1, lowercase);
        lexer.yyreset(new StringReader(wordtag));
        lexer.yybegin(Morpha.scan);
        String wordRes = lexer.next();
        // go past tag
        lexer.next();
        if (wordHasForbiddenChar) {
            if (DEBUG) {
                log.info("Restoring forbidden chars");
            }
            wordRes = wordRes.replace('ᳰ', '_').replace('ᳱ', ' ').replace('ᳲ', '\n');
        }
        return wordRes;
    } catch (IOException e) {
        // Name the method that actually failed so the warning is traceable.
        log.warning("Morphology.lemmatize() had error on word " + word + '/' + tag);
        return word;
    }
}
Also used : StringReader(java.io.StringReader) IOException(java.io.IOException)

Example 90 with StringReader

use of java.io.StringReader in project liquibase by liquibase.

the class StreamUtilTest method testGetReaderContents.

@Test
public void testGetReaderContents() throws IOException {
    // Whatever goes into the reader must come back out unchanged.
    final String expected = "TEST";
    final String actual = StreamUtil.getReaderContents(new StringReader(expected));
    assertEquals(expected, actual);
}
Also used : StringReader(java.io.StringReader) Test(org.junit.Test)

Aggregations

StringReader (java.io.StringReader)4150 Test (org.junit.Test)1003 IOException (java.io.IOException)589 Reader (java.io.Reader)445 InputSource (org.xml.sax.InputSource)408 BufferedReader (java.io.BufferedReader)342 TokenStream (org.apache.lucene.analysis.TokenStream)302 ArrayList (java.util.ArrayList)273 StringWriter (java.io.StringWriter)251 Tokenizer (org.apache.lucene.analysis.Tokenizer)241 Document (org.w3c.dom.Document)232 JSONReader (com.alibaba.fastjson.JSONReader)195 DocumentBuilder (javax.xml.parsers.DocumentBuilder)180 DocumentBuilderFactory (javax.xml.parsers.DocumentBuilderFactory)157 Map (java.util.Map)144 HashMap (java.util.HashMap)136 Element (org.w3c.dom.Element)134 StreamSource (javax.xml.transform.stream.StreamSource)132 ParserResult (org.jabref.logic.importer.ParserResult)130 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)120