Search in sources :

Example 91 with IntPair

use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.

the class StringTransformationTest method testReplace.

/**
 * Applies two edits to {@code REPLACE}, at offsets (4, 5) and (14, 15), and checks that the
 * transformed text matches {@code MODREPLACE} with the same length as the original, and that
 * offsets map to themselves in both directions.
 */
@Test
public void testReplace() {
    final StringTransformation transformation = new StringTransformation(REPLACE);
    transformation.transformString(4, 5, "'");
    transformation.transformString(14, 15, "-");

    final String transformed = transformation.getTransformedText();
    // the original text must be retained untouched alongside the transformed text
    assertEquals(REPLACE, transformation.getOrigText());
    assertEquals(REPLACE.length(), transformed.length());
    assertEquals(MODREPLACE, transformed);

    // original -> modified: expected offsets are identical to the originals
    final int mappedStart = transformation.computeModifiedOffsetFromOriginal(14);
    final int mappedEnd = transformation.computeModifiedOffsetFromOriginal(15);
    assertEquals(14, mappedStart);
    assertEquals(15, mappedEnd);

    // modified -> original, first edit
    final IntPair firstEditOrig = transformation.getOriginalOffsets(4, 5);
    assertEquals(4, firstEditOrig.getFirst());
    assertEquals(5, firstEditOrig.getSecond());

    // modified -> original, second edit
    final IntPair secondEditOrig = transformation.getOriginalOffsets(14, 15);
    assertEquals(14, secondEditOrig.getFirst());
    assertEquals(15, secondEditOrig.getSecond());
}
Also used : StringTransformation(edu.illinois.cs.cogcomp.core.utilities.StringTransformation) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Test(org.junit.Test)

Example 92 with IntPair

use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.

the class StringTransformationTest method testExpand.

/**
 * Applies two edits to {@code EXPAND} whose replacement strings are longer than the edited
 * spans, and checks the transformed text against {@code MODEXPAND} as well as the offset
 * mappings between original and transformed text.
 */
@Test
public void testExpand() {
    final StringTransformation transformation = new StringTransformation(EXPAND);
    transformation.transformString(4, 5, "``");
    transformation.transformString(9, 10, "-RCB-");

    final String transformed = transformation.getTransformedText();
    assertEquals(EXPAND, transformation.getOrigText());
    // (4, 5) -> 2 chars adds 1; (9, 10) -> 5 chars adds 4
    assertEquals(EXPAND.length() + 5, transformed.length());
    assertEquals(MODEXPAND, transformed);

    // modified -> original: the two-character replacement maps back to (4, 5)
    final IntPair quoteOrig = transformation.getOriginalOffsets(4, 6);
    assertEquals(4, quoteOrig.getFirst());
    assertEquals(5, quoteOrig.getSecond());

    // modified -> original: the five-character replacement maps back to (9, 10)
    final IntPair braceOrig = transformation.getOriginalOffsets(10, 15);
    assertEquals(9, braceOrig.getFirst());
    assertEquals(10, braceOrig.getSecond());

    // original -> modified for the second edit
    final int mappedStart = transformation.computeModifiedOffsetFromOriginal(9);
    final int mappedEnd = transformation.computeModifiedOffsetFromOriginal(10);
    assertEquals(10, mappedStart);
    assertEquals(15, mappedEnd);
}
Also used : StringTransformation(edu.illinois.cs.cogcomp.core.utilities.StringTransformation) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Test(org.junit.Test)

Example 93 with IntPair

use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.

the class StringTransformationTest method testAbuttingEdits.

/**
 * Exercises edits that directly abut a retained span.
 *
 * When a span next to a retained span is deleted, asking for the original offsets of the
 * retained span must yield offsets that exclude the deleted span. When an edit instead reduced
 * a span, the original offsets must include the removed content -- so the kind of edit matters.
 * This test checks that difference both before and after a span.
 */
@Test
public void testAbuttingEdits() {
    // original:    "The <emph>only</emph> lonely@^@^man</doc>"
    // transformed: "The only man";
    final StringTransformation transformation = new StringTransformation(ABUT);
    transformation.transformString(4, 10, "");
    transformation.transformString(14, 21, "");
    transformation.transformString(CTRLORIGOFFSETS.getFirst(), CTRLORIGOFFSETS.getSecond(), " ");
    transformation.transformString(35, 41, "");

    final String transformed = transformation.getTransformedText();
    assertEquals(MODABUT, transformed);

    // "only": transformed offsets back to original offsets
    final IntPair onlyMappedBack =
            transformation.getOriginalOffsets(ONLYNEWOFFSETS.getFirst(), ONLYNEWOFFSETS.getSecond());
    assertEquals(ONLYORIGOFFSETS, onlyMappedBack);

    // "lonely": the same text must be found at the original and at the new offsets
    final IntPair lonelyMappedBack =
            transformation.getOriginalOffsets(LONELYNEWOFFSETS.getFirst(), LONELYNEWOFFSETS.getSecond());
    final String lonelyInOrig = ABUT.substring(LONELYORIGOFFSETS.getFirst(), LONELYORIGOFFSETS.getSecond());
    final String lonelyInNew = transformed.substring(LONELYNEWOFFSETS.getFirst(), LONELYNEWOFFSETS.getSecond());
    assertEquals(lonelyInOrig, lonelyInNew);
    assertEquals(LONELYORIGOFFSETS, lonelyMappedBack);

    // "only": original offsets forward to transformed offsets
    final int onlyMappedStart = transformation.computeModifiedOffsetFromOriginal(ONLYORIGOFFSETS.getFirst());
    final int onlyMappedEnd = transformation.computeModifiedOffsetFromOriginal(ONLYORIGOFFSETS.getSecond());
    assertEquals(ONLYNEWOFFSETS.getFirst(), onlyMappedStart);
    assertEquals(ONLYNEWOFFSETS.getSecond(), onlyMappedEnd);

    // the span that was replaced by a single space
    final IntPair ctrlMappedBack =
            transformation.getOriginalOffsets(CTRLNEWOFFSETS.getFirst(), CTRLNEWOFFSETS.getSecond());
    assertEquals(CTRLORIGOFFSETS.getFirst(), ctrlMappedBack.getFirst());
    assertEquals(CTRLORIGOFFSETS.getSecond(), ctrlMappedBack.getSecond());

    // "man": retained span followed by a deleted span
    final IntPair manMappedBack =
            transformation.getOriginalOffsets(MANNEWOFFSETS.getFirst(), MANNEWOFFSETS.getSecond());
    final String manInNew = MODABUT.substring(MANNEWOFFSETS.getFirst(), MANNEWOFFSETS.getSecond());
    final String manInOrig = ABUT.substring(MANORIGOFFSETS.getFirst(), MANORIGOFFSETS.getSecond());
    assertEquals(manInNew, manInOrig);
    assertEquals(MANORIGOFFSETS, manMappedBack);
}
Also used : StringTransformation(edu.illinois.cs.cogcomp.core.utilities.StringTransformation) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Test(org.junit.Test)

Example 94 with IntPair

use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.

the class StringTransformationTest method testSequentialExpand.

/**
 * Same edits as the plain expand test, but the transformed text is requested between the two
 * edits, so the second edit is expressed in offsets of the already-modified string.
 */
@Test
public void testSequentialExpand() {
    final StringTransformation transformation = new StringTransformation(EXPAND);
    transformation.transformString(4, 5, "``");

    // requesting the transformed text forces the pending edit to be applied
    String transformed = transformation.getTransformedText();
    assertEquals(EXPAND.length() + 1, transformed.length());

    // from here on, offsets passed to transformString refer to the modified string
    transformation.transformString(10, 11, "-RCB-");
    transformed = transformation.getTransformedText();
    assertEquals(EXPAND, transformation.getOrigText());
    assertEquals(EXPAND.length() + 5, transformed.length());
    assertEquals(MODEXPAND, transformed);

    // original -> modified for the second edit
    final int mappedStart = transformation.computeModifiedOffsetFromOriginal(9);
    final int mappedEnd = transformation.computeModifiedOffsetFromOriginal(10);
    assertEquals(10, mappedStart);
    assertEquals(15, mappedEnd);

    // modified -> original for the first edit
    final IntPair quoteOrig = transformation.getOriginalOffsets(4, 6);
    assertEquals(4, quoteOrig.getFirst());
    assertEquals(5, quoteOrig.getSecond());

    // modified -> original for the second edit
    final IntPair braceOrig = transformation.getOriginalOffsets(10, 15);
    assertEquals(9, braceOrig.getFirst());
    assertEquals(10, braceOrig.getSecond());
}
Also used : StringTransformation(edu.illinois.cs.cogcomp.core.utilities.StringTransformation) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Test(org.junit.Test)

Example 95 with IntPair

use of edu.illinois.cs.cogcomp.core.datastructures.IntPair in project cogcomp-nlp by CogComp.

the class StringTransformationTest method testSequence.

/**
 * Applies a sequence of five edits to {@code SEQUENCE} (a shrinking replacement, two expanding
 * replacements, a deletion and another shrinking replacement) before the transformed text is
 * requested, then checks the result against {@code MODSEQUENCE} and verifies offset mappings
 * in both directions, including queries that fall inside edited regions.
 */
@Test
public void testSequence() {
    //        SEQUENCE= "The http://theonlyway.org {only}^@^@^@ way___";
    //        MODSEQUENCE= "The WWW -LCB-only-RCB- way-";
    StringTransformation st = new StringTransformation(SEQUENCE);
    st.transformString(4, 25, "WWW");
    st.transformString(26, 27, "-LCB-");
    st.transformString(31, 32, "-RCB-");
    st.transformString(32, 38, "");
    st.transformString(42, 45, "-");
    String modifiedStr = st.getTransformedText();
    assertEquals(SEQUENCE, st.getOrigText());
    // net length change of the five edits above: -18 + 4 + 4 - 6 - 2 = -18
    assertEquals(SEQUENCE.length() - 18, modifiedStr.length());
    assertEquals(MODSEQUENCE, modifiedStr);
    int modStart = st.computeModifiedOffsetFromOriginal(4);
    int modEnd = st.computeModifiedOffsetFromOriginal(25);
    assertEquals(4, modStart);
    assertEquals(7, modEnd);
    String transfSeq = modifiedStr.substring(4, 7);
    String origSeq = st.getOrigText().substring(4, 25);
    // expected value first, actual value second, so failure messages read correctly
    assertEquals("WWW", transfSeq);
    assertEquals("http://theonlyway.org", origSeq);
    /*
         * what happens if we query a char in the middle of a deleted sequence?
         * -- should map to beginning of that modification
         * NOTE(review): the value asserted below (7) is the same offset asserted for original
         * offset 25 (modEnd above), not the one for original offset 4 (modStart) -- confirm
         * whether "beginning" is the intended wording.
         */
    int modMid = st.computeModifiedOffsetFromOriginal(20);
    assertEquals(7, modMid);
    IntPair origOffsets = st.getOriginalOffsets(4, 7);
    assertEquals(4, origOffsets.getFirst());
    assertEquals(25, origOffsets.getSecond());
    // intermediate edit chars map to same offsets, treated like replacements
    origOffsets = st.getOriginalOffsets(1, 2);
    assertEquals(1, origOffsets.getFirst());
    assertEquals(2, origOffsets.getSecond());
    // in the middle of the replaced
    origOffsets = st.getOriginalOffsets(1, 6);
    assertEquals(6, origOffsets.getSecond());
    // check expand edit
    origOffsets = st.getOriginalOffsets(17, 22);
    assertEquals(31, origOffsets.getFirst());
    // expansion + deletion
    assertEquals(38, origOffsets.getSecond());
    transfSeq = modifiedStr.substring(17, 22);
    origSeq = st.getOrigText().substring(31, 38);
    assertEquals("-RCB-", transfSeq);
    // combines expand + delete for contiguous spans
    assertEquals("}^@^@^@", origSeq);
    // intermediate edit chars map to same offsets, treated like replacements.
    // note that this could be weird in case of multiple edits at same index
    //   (e.g. insertion, then deletion)
    // Note that these don't really make sense as substrings, and nor are the mapped substrings likely to make sense
    origOffsets = st.getOriginalOffsets(19, 20);
    assertEquals(35, origOffsets.getFirst());
    assertEquals(36, origOffsets.getSecond());
    // forward mapping (original -> modified) of the span replaced by "-RCB-"
    modStart = st.computeModifiedOffsetFromOriginal(31);
    modEnd = st.computeModifiedOffsetFromOriginal(32);
    assertEquals(17, modStart);
    assertEquals(18, modEnd);
}
Also used : StringTransformation(edu.illinois.cs.cogcomp.core.utilities.StringTransformation) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Test(org.junit.Test)

Aggregations

IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair): 103, Pair (edu.illinois.cs.cogcomp.core.datastructures.Pair): 32, Test (org.junit.Test): 20, ArrayList (java.util.ArrayList): 19, TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation): 18, Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent): 14, StringTransformation (edu.illinois.cs.cogcomp.core.utilities.StringTransformation): 13, XmlDocumentProcessor (edu.illinois.cs.cogcomp.core.utilities.XmlDocumentProcessor): 6, Tree (edu.illinois.cs.cogcomp.core.datastructures.trees.Tree): 5, SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView): 4, Sentence (edu.illinois.cs.cogcomp.lbjava.nlp.Sentence): 4, FileNotFoundException (java.io.FileNotFoundException): 4, Matcher (java.util.regex.Matcher): 4, View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View): 3, SentenceSplitter (edu.illinois.cs.cogcomp.lbjava.nlp.SentenceSplitter): 3, LinkedVector (edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector): 3, TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder): 3, CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 3, CoreLabel (edu.stanford.nlp.ling.CoreLabel): 3, Annotation (edu.stanford.nlp.pipeline.Annotation): 3