Search in sources :

Example 11 with StringTransformation

use of edu.illinois.cs.cogcomp.core.utilities.StringTransformation in project cogcomp-nlp by CogComp.

the class StringTransformationTest method testAbuttingEdits.

/**
 * Exercises offset mapping for spans that sit directly next to edited regions.
 * <p>
 * Per the original author's note: when a span adjacent to a retained span is deleted, the
 * original offsets recovered for the retained span must not include the deleted span; when an
 * edit reduced a span, the original offsets must include the deleted content -- i.e. the edit
 * type matters. The checks below cover an edit both before and after a span.
 */
@Test
public void testAbuttingEdits() {
    // author's sketch of the input:  "The <emph>only</emph> lonely@^@^man</doc>"
    // author's sketch of the output: "The only man"
    // (the authoritative values are the ABUT / MODABUT constants declared elsewhere in the class)
    StringTransformation xform = new StringTransformation(ABUT);

    // three deletions, plus one replacement of the control-character span by a single space
    xform.transformString(4, 10, "");
    xform.transformString(14, 21, "");
    xform.transformString(CTRLORIGOFFSETS.getFirst(), CTRLORIGOFFSETS.getSecond(), " ");
    xform.transformString(35, 41, "");

    String cleaned = xform.getTransformedText();
    assertEquals(MODABUT, cleaned);

    // "only": transformed offsets -> original offsets
    IntPair onlyMappedBack =
            xform.getOriginalOffsets(ONLYNEWOFFSETS.getFirst(), ONLYNEWOFFSETS.getSecond());
    assertEquals(ONLYORIGOFFSETS, onlyMappedBack);

    // "lonely": the surface text must be unchanged, and the offsets must map back
    IntPair lonelyMappedBack =
            xform.getOriginalOffsets(LONELYNEWOFFSETS.getFirst(), LONELYNEWOFFSETS.getSecond());
    String lonelyInSource =
            ABUT.substring(LONELYORIGOFFSETS.getFirst(), LONELYORIGOFFSETS.getSecond());
    String lonelyInCleaned =
            cleaned.substring(LONELYNEWOFFSETS.getFirst(), LONELYNEWOFFSETS.getSecond());
    assertEquals(lonelyInSource, lonelyInCleaned);
    assertEquals(LONELYORIGOFFSETS, lonelyMappedBack);

    // "only": original offsets -> transformed offsets
    int onlyStartForward = xform.computeModifiedOffsetFromOriginal(ONLYORIGOFFSETS.getFirst());
    int onlyEndForward = xform.computeModifiedOffsetFromOriginal(ONLYORIGOFFSETS.getSecond());
    assertEquals(ONLYNEWOFFSETS.getFirst(), onlyStartForward);
    assertEquals(ONLYNEWOFFSETS.getSecond(), onlyEndForward);

    // control-character span: transformed offsets -> original offsets, compared per component
    IntPair ctrlMappedBack =
            xform.getOriginalOffsets(CTRLNEWOFFSETS.getFirst(), CTRLNEWOFFSETS.getSecond());
    assertEquals(CTRLORIGOFFSETS.getFirst(), ctrlMappedBack.getFirst());
    assertEquals(CTRLORIGOFFSETS.getSecond(), ctrlMappedBack.getSecond());

    // "man": same surface text in both strings, and the offsets must map back
    IntPair manMappedBack =
            xform.getOriginalOffsets(MANNEWOFFSETS.getFirst(), MANNEWOFFSETS.getSecond());
    String manInCleaned =
            MODABUT.substring(MANNEWOFFSETS.getFirst(), MANNEWOFFSETS.getSecond());
    String manInSource =
            ABUT.substring(MANORIGOFFSETS.getFirst(), MANORIGOFFSETS.getSecond());
    assertEquals(manInCleaned, manInSource);
    assertEquals(MANORIGOFFSETS, manMappedBack);
}
Also used : StringTransformation(edu.illinois.cs.cogcomp.core.utilities.StringTransformation) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Test(org.junit.Test)

Example 12 with StringTransformation

use of edu.illinois.cs.cogcomp.core.utilities.StringTransformation in project cogcomp-nlp by CogComp.

the class StringTransformationTest method testDelete.

//    @Before
//    public void before() {
//    }
/**
 * Applies a single deletion of the span [10, 18) and verifies that the original text is still
 * reported unchanged, that the transformed text is exactly eight characters shorter, and that
 * it equals {@code MODDELETE}.
 */
@Test
public void testDelete() {
    // author's sketch of the input: John's leg^@^@^@^@
    StringTransformation xform = new StringTransformation(DELETE);
    xform.transformString(10, 18, "");

    String shortened = xform.getTransformedText();

    // the source text must survive the edit untouched
    assertEquals(DELETE, xform.getOrigText());

    // the deleted span covers offsets 10..18, i.e. eight characters
    final int removedLength = 18 - 10;
    assertEquals(DELETE.length() - removedLength, shortened.length());
    assertEquals(MODDELETE, shortened);
}
Also used : StringTransformation(edu.illinois.cs.cogcomp.core.utilities.StringTransformation) Test(org.junit.Test)

Example 13 with StringTransformation

use of edu.illinois.cs.cogcomp.core.utilities.StringTransformation in project cogcomp-nlp by CogComp.

the class StringTransformationTest method testSequentialExpand.

/**
 * Applies two expanding edits with a read of the transformed text in between, so the second
 * edit's offsets are given relative to the already-modified string. Verifies the resulting
 * text and the offset mappings in both directions.
 */
@Test
public void testSequentialExpand() {
    StringTransformation xform = new StringTransformation(EXPAND);

    // first edit: one character becomes two
    xform.transformString(4, 5, "``");
    // reading the transformed text forces the pending edit to be applied
    String current = xform.getTransformedText();
    assertEquals(EXPAND.length() + 1, current.length());

    // second edit: its offsets refer to the modified string, not the original
    xform.transformString(10, 11, "-RCB-");
    current = xform.getTransformedText();
    assertEquals(EXPAND, xform.getOrigText());
    assertEquals(EXPAND.length() + 5, current.length());
    assertEquals(MODEXPAND, current);

    // original [9, 10) -> transformed [10, 15)
    int forwardStart = xform.computeModifiedOffsetFromOriginal(9);
    int forwardEnd = xform.computeModifiedOffsetFromOriginal(10);
    assertEquals(10, forwardStart);
    assertEquals(15, forwardEnd);

    // transformed [4, 6) -> original [4, 5)
    IntPair firstEditOrig = xform.getOriginalOffsets(4, 6);
    assertEquals(4, firstEditOrig.getFirst());
    assertEquals(5, firstEditOrig.getSecond());

    // transformed [10, 15) -> original [9, 10)
    IntPair secondEditOrig = xform.getOriginalOffsets(10, 15);
    assertEquals(9, secondEditOrig.getFirst());
    assertEquals(10, secondEditOrig.getSecond());
}
Also used : StringTransformation(edu.illinois.cs.cogcomp.core.utilities.StringTransformation) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Test(org.junit.Test)

Example 14 with StringTransformation

use of edu.illinois.cs.cogcomp.core.utilities.StringTransformation in project cogcomp-nlp by CogComp.

the class StringTransformationTest method testSequence.

/**
 * Queues five edits against {@code SEQUENCE} in a single pass -- every edit's offsets refer to
 * the original text -- then verifies the transformed text and the offset mappings in both
 * directions.
 */
@Test
public void testSequence() {
    //        SEQUENCE= "The http://theonlyway.org {only}^@^@^@ way___";
    //        MODSEQUENCE= "The WWW -LCB-only-RCB- way-";
    StringTransformation st = new StringTransformation(SEQUENCE);
    // URL -> "WWW" (21 chars become 3)
    st.transformString(4, 25, "WWW");
    // "{" -> "-LCB-"
    st.transformString(26, 27, "-LCB-");
    // "}" -> "-RCB-"
    st.transformString(31, 32, "-RCB-");
    // delete the control-character run that directly follows "}"
    st.transformString(32, 38, "");
    // "___" -> "-"
    st.transformString(42, 45, "-");
    String modifiedStr = st.getTransformedText();
    assertEquals(SEQUENCE, st.getOrigText());
    // net length change: -18 + 4 + 4 - 6 - 2 = -18
    assertEquals(SEQUENCE.length() - 18, modifiedStr.length());
    assertEquals(MODSEQUENCE, modifiedStr);
    int modStart = st.computeModifiedOffsetFromOriginal(4);
    int modEnd = st.computeModifiedOffsetFromOriginal(25);
    assertEquals(4, modStart);
    assertEquals(7, modEnd);
    String transfSeq = modifiedStr.substring(4, 7);
    String origSeq = st.getOrigText().substring(4, 25);
    // expected value goes first so a failure message reports expected/actual the right way round
    assertEquals("WWW", transfSeq);
    assertEquals("http://theonlyway.org", origSeq);
    /*
     * what happens if we query a char in the middle of a deleted sequence?
     * -- should map to beginning of that modification
     */
    int modMid = st.computeModifiedOffsetFromOriginal(20);
    assertEquals(7, modMid);
    IntPair origOffsets = st.getOriginalOffsets(4, 7);
    assertEquals(4, origOffsets.getFirst());
    assertEquals(25, origOffsets.getSecond());
    // a span that lies entirely before the first edit maps to the same offsets
    origOffsets = st.getOriginalOffsets(1, 2);
    assertEquals(1, origOffsets.getFirst());
    assertEquals(2, origOffsets.getSecond());
    // end offset in the middle of the replacement "WWW": intermediate edit chars map to the
    // same offsets, treated like replacements
    origOffsets = st.getOriginalOffsets(1, 6);
    assertEquals(6, origOffsets.getSecond());
    // check expand edit
    origOffsets = st.getOriginalOffsets(17, 22);
    assertEquals(31, origOffsets.getFirst());
    // expansion + deletion
    assertEquals(38, origOffsets.getSecond());
    transfSeq = modifiedStr.substring(17, 22);
    origSeq = st.getOrigText().substring(31, 38);
    assertEquals("-RCB-", transfSeq);
    // combines expand + delete for contiguous spans
    assertEquals("}^@^@^@", origSeq);
    // intermediate edit chars map to same offsets, treated like replacements.
    // note that this could be weird in case of multiple edits at same index
    //   (e.g. insertion, then deletion)
    // Note that these don't really make sense as substrings, and nor are the mapped substrings likely to make sense
    origOffsets = st.getOriginalOffsets(19, 20);
    assertEquals(35, origOffsets.getFirst());
    assertEquals(36, origOffsets.getSecond());
    // forward mapping of the original "}" span [31, 32)
    modStart = st.computeModifiedOffsetFromOriginal(31);
    modEnd = st.computeModifiedOffsetFromOriginal(32);
    assertEquals(17, modStart);
    assertEquals(18, modEnd);
}
Also used : StringTransformation(edu.illinois.cs.cogcomp.core.utilities.StringTransformation) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Test(org.junit.Test)

Example 15 with StringTransformation

use of edu.illinois.cs.cogcomp.core.utilities.StringTransformation in project cogcomp-nlp by CogComp.

the class StringTransformationTest method testSequentialSequence.

/**
 * runs the same set of ops as testSequence, but applies edits after each transformation
 * (each edit's offsets are therefore relative to the text produced by the previous edits).
 * Ensures that the transformed text is the same whether edits are done in a single pass, or
 * over multiple passes. Some offset mappings asserted here differ from those asserted in
 * testSequence.
 */
@Test
public void testSequentialSequence() {
    //        SEQUENCE= "The http://theonlyway.org {only}^@^@^@ way___";
    //        MODSEQUENCE= "The WWW -LCB-only-RCB- way-";
    StringTransformation st = new StringTransformation(SEQUENCE);
    // URL -> "WWW"
    st.transformString(4, 25, "WWW");
    // force edits to be flushed
    st.getTransformedText();
    // "{" -> "-LCB-" (offsets relative to the text after the first edit)
    st.transformString(8, 9, "-LCB-");
    st.getTransformedText();
    // "}" -> "-RCB-"
    st.transformString(17, 18, "-RCB-");
    st.getTransformedText();
    // delete the control-character run
    st.transformString(22, 28, "");
    st.getTransformedText();
    // "___" -> "-"
    st.transformString(26, 29, "-");
    st.getTransformedText();
    String modifiedStr = st.getTransformedText();
    assertEquals(SEQUENCE, st.getOrigText());
    assertEquals(SEQUENCE.length() - 18, modifiedStr.length());
    assertEquals(MODSEQUENCE, modifiedStr);
    int modStart = st.computeModifiedOffsetFromOriginal(4);
    int modEnd = st.computeModifiedOffsetFromOriginal(25);
    assertEquals(4, modStart);
    assertEquals(7, modEnd);
    String transfSeq = modifiedStr.substring(4, 7);
    String origSeq = st.getOrigText().substring(4, 25);
    // expected value goes first so a failure message reports expected/actual the right way round
    assertEquals("WWW", transfSeq);
    assertEquals("http://theonlyway.org", origSeq);
    /*
     * what happens if we query a char in the middle of a deleted sequence?
     * -- should map to beginning of that modification
     */
    int modMid = st.computeModifiedOffsetFromOriginal(20);
    assertEquals(7, modMid);
    IntPair origOffsets = st.getOriginalOffsets(4, 7);
    assertEquals(4, origOffsets.getFirst());
    assertEquals(25, origOffsets.getSecond());
    // a span that lies entirely before the first edit maps to the same offsets
    origOffsets = st.getOriginalOffsets(1, 2);
    assertEquals(1, origOffsets.getFirst());
    assertEquals(2, origOffsets.getSecond());
    // end offset in the middle of the replacement "WWW": intermediate edit chars map to the
    // same offsets, treated like replacements
    origOffsets = st.getOriginalOffsets(1, 6);
    assertEquals(6, origOffsets.getSecond());
    // check expand edit
    origOffsets = st.getOriginalOffsets(17, 22);
    assertEquals(31, origOffsets.getFirst());
    assertEquals(32, origOffsets.getSecond());
    transfSeq = modifiedStr.substring(17, 22);
    origSeq = st.getOrigText().substring(31, 32);
    assertEquals("-RCB-", transfSeq);
    // here the expansion maps back to the "}" alone; the deleted run that follows it is
    // not part of the asserted original span
    assertEquals("}", origSeq);
    // intermediate edit chars map to same offsets, treated like replacements.
    // note that this could be weird in case of multiple edits at same index
    //   (e.g. insertion, then deletion)
    // Note that these don't really make sense as substrings, and nor are the mapped substrings likely to make sense
    origOffsets = st.getOriginalOffsets(19, 20);
    assertEquals(29, origOffsets.getFirst());
    assertEquals(30, origOffsets.getSecond());
    // forward mapping of the original "}" span [31, 32): covers the whole "-RCB-" replacement
    modStart = st.computeModifiedOffsetFromOriginal(31);
    modEnd = st.computeModifiedOffsetFromOriginal(32);
    assertEquals(17, modStart);
    assertEquals(22, modEnd);
}
Also used : StringTransformation(edu.illinois.cs.cogcomp.core.utilities.StringTransformation) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Test(org.junit.Test)

Aggregations

StringTransformation (edu.illinois.cs.cogcomp.core.utilities.StringTransformation)15 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)13 Test (org.junit.Test)10 XmlDocumentProcessor (edu.illinois.cs.cogcomp.core.utilities.XmlDocumentProcessor)3 Pair (edu.illinois.cs.cogcomp.core.datastructures.Pair)1 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)1 XmlTextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.XmlTextAnnotation)1 TextCleanerStringTransformation (edu.illinois.cs.cogcomp.core.utilities.TextCleanerStringTransformation)1 EREMentionRelationReader (edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREMentionRelationReader)1 ERENerReader (edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.ERENerReader)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 List (java.util.List)1