Search in sources :

Example 1 with TokenType

Use of TokenType in project vespa by vespa-engine.

From the class LinguisticsAnnotatorTestCase, method requireThatTermAnnotationsAreEmptyIfOrigIsLowerCase:

public void requireThatTermAnnotationsAreEmptyIfOrigIsLowerCase() {
    SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
    expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM));
    for (boolean specialToken : Arrays.asList(true, false)) {
        for (TokenType type : TokenType.values()) {
            if (!specialToken && !type.isIndexable()) {
            assertAnnotations(expected, "foo", newToken("foo", "foo", type, specialToken));
Also used: TokenType, Annotation, SpanTree, Test (org.junit.Test)

Example 2 with TokenType

Use of TokenType in project vespa by vespa-engine.

From the class SimpleTokenTestCase, method requireThatTypeAccessorsWork:

public void requireThatTypeAccessorsWork() {
    SimpleToken token = new SimpleToken("foo");
    assertEquals(TokenType.UNKNOWN, token.getType());
    for (TokenType type : TokenType.values()) {
        assertEquals(type, token.getType());
    SimpleToken other = new SimpleToken("foo");
    for (TokenType type : TokenType.values()) {
        if (type == token.getType()) {
            assertEquals(token, other);
        } else {
Also used: TokenType, Test (org.junit.Test)

Example 3 with TokenType

Use of TokenType in project vespa by vespa-engine.

From the class LinguisticsAnnotatorTestCase, method requireThatTermAnnotationsAreLowerCased:

public void requireThatTermAnnotationsAreLowerCased() {
    SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
    expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar")));
    for (boolean specialToken : Arrays.asList(true, false)) {
        for (TokenType type : TokenType.values()) {
            if (!specialToken && !type.isIndexable()) {
            assertAnnotations(expected, "foo", newToken("foo", "BAR", type, specialToken));
Also used: TokenType, StringFieldValue, Annotation, SpanTree, Test (org.junit.Test)

Example 4 with TokenType

Use of TokenType in project vespa by vespa-engine.

From the class LinguisticsAnnotatorTestCase, method requireThatIndexableTokenStringsAreAnnotated:

public void requireThatIndexableTokenStringsAreAnnotated() {
    SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
    expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar")));
    for (TokenType type : TokenType.values()) {
        if (!type.isIndexable()) {
        assertAnnotations(expected, "foo", newToken("foo", "bar", type));
Also used: TokenType, StringFieldValue, Annotation, SpanTree, Test (org.junit.Test)

Example 5 with TokenType

Use of TokenType in project vespa by vespa-engine.

From the class LinguisticsAnnotatorTestCase, method requireThatMaxTermOccurencesIsHonored:

public void requireThatMaxTermOccurencesIsHonored() {
    final String inputTerm = "foo";
    // completely different from
    final String stemmedInputTerm = "bar";
    // inputTerm for safer test
    final String paddedInputTerm = inputTerm + " ";
    final SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
    final int inputTermOccurence = AnnotatorConfig.DEFAULT_MAX_TERM_OCCURRENCES * 2;
    for (int i = 0; i < AnnotatorConfig.DEFAULT_MAX_TERM_OCCURRENCES; ++i) {
        expected.spanList().span(i * paddedInputTerm.length(), inputTerm.length()).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue(stemmedInputTerm)));
    for (TokenType type : TokenType.values()) {
        if (!type.isIndexable()) {
        StringBuilder input = new StringBuilder();
        Token[] tokens = new Token[inputTermOccurence];
        for (int i = 0; i < inputTermOccurence; ++i) {
            SimpleToken t = newToken(inputTerm, stemmedInputTerm, type);
            t.setOffset(i * paddedInputTerm.length());
            tokens[i] = t;
        assertAnnotations(expected, input.toString(), tokens);
Also used: TokenType, StringFieldValue, SimpleToken, Token, SimpleToken, Annotation, SpanTree, Test (org.junit.Test)


TokenType, Test (org.junit.Test) 7, Annotation, SpanTree, StringFieldValue, Token, SimpleToken