Search in sources :

Example 36 with Utf8

use of org.apache.avro.util.Utf8 in project gora by apache.

the class DataStoreTestUtil method assertWebPage.

/**
 * Asserts that {@code page} matches the {@code i}-th entry of the shared test fixtures.
 *
 * <p>The URL is compared with {@code URLS[i]}. When the page has content, its bytes and its
 * parsed tokens are compared with {@code CONTENTS[i]}; when it has none, {@code CONTENTS[i]}
 * must be {@code null} and the parsed content must be empty. Outlinks are compared with the
 * anchors in {@code ANCHORS[i]}, keyed by the URLs that {@code LINKS[i]} points to.
 *
 * @param page the web page to verify; must not be {@code null}
 * @param i index into the {@code URLS}, {@code CONTENTS}, {@code LINKS} and {@code ANCHORS}
 *     fixtures
 * @throws Exception propagated from the helpers and accessors used here
 */
public static void assertWebPage(WebPage page, int i) throws Exception {
    assertNotNull(page);
    assertEquals(URLS[i], page.getUrl().toString());
    // 'content' is optional
    if (page.getContent() != null) {
        // Report a fixture without content as an assertion failure instead of an NPE below.
        assertTrue("page has content but CONTENTS[" + i + "] is null", CONTENTS[i] != null);
        // Extract the bytes once and reuse them for both the comparison and the message.
        byte[] actualBytes = toByteArray(page.getContent());
        byte[] expectedBytes = CONTENTS[i].getBytes(Charset.defaultCharset());
        assertTrue("content error: expected=" + CONTENTS[i] + " actual=" + new String(actualBytes, Charset.defaultCharset()) + " i=" + i, Arrays.equals(actualBytes, expectedBytes));
        List<CharSequence> parsedContent = page.getParsedContent();
        assertNotNull(parsedContent);
        assertTrue(parsedContent.size() > 0);
        String[] tokens = CONTENTS[i].split(" ");
        // Guard the indexed access below so surplus parsed tokens fail as an assertion
        // rather than as an ArrayIndexOutOfBoundsException.
        // NOTE(review): fewer parsed tokens than expected are still accepted, as before —
        // confirm whether an exact size match should be required.
        assertTrue("parsed content has " + parsedContent.size() + " tokens but only " + tokens.length + " were expected, i=" + i, parsedContent.size() <= tokens.length);
        int j = 0;
        for (CharSequence token : parsedContent) {
            assertEquals(tokens[j++], token.toString());
        }
    } else {
        // when page.getContent() is null
        assertTrue(CONTENTS[i] == null);
        List<CharSequence> parsedContent = page.getParsedContent();
        assertNotNull(parsedContent);
        assertTrue(parsedContent.size() == 0);
    }
    if (LINKS[i].length > 0) {
        assertNotNull(page.getOutlinks());
        assertTrue(page.getOutlinks().size() > 0);
        for (int k = 0; k < LINKS[i].length; k++) {
            // Outlinks are looked up by the target URL wrapped in a Utf8 key.
            assertEquals(ANCHORS[i][k], page.getOutlinks().get(new Utf8(URLS[LINKS[i][k]])).toString());
        }
    } else {
        assertTrue(page.getOutlinks() == null || page.getOutlinks().isEmpty());
    }
}
Also used : Utf8(org.apache.avro.util.Utf8)

Example 37 with Utf8

use of org.apache.avro.util.Utf8 in project gora by apache.

the class DataStoreTestUtil method createWebPage.

/**
 * Builds a small {@link WebPage} fixture with a URL, content, an empty parsed-content list
 * and a freshly built {@link Metadata} record.
 *
 * <p>The content bytes are produced with the platform default charset.
 *
 * @return the populated web page
 */
private static WebPage createWebPage() {
    WebPage webpage = WebPage.newBuilder().build();
    webpage.setUrl(new Utf8("url.."));
    webpage.setContent(ByteBuffer.wrap("test content".getBytes(Charset.defaultCharset())));
    webpage.setParsedContent(new ArrayList<CharSequence>());
    webpage.setMetadata(Metadata.newBuilder().build());
    return webpage;
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Metadata(org.apache.gora.examples.generated.Metadata) Utf8(org.apache.avro.util.Utf8)

Example 38 with Utf8

use of org.apache.avro.util.Utf8 in project gora by apache.

the class DataStoreTestUtil method testPutArray.

/**
 * Stores a {@link WebPage} whose parsed content is a four-token array under the key
 * {@code "com.example/http"}, then closes the store.
 *
 * <p>The store is closed even when schema creation or the put fails, so a failing run does
 * not leave it open.
 *
 * @param store the data store to write to; it is closed before this method returns
 * @throws Exception if schema creation, the put, or closing the store fails
 */
public static void testPutArray(DataStore<String, WebPage> store) throws Exception {
    try {
        store.createSchema();
        WebPage page = WebPage.newBuilder().build();
        String[] tokens = { "example", "content", "in", "example.com" };
        page.setParsedContent(new ArrayList<CharSequence>());
        for (String token : tokens) {
            page.getParsedContent().add(new Utf8(token));
        }
        store.put("com.example/http", page);
    } finally {
        store.close();
    }
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Utf8(org.apache.avro.util.Utf8)

Example 39 with Utf8

use of org.apache.avro.util.Utf8 in project gora by apache.

the class TestFilterList method testOperatorMustPassOne.

/**
   * Exercises a {@link FilterList} built with
   * {@link org.apache.gora.filter.FilterList.Operator#MUST_PASS_ONE}.
   * <p>
   * The page carries a single outlink. {@code filter1} is configured with that
   * outlink's key and value, while {@code filter2} is given a different URL as
   * operand. The test asserts that the filter list returns {@code true} for
   * the page.
   */
@Test
public void testOperatorMustPassOne() {
    final String outlinksField = WebPage.Field.OUTLINKS.toString();
    final String linkKey = "example";
    final String linkTarget = "http://example.org";

    // Page under test: a single outlink "example" -> "http://example.org".
    WebPage webPage = WebPage.newBuilder().build();
    webPage.getOutlinks().put(new Utf8(linkKey), new Utf8(linkTarget));

    // filter1 targets the outlink stored on the page.
    filter1.setFieldName(outlinksField);
    filter1.setMapKey(new Utf8(linkKey));
    filter1.setFilterOp(FilterOp.EQUALS);
    filter1.setFilterIfMissing(true);
    filter1.getOperands().add(new Utf8(linkTarget));

    // filter2 uses a different URL as operand; no map key is set on it here.
    filter2.setFieldName(outlinksField);
    filter2.setFilterOp(FilterOp.EQUALS);
    filter2.setFilterIfMissing(true);
    filter2.getOperands().add(new Utf8("http://example2.org"));

    // NOTE(review): fList is not populated in this method — presumably it already
    // holds filter1 and filter2 from fixture set-up; verify.
    filterList = new FilterList<>(Operator.MUST_PASS_ONE, fList);
    boolean result = filterList.filter("irrelevant", webPage);
    assertTrue(result);
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Utf8(org.apache.avro.util.Utf8) Test(org.junit.Test)

Example 40 with Utf8

use of org.apache.avro.util.Utf8 in project gora by apache.

the class TestFilterList method testOperatorMustPassAll.

/**
   * Exercises a {@link FilterList} built with
   * {@link org.apache.gora.filter.FilterList.Operator#MUST_PASS_ALL}.
   * <p>
   * The page carries a single outlink. Both {@code filter1} and
   * {@code filter2} are given that outlink's URL as operand; only
   * {@code filter1} is additionally given the outlink's map key. The test
   * asserts that the filter list returns {@code true} for the page.
   */
@Test
public void testOperatorMustPassAll() {
    final String outlinksField = WebPage.Field.OUTLINKS.toString();
    final String linkKey = "example";
    final String linkTarget = "http://example.org";

    // Page under test: a single outlink "example" -> "http://example.org".
    WebPage webPage = WebPage.newBuilder().build();
    webPage.getOutlinks().put(new Utf8(linkKey), new Utf8(linkTarget));

    // filter1 targets the outlink stored on the page, including its map key.
    filter1.setFieldName(outlinksField);
    filter1.setMapKey(new Utf8(linkKey));
    filter1.setFilterOp(FilterOp.EQUALS);
    filter1.setFilterIfMissing(true);
    filter1.getOperands().add(new Utf8(linkTarget));

    // filter2 uses the same URL as operand; no map key is set on it here.
    filter2.setFieldName(outlinksField);
    filter2.setFilterOp(FilterOp.EQUALS);
    filter2.setFilterIfMissing(true);
    filter2.getOperands().add(new Utf8(linkTarget));

    // NOTE(review): fList is not populated in this method — presumably it already
    // holds filter1 and filter2 from fixture set-up; verify.
    filterList = new FilterList<>(Operator.MUST_PASS_ALL, fList);
    boolean result = filterList.filter("irrelevant", webPage);
    assertTrue(result);
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Utf8(org.apache.avro.util.Utf8) Test(org.junit.Test)

Aggregations

Utf8 (org.apache.avro.util.Utf8): 123, Test (org.junit.Test): 34, WebPage (org.apache.gora.examples.generated.WebPage): 32, GenericRecord (org.apache.avro.generic.GenericRecord): 17, Schema (org.apache.avro.Schema): 14, GenericData (org.apache.avro.generic.GenericData): 13, ByteBuffer (java.nio.ByteBuffer): 12, HashMap (java.util.HashMap): 12, Map (java.util.Map): 12, Employee (org.apache.gora.examples.generated.Employee): 11, IOException (java.io.IOException): 7, ArrayList (java.util.ArrayList): 7, Field (org.apache.avro.Schema.Field): 6, Record (org.apache.avro.generic.GenericData.Record): 5, File (java.io.File): 4, SpecificDatumReader (org.apache.avro.specific.SpecificDatumReader): 4, Metadata (org.apache.gora.examples.generated.Metadata): 4, ByteArrayInputStream (java.io.ByteArrayInputStream): 3, Iterator (java.util.Iterator): 3, List (java.util.List): 3