Search in sources :

Example 11 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class TestPersistentSerialization method testSerdeWebPage.

/**
   * Populates an in-memory store via {@code WebPageDataCreator}, queries
   * everything back and round-trips each returned WebPage through
   * {@code TestIOUtils.testSerializeDeserialize}, which compares the object
   * 'before' and 'after' the (de)serialization process.
   * Finally asserts that the number of pages returned by the query equals
   * the number of URLs in {@code WebPageDataCreator.URLS}.
   * @throws Exception if the store, the query or the round trip fails
   */
@SuppressWarnings("unchecked")
@Test
public void testSerdeWebPage() throws Exception {
    MemStore<String, WebPage> store = DataStoreFactory.getDataStore(MemStore.class, String.class, WebPage.class, new Configuration());
    WebPageDataCreator.createWebPageData(store);
    Result<String, WebPage> rows = store.newQuery().execute();
    // The counter advances once per row that survived the round trip.
    int roundTripped = 0;
    for (; rows.next(); roundTripped++) {
        TestIOUtils.testSerializeDeserialize(rows.get());
    }
    assertEquals(WebPageDataCreator.URLS.length, roundTripped);
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Configuration(org.apache.hadoop.conf.Configuration) Test(org.junit.Test)

Example 12 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class TestPersistentSerialization method testSerdeMultipleWebPages.

/**
   * Builds three WebPage objects with different fields populated
   * (a URL on each, parsed content on the first, outlinks on the second)
   * and passes all of them to {@code TestIOUtils.testSerializeDeserialize}
   * in a single call, which compares the objects 'before' and 'after'
   * the (de)serialization process.
   * @throws Exception if the round trip fails
   */
@Test
public void testSerdeMultipleWebPages() throws Exception {
    // Page with a URL and one parsed-content entry.
    WebPage fooPage = WebPage.newBuilder().build();
    fooPage.setUrl(new Utf8("foo"));
    fooPage.setParsedContent(new ArrayList<CharSequence>());
    fooPage.getParsedContent().add(new Utf8("coo"));

    // Page with a URL and one outlink.
    WebPage bazPage = WebPage.newBuilder().build();
    bazPage.setUrl(new Utf8("baz"));
    bazPage.setOutlinks(new HashMap<CharSequence, CharSequence>());
    bazPage.getOutlinks().put(new Utf8("a"), new Utf8("b"));

    // Page with only a URL.
    WebPage barPage = WebPage.newBuilder().build();
    barPage.setUrl(new Utf8("bar"));

    TestIOUtils.testSerializeDeserialize(fooPage, bazPage, barPage);
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Utf8(org.apache.avro.util.Utf8) Test(org.junit.Test)

Example 13 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class MemStoreTest method testMemStoreDeleteByQueryFields.

/**
 * Exercises {@code deleteByQuery} on a {@code MemStore} when the query names specific fields.
 * <p>
 * Test 5 deletes the fields "outlinks", "parsedContent" and "content" with a query that has
 * no key range, then asserts that every page is still present with its URL intact and with
 * empty outlinks and parsed content.
 * <p>
 * Test 6 re-creates the data and deletes only the "url" field for keys between
 * {@code SORTED_URLS[NUM_KEYS]} and {@code SORTED_URLS[SORTED_URLS.length - NUM_KEYS]},
 * then asserts that pages outside that range are untouched while pages inside it have a
 * null URL but keep their other fields.
 *
 * @throws Exception if any store operation fails
 */
@Test
public void testMemStoreDeleteByQueryFields() throws Exception {
    DataStore<String, WebPage> store = new MemStore<>();
    BeanFactory<String, WebPage> beanFactory = new BeanFactoryImpl<>(String.class, WebPage.class);
    store.setBeanFactory(beanFactory);
    Query<String, WebPage> query;
    //test 5 - delete all with some fields
    WebPageDataCreator.createWebPageData(store);
    query = store.newQuery();
    query.setFields("outlinks", "parsedContent", "content");
    Query<String, WebPage> newQuery = store.newQuery();
    newQuery.setStartKey(SORTED_URLS[0]);
    newQuery.setEndKey(SORTED_URLS[9]);
    newQuery.setFields("outlinks", "parsedContent", "content");
    DataStoreTestUtil.assertNumResults(newQuery, URLS.length);
    //deleteByQuery is issued repeatedly: per the original note,
    //HBase sometimes does not delete arbitrarily
    store.deleteByQuery(query);
    store.deleteByQuery(query);
    store.deleteByQuery(query);
    store.flush();
    //only fields were deleted, so the number of rows must be unchanged
    DataStoreTestUtil.assertNumResults(store.newQuery(), URLS.length);
    //assert that data is deleted
    for (String SORTED_URL : SORTED_URLS) {
        WebPage page = store.get(SORTED_URL);
        assertNotNull(page);
        assertNotNull(page.getUrl());
        //expected value first, so a failure message reports the values the right way round
        assertEquals(SORTED_URL, page.getUrl().toString());
        assertEquals("Map of Outlinks should have a size of '0' as the deleteByQuery " + "not only removes the data but also the data structure.", 0, page.getOutlinks().size());
        assertEquals(0, page.getParsedContent().size());
        //content may or may not survive the delete; it is only logged when present
        if (page.getContent() != null) {
            LOG.info("url:" + page.getUrl().toString());
            LOG.info("limit:" + page.getContent().limit());
        }
    }
    //test 6 - delete some with some fields
    WebPageDataCreator.createWebPageData(store);
    query = store.newQuery();
    query.setFields("url");
    String startKey = SORTED_URLS[NUM_KEYS];
    String endKey = SORTED_URLS[SORTED_URLS.length - NUM_KEYS];
    query.setStartKey(startKey);
    query.setEndKey(endKey);
    DataStoreTestUtil.assertNumResults(store.newQuery(), URLS.length);
    //deleteByQuery is issued repeatedly: per the original note,
    //HBase sometimes does not delete arbitrarily
    store.deleteByQuery(query);
    store.deleteByQuery(query);
    store.deleteByQuery(query);
    store.flush();
    DataStoreTestUtil.assertNumResults(store.newQuery(), URLS.length);
    //assert that data is deleted
    for (int i = 0; i < URLS.length; i++) {
        WebPage page = store.get(URLS[i]);
        assertNotNull(page);
        if (URLS[i].compareTo(startKey) < 0 || URLS[i].compareTo(endKey) > 0) {
            //not deleted
            DataStoreTestUtil.assertWebPage(page, i);
        } else {
            //deleted: only the url field is gone, the remaining fields must still hold data
            assertNull(page.getUrl());
            assertNotNull(page.getOutlinks());
            assertNotNull(page.getParsedContent());
            assertNotNull(page.getContent());
            assertTrue(page.getOutlinks().size() > 0);
            assertTrue(page.getParsedContent().size() > 0);
        }
    }
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) BeanFactoryImpl(org.apache.gora.persistency.impl.BeanFactoryImpl) Test(org.junit.Test)

Example 14 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class TestPersistentBase method testFieldsWithTwoClasses.

/**
   * Assert that the position reported by each field of an instance's schema
   * matches its index in the field list, iterating over as many fields as
   * the generated {@code SCHEMA$} constant declares.
   * This tests for both WebPage and Employee data beans.
   */
@Test
public void testFieldsWithTwoClasses() {
    WebPage page = WebPage.newBuilder().build();
    // size() avoids copying the field list into a new array on every loop check
    int webPageFieldCount = WebPage.SCHEMA$.getFields().size();
    for (int i = 0; i < webPageFieldCount; i++) {
        int index = page.getSchema().getFields().get(i).pos();
        assertEquals(i, index);
    }
    Employee employee = Employee.newBuilder().build();
    int employeeFieldCount = Employee.SCHEMA$.getFields().size();
    for (int i = 0; i < employeeFieldCount; i++) {
        int index = employee.getSchema().getFields().get(i).pos();
        assertEquals(i, index);
    }
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Employee(org.apache.gora.examples.generated.Employee) Test(org.junit.Test)

Example 15 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class TestPersistentBase method testGetField.

/**
   * Assert that each field obtained from a WebPage instance's schema
   * equals the field at the same index in {@code WebPage.SCHEMA$}.
   */
@Test
public void testGetField() {
    WebPage page = WebPage.newBuilder().build();
    // size() avoids copying the field list into a new array on every loop check
    int fieldCount = WebPage.SCHEMA$.getFields().size();
    for (int i = 0; i < fieldCount; i++) {
        Field field = page.getSchema().getFields().get(i);
        assertEquals(WebPage.SCHEMA$.getFields().get(i), field);
    }
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Field(org.apache.avro.Schema.Field) Test(org.junit.Test)

Aggregations

WebPage (org.apache.gora.examples.generated.WebPage)67 Test (org.junit.Test)33 Utf8 (org.apache.avro.util.Utf8)32 DBObject (com.mongodb.DBObject)7 Configuration (org.apache.hadoop.conf.Configuration)6 Employee (org.apache.gora.examples.generated.Employee)5 Metadata (org.apache.gora.examples.generated.Metadata)4 BeanFactoryImpl (org.apache.gora.persistency.impl.BeanFactoryImpl)4 ByteBuffer (java.nio.ByteBuffer)3 org.apache.hadoop.hbase.client (org.apache.hadoop.hbase.client)3 ByteArrayInputStream (java.io.ByteArrayInputStream)2 DataInputStream (java.io.DataInputStream)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 Field (org.apache.avro.Schema.Field)2 TokenDatum (org.apache.gora.examples.generated.TokenDatum)2 FilterList (org.apache.gora.filter.FilterList)2 TableName (org.apache.hadoop.hbase.TableName)2 Job (org.apache.hadoop.mapreduce.Job)2 Properties (java.util.Properties)1