Search in sources :

Example 51 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class TestSingleFieldValueFilter method testFilterInequals.

/**
 * Exercises a {@link SingleFieldValueFilter} configured with {@code FilterOp.NOT_EQUALS}
 * on the URL field of a {@link WebPage}, using {@code "example.org"} as the only operand.
 *
 * <p>The test asserts that {@code filter(...)} returns {@code true} for a page whose URL
 * equals the operand and {@code false} for a page whose URL differs from it.
 */
@Test
public void testFilterInequals() {
    final String ignoredKey = "irrelevant";
    final Utf8 operand = new Utf8("example.org");

    // Configure the filter: NOT_EQUALS comparison against the url field.
    SingleFieldValueFilter<String, WebPage> urlFilter = new SingleFieldValueFilter<>();
    urlFilter.setFieldName(WebPage.Field.URL.toString());
    urlFilter.setFilterOp(FilterOp.NOT_EQUALS);
    urlFilter.setFilterIfMissing(true);
    urlFilter.getOperands().add(operand);

    WebPage candidate = WebPage.newBuilder().build();

    // URL identical to the operand.
    candidate.setUrl(new Utf8("example.org"));
    boolean resultForSameUrl = urlFilter.filter(ignoredKey, candidate);
    assertTrue(resultForSameUrl);

    // URL different from the operand.
    candidate.setUrl(new Utf8("something.else"));
    boolean resultForOtherUrl = urlFilter.filter(ignoredKey, candidate);
    assertFalse(resultForOtherUrl);
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Utf8(org.apache.avro.util.Utf8) Test(org.junit.Test)

Example 52 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class DataStoreTestUtil method testUpdateWebPageRemoveMapEntry.

/**
 * Verifies that entries removed from a stored {@link WebPage}'s outlinks map stay removed
 * after the page is written back and read again.
 *
 * <p>Steps: (1) store one page per URL, each with one outlink per URL keyed
 * {@code anchor0 .. anchorN-1}; (2) reload every page, remove the odd-indexed anchors and
 * store it again; (3) reload every page and assert the odd-indexed anchors are absent and
 * the map size equals the number of URLs minus the number of removed entries.
 *
 * @param dataStore the store under test
 * @throws Exception if any store operation fails
 */
public static void testUpdateWebPageRemoveMapEntry(DataStore<String, WebPage> dataStore) throws Exception {
    final String anchorPrefix = "anchor";
    final String[] urls = { "http://a.com/a", "http://b.com/b", "http://c.com/c", "http://d.com/d", "http://e.com/e", "http://f.com/f", "http://g.com/g" };

    dataStore.createSchema();

    // Phase 1: seed the store; every page links to every URL.
    for (int i = 0; i < urls.length; i++) {
        WebPage seeded = WebPage.newBuilder().build();
        seeded.setUrl(new Utf8(urls[i]));
        int linkIndex = 0;
        for (String target : urls) {
            seeded.getOutlinks().put(new Utf8(anchorPrefix + linkIndex), new Utf8(target));
            linkIndex++;
        }
        dataStore.put(seeded.getUrl().toString(), seeded);
    }
    dataStore.flush();

    // Phase 2: drop the odd-indexed anchors from each stored page and write it back.
    for (int i = 0; i < urls.length; i++) {
        WebPage stored = dataStore.get(urls[i]);
        int j = 1;
        while (j < urls.length) {
            stored.getOutlinks().remove(new Utf8(anchorPrefix + j));
            j += 2;
        }
        dataStore.put(stored.getUrl().toString(), stored);
    }
    dataStore.flush();

    // Phase 3: the removed anchors must be gone and only the remaining entries counted.
    for (int i = 0; i < urls.length; i++) {
        WebPage reloaded = dataStore.get(urls[i]);
        int removed = 0;
        for (int j = 1; j < urls.length; j += 2, removed++) {
            assertNull(reloaded.getOutlinks().get(new Utf8(anchorPrefix + j)));
        }
        assertEquals(urls.length - removed, reloaded.getOutlinks().size());
    }
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Utf8(org.apache.avro.util.Utf8)

Example 53 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class DataStoreTestUtil method testGetEmployeeNested.

/**
 * Stores an {@link Employee} that carries a nested {@link WebPage} (which in turn carries a
 * {@link Metadata} record), reads it back by SSN requesting all fields of the Employee
 * schema, and asserts that both the employee and its nested web page match what was stored.
 *
 * @param dataStore the store under test
 * @throws Exception if any store operation fails
 */
public static void testGetEmployeeNested(DataStore<String, Employee> dataStore) throws Exception {
    // Build the nested record first, then attach it to the employee.
    Metadata pageMetadata = new BeanFactoryImpl<>(String.class, Metadata.class).newPersistent();
    WebPage nestedPage = new BeanFactoryImpl<>(String.class, WebPage.class).newPersistent();
    byte[] rawContent = "test content".getBytes(Charset.defaultCharset());
    nestedPage.setUrl(new Utf8("url.."));
    nestedPage.setContent(ByteBuffer.wrap(rawContent));
    nestedPage.setParsedContent(new ArrayList<CharSequence>());
    nestedPage.setMetadata(pageMetadata);

    Employee expected = DataStoreTestUtil.createEmployee();
    expected.setWebpage(nestedPage);

    // The employee's SSN doubles as the storage key.
    String key = expected.getSsn().toString();
    dataStore.put(key, expected);
    dataStore.flush();

    Employee actual = dataStore.get(key, AvroUtils.getSchemaFieldNames(Employee.SCHEMA$));
    assertEqualEmployeeObjects(expected, actual);
    assertEqualWebPageObjects(nestedPage, actual.getWebpage());
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Employee(org.apache.gora.examples.generated.Employee) Metadata(org.apache.gora.examples.generated.Metadata) Utf8(org.apache.avro.util.Utf8)

Example 54 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class DataStoreTestUtil method testPutMap.

/**
 * Stores a single {@link WebPage} whose outlinks map has been populated, then closes the store.
 *
 * <p>The store is closed in a {@code finally} block so that it is released even when schema
 * creation or the put fails; previously a failure in either step skipped the close.
 *
 * @param store the data store to write to; it is closed before this method returns,
 *              whether or not the write succeeded
 * @throws Exception if creating the schema, storing the page, or closing the store fails
 */
public static void testPutMap(DataStore<String, WebPage> store) throws Exception {
    try {
        store.createSchema();
        WebPage page = WebPage.newBuilder().build();
        page.setUrl(new Utf8("http://example.com"));
        page.getOutlinks().put(new Utf8("http://example2.com"), new Utf8("anchor2"));
        page.getOutlinks().put(new Utf8("http://example3.com"), new Utf8("anchor3"));
        // NOTE(review): same key as the previous put, so "anchor4" replaces "anchor3" in the
        // map; presumably deliberate to exercise overwriting a map entry - confirm.
        page.getOutlinks().put(new Utf8("http://example3.com"), new Utf8("anchor4"));
        store.put("com.example/http", page);
    } finally {
        // Always release the store, even if the write above threw.
        store.close();
    }
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Utf8(org.apache.avro.util.Utf8)

Example 55 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class DataStoreTestUtil method testUpdateWebPagePutToNullableMap.

/**
 * Verifies that a map field which starts out unset (the 'headers' field of {@link WebPage})
 * can be populated, stored, replaced with a different map, stored again and read back.
 *
 * <p>Steps: (1) store one page per URL with headers at the even indices; (2) reload each
 * page, replace its headers map with a fresh one holding the odd-indexed headers, and store
 * it again; (3) reload each page and assert that exactly the odd-indexed headers are present
 * with the expected values.
 *
 * @param dataStore the store under test
 * @throws Exception if any store operation fails
 */
public static void testUpdateWebPagePutToNullableMap(DataStore<String, WebPage> dataStore) throws Exception {
    final String keyPrefix = "header";
    final String[] headerValues = { "firstHeader", "secondHeader", "thirdHeader", "fourthHeader", "fifthHeader", "sixthHeader" };
    final String[] urls = { "http://a.com/a", "http://b.com/b", "http://c.com/c", "http://d.com/d", "http://e.com/e", "http://f.com/f", "http://g.com/g" };

    dataStore.createSchema();

    // Phase 1: create the pages and fill the nullable 'headers' map (default value 'null')
    // with the even-indexed headers.
    for (int i = 0; i < urls.length; i++) {
        WebPage created = WebPage.newBuilder().build();
        created.setUrl(new Utf8(urls[i]));
        created.setHeaders(new HashMap<CharSequence, CharSequence>());
        int even = 0;
        while (even < headerValues.length) {
            created.getHeaders().put(new Utf8(keyPrefix + even), new Utf8(headerValues[even]));
            even += 2;
        }
        dataStore.put(created.getUrl().toString(), created);
    }
    dataStore.flush();

    // Phase 2: swap in a brand-new map holding only the odd-indexed headers.
    // TODO: headers are replaced rather than cleared because getHeaders().clear() does not work.
    for (int i = 0; i < urls.length; i++) {
        WebPage stored = dataStore.get(urls[i]);
        stored.setHeaders(new HashMap<CharSequence, CharSequence>());
        int odd = 1;
        while (odd < headerValues.length) {
            stored.getHeaders().put(new Utf8(keyPrefix + odd), new Utf8(headerValues[odd]));
            odd += 2;
        }
        dataStore.put(stored.getUrl().toString(), stored);
    }
    dataStore.flush();

    // Phase 3: only the odd-indexed headers may remain, each with its original value.
    for (int i = 0; i < urls.length; i++) {
        WebPage reloaded = dataStore.get(urls[i]);
        int expectedSize = 0;
        for (int odd = 1; odd < headerValues.length; odd += 2, expectedSize++) {
            CharSequence actualValue = reloaded.getHeaders().get(new Utf8(keyPrefix + odd));
            assertNotNull(actualValue);
            assertEquals(headerValues[odd], actualValue.toString());
        }
        assertEquals(expectedSize, reloaded.getHeaders().size());
    }
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Utf8(org.apache.avro.util.Utf8)

Aggregations

WebPage (org.apache.gora.examples.generated.WebPage): 67, Test (org.junit.Test): 33, Utf8 (org.apache.avro.util.Utf8): 32, DBObject (com.mongodb.DBObject): 7, Configuration (org.apache.hadoop.conf.Configuration): 6, Employee (org.apache.gora.examples.generated.Employee): 5, Metadata (org.apache.gora.examples.generated.Metadata): 4, BeanFactoryImpl (org.apache.gora.persistency.impl.BeanFactoryImpl): 4, ByteBuffer (java.nio.ByteBuffer): 3, org.apache.hadoop.hbase.client (org.apache.hadoop.hbase.client): 3, ByteArrayInputStream (java.io.ByteArrayInputStream): 2, DataInputStream (java.io.DataInputStream): 2, IOException (java.io.IOException): 2, ArrayList (java.util.ArrayList): 2, Field (org.apache.avro.Schema.Field): 2, TokenDatum (org.apache.gora.examples.generated.TokenDatum): 2, FilterList (org.apache.gora.filter.FilterList): 2, TableName (org.apache.hadoop.hbase.TableName): 2, Job (org.apache.hadoop.mapreduce.Job): 2, Properties (java.util.Properties): 1