Search in sources :

Example 21 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class TestFilterList method testOperatorMustPassAll.

/**
 * Checks {@link org.apache.gora.filter.FilterList.Operator#MUST_PASS_ALL}:
 * a list built from {@code fList} with that operator must return {@code true}
 * from {@code filter(...)} for a page whose outlinks map holds
 * {@code "example" -> "http://example.org"}.
 *
 * <p>{@code filter1}, {@code filter2}, {@code fList} and {@code filterList}
 * are fixtures declared outside this method.
 */
@Test
public void testOperatorMustPassAll() {
    final String outlinksField = WebPage.Field.OUTLINKS.toString();
    final String linkKey = "example";
    final String linkTarget = "http://example.org";

    // First filter: looks at the "example" entry of the outlinks map.
    filter1.setFieldName(outlinksField);
    filter1.setMapKey(new Utf8(linkKey));
    filter1.setFilterOp(FilterOp.EQUALS);
    filter1.setFilterIfMissing(true);
    filter1.getOperands().add(new Utf8(linkTarget));

    // Second filter: same field and operand, but no map key is set.
    filter2.setFieldName(outlinksField);
    filter2.setFilterOp(FilterOp.EQUALS);
    filter2.setFilterIfMissing(true);
    filter2.getOperands().add(new Utf8(linkTarget));

    WebPage webPage = WebPage.newBuilder().build();
    webPage.getOutlinks().put(new Utf8(linkKey), new Utf8(linkTarget));

    filterList = new FilterList<>(Operator.MUST_PASS_ALL, fList);
    boolean filtered = filterList.filter("irrelevant", webPage);
    assertTrue(filtered);
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Utf8(org.apache.avro.util.Utf8) Test(org.junit.Test)

Example 22 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class TestSingleFieldValueFilter method testSerialization.

/**
 * Serializes a configured {@link SingleFieldValueFilter} with
 * {@code WritableUtils.toByteArray}, reads the bytes back into a fresh
 * instance via {@code readFields}, and checks that field name, filter
 * operation, operands and the filter-if-missing flag all match.
 *
 * @throws IOException if writing or reading the filter fails
 */
@Test
public void testSerialization() throws IOException {
    // Source filter targets the url field.
    SingleFieldValueFilter<String, WebPage> original = new SingleFieldValueFilter<>();
    original.setFieldName(WebPage.Field.URL.toString());
    original.setFilterOp(FilterOp.EQUALS);
    original.setFilterIfMissing(true);
    original.getOperands().add(new Utf8("http://example.org"));

    byte[] serialized = WritableUtils.toByteArray(original);

    // Rebuild a second filter purely from the serialized bytes.
    SingleFieldValueFilter<String, WebPage> restored = new SingleFieldValueFilter<>();
    ByteArrayInputStream rawInput = new ByteArrayInputStream(serialized);
    DataInputStream dataInput = new DataInputStream(rawInput);
    restored.readFields(dataInput);

    assertEquals(original.getFieldName(), restored.getFieldName());
    assertEquals(original.getFilterOp(), restored.getFilterOp());
    Object[] expectedOperands = original.getOperands().toArray();
    Object[] actualOperands = restored.getOperands().toArray();
    assertArrayEquals(expectedOperands, actualOperands);
    assertEquals(original.isFilterIfMissing(), restored.isFilterIfMissing());
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) ByteArrayInputStream(java.io.ByteArrayInputStream) Utf8(org.apache.avro.util.Utf8) DataInputStream(java.io.DataInputStream) Test(org.junit.Test)

Example 23 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class MapReduceTestUtils method testMapReduceSerialization.

/**
 * Stores one {@link WebPage} in {@code inStore}, runs the
 * {@link MapReduceSerialization} job from {@code inStore} to {@code outStore},
 * and asserts that every record read back from {@code outStore} equals the
 * expected page.
 *
 * @param conf configuration applied to both stores and passed to the job
 * @param inStore store that receives the input page under key {@code "key1"}
 * @param outStore store queried once the job has finished
 * @throws Exception if a store operation or the job fails
 */
public static void testMapReduceSerialization(Configuration conf, DataStore<String, WebPage> inStore, DataStore<String, WebPage> outStore) throws Exception {
    // Datastore now has to be a Hadoop based datastore
    DataStoreBase<String, WebPage> hadoopInStore = (DataStoreBase<String, WebPage>) inStore;
    hadoopInStore.setConf(conf);
    DataStoreBase<String, WebPage> hadoopOutStore = (DataStoreBase<String, WebPage>) outStore;
    hadoopOutStore.setConf(conf);

    // Input record written to the source store.
    WebPage inputPage = WebPage.newBuilder().build();
    inputPage.setUrl("TestURL");
    List<CharSequence> inputParsed = new ArrayList<>();
    inputParsed.add("parsed1");
    inputParsed.add("parsed2");
    inputPage.setParsedContent(inputParsed);
    inputPage.setContent(ByteBuffer.wrap("content".getBytes(Charset.defaultCharset())));
    inStore.put("key1", inputPage);
    inStore.flush();

    // Expected output record. Its URL differs from the input URL;
    // presumably the job rewrites it — confirm against MapReduceSerialization.
    WebPage expectedPage = WebPage.newBuilder().build();
    expectedPage.setUrl("hola");
    List<CharSequence> expectedParsed = new ArrayList<>();
    expectedParsed.add("parsed1");
    expectedParsed.add("parsed2");
    expectedPage.setParsedContent(expectedParsed);
    expectedPage.setContent(ByteBuffer.wrap("content".getBytes(Charset.defaultCharset())));

    // Run the job.
    MapReduceSerialization job = new MapReduceSerialization(conf);
    job.mapReduceSerialization(inStore, outStore);

    // NOTE(review): if outStore yields no records, the loop body never runs
    // and nothing is asserted.
    Result<String, WebPage> outputRecords = outStore.execute(outStore.newQuery());
    while (outputRecords.next()) {
        WebPage actualPage = outputRecords.get();
        assertEquals(expectedPage, actualPage);
    }
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) MapReduceSerialization(org.apache.gora.examples.mapreduce.MapReduceSerialization) ArrayList(java.util.ArrayList) DataStoreBase(org.apache.gora.store.impl.DataStoreBase)

Example 24 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class DataStoreTestUtil method testQueryWebPageSingleKey.

/**
 * For each entry of {@code URLS}, runs a query restricted to that single key
 * and the given fields, and checks that exactly one record comes back and
 * that it passes {@code assertWebPage} for the entry's index.
 *
 * @param store store that is populated via {@code createWebPageData} and then queried
 * @param fields field names passed to {@code Query.setFields}
 * @throws Exception if populating or querying the store fails
 */
private static void testQueryWebPageSingleKey(DataStore<String, WebPage> store, String[] fields) throws Exception {
    createWebPageData(store);
    for (int urlIndex = 0; urlIndex < URLS.length; urlIndex++) {
        Query<String, WebPage> singleKeyQuery = store.newQuery();
        singleKeyQuery.setFields(fields);
        singleKeyQuery.setKey(URLS[urlIndex]);

        Result<String, WebPage> rows = singleKeyQuery.execute();
        // Exactly one row is expected: the first next() succeeds ...
        assertTrue(rows.next());
        assertWebPage(rows.get(), urlIndex);
        // ... and the second one reports that the result is exhausted.
        assertFalse(rows.next());
    }
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage)

Example 25 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class DataStoreTestUtil method testQueryWebPageKeyRange.

/**
 * Runs key-range queries over the sorted {@code URLS} and checks each
 * returned page and the number of pages returned.
 *
 * <p>The expected count is computed as if both bounds are inclusive: with
 * both keys set, a range from index {@code i} to {@code j} expects
 * {@code j + 1 - i} records. When a bound is not set, the loop over that
 * bound runs only once.
 *
 * @param store store that is populated via {@code createWebPageData} and then queried
 * @param setStartKeys whether each query gets a start key
 * @param setEndKeys whether each query gets an end key
 * @throws Exception if populating or querying the store fails
 */
public static void testQueryWebPageKeyRange(DataStore<String, WebPage> store, boolean setStartKeys, boolean setEndKeys) throws Exception {
    createWebPageData(store);

    // Sorted copy of the URL keys.
    List<String> orderedKeys = new ArrayList<>();
    Collections.addAll(orderedKeys, URLS);
    Collections.sort(orderedKeys);
    final int keyCount = orderedKeys.size();

    // Try every (start, end) pair with start <= end.
    for (int start = 0; start < keyCount; start++) {
        for (int end = start; end < keyCount; end++) {
            Query<String, WebPage> rangeQuery = store.newQuery();
            if (setStartKeys) {
                rangeQuery.setStartKey(orderedKeys.get(start));
            }
            if (setEndKeys) {
                rangeQuery.setEndKey(orderedKeys.get(end));
            }

            Result<String, WebPage> rows = rangeQuery.execute();
            int seen = 0;
            while (rows.next()) {
                WebPage current = rows.get();
                assertWebPage(current, URL_INDEXES.get(current.getUrl().toString()));
                seen++;
            }

            int upperBound = setEndKeys ? end + 1 : keyCount;
            int lowerBound = setStartKeys ? start : 0;
            assertEquals(upperBound - lowerBound, seen);

            // Without an end key, every value of 'end' issues the same query.
            if (!setEndKeys) {
                break;
            }
        }
        // Without a start key, every value of 'start' issues the same queries.
        if (!setStartKeys) {
            break;
        }
    }
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) ArrayList(java.util.ArrayList)

Aggregations

WebPage (org.apache.gora.examples.generated.WebPage)67 Test (org.junit.Test)33 Utf8 (org.apache.avro.util.Utf8)32 DBObject (com.mongodb.DBObject)7 Configuration (org.apache.hadoop.conf.Configuration)6 Employee (org.apache.gora.examples.generated.Employee)5 Metadata (org.apache.gora.examples.generated.Metadata)4 BeanFactoryImpl (org.apache.gora.persistency.impl.BeanFactoryImpl)4 ByteBuffer (java.nio.ByteBuffer)3 org.apache.hadoop.hbase.client (org.apache.hadoop.hbase.client)3 ByteArrayInputStream (java.io.ByteArrayInputStream)2 DataInputStream (java.io.DataInputStream)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 Field (org.apache.avro.Schema.Field)2 TokenDatum (org.apache.gora.examples.generated.TokenDatum)2 FilterList (org.apache.gora.filter.FilterList)2 TableName (org.apache.hadoop.hbase.TableName)2 Job (org.apache.hadoop.mapreduce.Job)2 Properties (java.util.Properties)1