Search in sources :

Example 56 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class WebPageDataCreator method createWebPageData.

/**
 * Fills the given data store with one {@link WebPage} per entry of {@code URLS}.
 *
 * <p>For every index {@code i} the page receives the URL {@code URLS[i]}, the raw and
 * space-tokenized form of {@code CONTENTS[i]} (when it is not {@code null}), one outlink per
 * entry of {@code LINKS[i]} labelled with the matching {@code ANCHORS[i]} entry, and a
 * {@link Metadata} record with version 1 and a single {@code metakey -> metavalue} entry.
 * The page is stored under the key {@code URLS[i]} and the store is flushed once at the end.
 *
 * <p>Failures are not propagated: any exception raised while building or storing the pages is
 * logged together with its stack trace and the method returns normally.
 *
 * @param dataStore the store that receives the generated pages
 * @throws IOException declared for API compatibility; failures are currently logged, not thrown
 */
public static void createWebPageData(DataStore<String, WebPage> dataStore) throws IOException {
    try {
        log.info("creating web page data");
        for (int i = 0; i < URLS.length; i++) {
            WebPage page = WebPage.newBuilder().build();
            page.setUrl(new Utf8(URLS[i]));
            page.setParsedContent(new ArrayList<CharSequence>());
            if (CONTENTS[i] != null) {
                page.setContent(ByteBuffer.wrap(CONTENTS[i].getBytes(Charset.defaultCharset())));
                // The parsed content is simply the raw content split on single spaces.
                for (String token : CONTENTS[i].split(" ")) {
                    page.getParsedContent().add(new Utf8(token));
                }
            }
            // LINKS[i][j] is an index into URLS; ANCHORS[i][j] is the anchor text for that link.
            for (int j = 0; j < LINKS[i].length; j++) {
                page.getOutlinks().put(new Utf8(URLS[LINKS[i][j]]), new Utf8(ANCHORS[i][j]));
            }
            Metadata metadata = Metadata.newBuilder().build();
            metadata.setVersion(1);
            metadata.getData().put(new Utf8("metakey"), new Utf8("metavalue"));
            page.setMetadata(metadata);
            dataStore.put(URLS[i], page);
        }
        dataStore.flush();
        log.info("finished creating web page data");
    } catch (Exception e) {
        // Keep the original best-effort contract (no rethrow), but record the failure at
        // error level together with its cause so a broken fixture load can be diagnosed.
        log.error("error creating web page data", e);
    }
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Metadata(org.apache.gora.examples.generated.Metadata) Utf8(org.apache.avro.util.Utf8) IOException(java.io.IOException)

Example 57 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class WordCount method wordCount.

/**
 * Builds a job from a fresh query on {@code inStore} and the given {@code outStore}, runs it
 * and waits for it to complete.
 *
 * @param inStore the store the query is created from
 * @param outStore the store handed to {@code createJob} together with the query
 * @return {@code 0} if {@code waitForCompletion} reports success, {@code 1} otherwise
 */
public int wordCount(DataStore<String, WebPage> inStore, DataStore<String, TokenDatum> outStore) throws IOException, InterruptedException, ClassNotFoundException {
    Query<String, WebPage> allPages = inStore.newQuery();
    Job wordCountJob = createJob(allPages, outStore);
    boolean succeeded = wordCountJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Job(org.apache.hadoop.mapreduce.Job)

Example 58 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class MapReduceSerialization method run.

/**
 * Creates the input and output {@link WebPage} stores and runs
 * {@code mapReduceSerialization} on them.
 *
 * <p>With no arguments both stores come from the factory's class-less overload. Otherwise
 * {@code args[0]} names the data store class for the input store, and {@code args[1]} (or
 * {@code args[0]} again when absent) names the class for the output store.
 *
 * @param args optional data store class names: input first, output second
 * @return the value returned by {@code mapReduceSerialization}
 */
@Override
public int run(String[] args) throws Exception {
    final Configuration conf = new Configuration();

    if (args.length == 0) {
        DataStore<String, WebPage> defaultIn = DataStoreFactory.getDataStore(String.class, WebPage.class, conf);
        DataStore<String, WebPage> defaultOut = DataStoreFactory.getDataStore(String.class, WebPage.class, conf);
        return mapReduceSerialization(defaultIn, defaultOut);
    }

    final String inStoreClass = args[0];
    // The output store reuses the input store class unless a second argument is supplied.
    final String outStoreClass = (args.length > 1) ? args[1] : inStoreClass;

    DataStore<String, WebPage> source = DataStoreFactory.getDataStore(inStoreClass, String.class, WebPage.class, conf);
    DataStore<String, WebPage> sink = DataStoreFactory.getDataStore(outStoreClass, String.class, WebPage.class, conf);
    return mapReduceSerialization(source, sink);
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Configuration(org.apache.hadoop.conf.Configuration)

Example 59 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class JCacheGoraDataStoreTest method testGetMissingValue.

/**
 * Verifies that looking up the key {@code "missing"} with an empty field list yields
 * {@code null}.
 */
@Test
public void testGetMissingValue() throws IOException {
    final String[] noFields = new String[0];
    DataStore<String, WebPage> pages = super.webPageStore;
    WebPage fetched = pages.get("missing", noFields);
    assertNull(fetched);
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) Test(org.junit.Test)

Example 60 with WebPage

use of org.apache.gora.examples.generated.WebPage in project gora by apache.

the class TestHBaseStore method assertPutBytes.

/**
 * Asserts that writing bytes actually works at low level in HBase.
 * Checks writing null unions too.
 *
 * <p>Three states of the {@code content} column of row {@code com.example/http} in table
 * {@code WebPage} are verified, each both through the Gora store and by reading HBase directly:
 * <ol>
 *   <li>the stored value equals {@code contentBytes};</li>
 *   <li>after setting the content to {@code null}, the cell is absent;</li>
 *   <li>after setting the content to an empty buffer, the cell holds a zero-length value.</li>
 * </ol>
 *
 * @param contentBytes the bytes expected to be stored in the {@code content} column on entry
 * @throws IOException if talking to HBase or the data store fails
 */
@Override
public void assertPutBytes(byte[] contentBytes) throws IOException {
    final String rowKey = "com.example/http";
    final byte[] emptyBytes = new byte[0];

    // One connection for all raw reads; try-with-resources guarantees it is released even
    // when an assertion fails part-way through.
    try (Connection conn = ConnectionFactory.createConnection(conf)) {
        // Check first the parameter "contentBytes" if written+read right.
        byte[] actualBytes = readContentFromHBase(conn, rowKey);
        assertNotNull(actualBytes);
        assertTrue(Arrays.equals(contentBytes, actualBytes));

        // Test writing+reading a null value. FIELD in HBASE MUST become DELETED.
        WebPage page = webPageStore.get(rowKey);
        page.setContent(null);
        webPageStore.put(rowKey, page);
        // Since "content" is an optional field, we are forced to reopen the DataStore
        // to retrieve the union correctly.
        webPageStore.close();
        webPageStore = testDriver.createDataStore(String.class, WebPage.class);
        page = webPageStore.get(rowKey);
        assertNull(page.getContent());
        // Check directly with HBase
        assertNull(readContentFromHBase(conn, rowKey));

        // Test writing+reading an empty bytes field. FIELD in HBASE MUST
        // become EMPTY (byte[0])
        page = webPageStore.get(rowKey);
        page.setContent(ByteBuffer.wrap(emptyBytes));
        webPageStore.put(rowKey, page);
        webPageStore.close();
        webPageStore = testDriver.createDataStore(String.class, WebPage.class);
        page = webPageStore.get(rowKey);
        assertTrue(Arrays.equals(emptyBytes, page.getContent().array()));
        // Check directly with HBase
        actualBytes = readContentFromHBase(conn, rowKey);
        assertNotNull(actualBytes);
        assertEquals(0, actualBytes.length);
    }
}

/**
 * Reads the raw value of the {@code content} column (null qualifier) for the given row of
 * the {@code WebPage} table, bypassing the Gora store. The table handle is closed before
 * returning.
 *
 * @param conn an open HBase connection
 * @param rowKey the row to read
 * @return the stored bytes, or {@code null} when the lookup yields no value
 * @throws IOException if the HBase read fails
 */
private byte[] readContentFromHBase(Connection conn, String rowKey) throws IOException {
    try (Table table = conn.getTable(TableName.valueOf("WebPage"))) {
        org.apache.hadoop.hbase.client.Result result = table.get(new Get(Bytes.toBytes(rowKey)));
        return result.getValue(Bytes.toBytes("content"), null);
    }
}
Also used : TableName(org.apache.hadoop.hbase.TableName) WebPage(org.apache.gora.examples.generated.WebPage) org.apache.hadoop.hbase.client(org.apache.hadoop.hbase.client)

Aggregations

WebPage (org.apache.gora.examples.generated.WebPage): 67; Test (org.junit.Test): 33; Utf8 (org.apache.avro.util.Utf8): 32; DBObject (com.mongodb.DBObject): 7; Configuration (org.apache.hadoop.conf.Configuration): 6; Employee (org.apache.gora.examples.generated.Employee): 5; Metadata (org.apache.gora.examples.generated.Metadata): 4; BeanFactoryImpl (org.apache.gora.persistency.impl.BeanFactoryImpl): 4; ByteBuffer (java.nio.ByteBuffer): 3; org.apache.hadoop.hbase.client (org.apache.hadoop.hbase.client): 3; ByteArrayInputStream (java.io.ByteArrayInputStream): 2; DataInputStream (java.io.DataInputStream): 2; IOException (java.io.IOException): 2; ArrayList (java.util.ArrayList): 2; Field (org.apache.avro.Schema.Field): 2; TokenDatum (org.apache.gora.examples.generated.TokenDatum): 2; FilterList (org.apache.gora.filter.FilterList): 2; TableName (org.apache.hadoop.hbase.TableName): 2; Job (org.apache.hadoop.mapreduce.Job): 2; Properties (java.util.Properties): 1