use of org.apache.gora.examples.generated.WebPage in project gora by apache.
the class WebPageDataCreator method createWebPageData.
/**
 * Populates the given data store with one {@link WebPage} per entry of {@code URLS}.
 *
 * <p>Each page receives its URL, the raw content bytes and space-separated tokens of the
 * matching {@code CONTENTS} entry (when that entry is not null), the outlinks described by
 * {@code LINKS} and {@code ANCHORS}, and a {@link Metadata} record with version 1 and a
 * single "metakey"/"metavalue" pair. The store is flushed once after all pages are put.
 *
 * <p>Any exception raised while building or storing the pages is caught here and logged
 * together with its stack trace; it is not propagated to the caller.
 *
 * @param dataStore the store that receives the generated pages, keyed by URL
 * @throws IOException declared for callers; failures inside the body are caught and logged
 */
public static void createWebPageData(DataStore<String, WebPage> dataStore) throws IOException {
  try {
    log.info("creating web page data");
    for (int i = 0; i < URLS.length; i++) {
      WebPage page = WebPage.newBuilder().build();
      page.setUrl(new Utf8(URLS[i]));
      page.setParsedContent(new ArrayList<CharSequence>());
      if (CONTENTS[i] != null) {
        page.setContent(ByteBuffer.wrap(CONTENTS[i].getBytes(Charset.defaultCharset())));
        // Parsed content is simply the raw content split on single spaces.
        for (String token : CONTENTS[i].split(" ")) {
          page.getParsedContent().add(new Utf8(token));
        }
      }
      // LINKS[i][j] is an index into URLS; ANCHORS[i][j] is the anchor text for that link.
      for (int j = 0; j < LINKS[i].length; j++) {
        page.getOutlinks().put(new Utf8(URLS[LINKS[i][j]]), new Utf8(ANCHORS[i][j]));
      }
      Metadata metadata = Metadata.newBuilder().build();
      metadata.setVersion(1);
      metadata.getData().put(new Utf8("metakey"), new Utf8("metavalue"));
      page.setMetadata(metadata);
      dataStore.put(URLS[i], page);
    }
    dataStore.flush();
    log.info("finished creating web page data");
  } catch (Exception e) {
    // Keep the best-effort contract (no rethrow), but record the cause and stack trace
    // at error level so a failed fixture load can actually be diagnosed.
    log.error("error creating web page data", e);
  }
}
use of org.apache.gora.examples.generated.WebPage in project gora by apache.
the class WordCount method wordCount.
/**
 * Builds a job over a fresh query on {@code inStore} that writes to {@code outStore},
 * runs it, and waits for it to finish.
 *
 * @param inStore  the store whose new query feeds the job
 * @param outStore the store handed to {@code createJob} as the job's output
 * @return 0 when {@code waitForCompletion} reports success, 1 otherwise
 */
public int wordCount(DataStore<String, WebPage> inStore, DataStore<String, TokenDatum> outStore) throws IOException, InterruptedException, ClassNotFoundException {
  Job countJob = createJob(inStore.newQuery(), outStore);
  boolean succeeded = countJob.waitForCompletion(true);
  if (succeeded) {
    return 0;
  }
  return 1;
}
use of org.apache.gora.examples.generated.WebPage in project gora by apache.
the class MapReduceSerialization method run.
/**
 * Resolves the input and output data stores from the command-line arguments and
 * delegates to {@code mapReduceSerialization}.
 *
 * <p>With no arguments both stores come from the factory without a data store class
 * name. Otherwise {@code args[0]} is the data store class name for the input store, and
 * {@code args[1]}, when present, is the class name for the output store (falling back to
 * {@code args[0]}).
 *
 * @param args optional data store class names as described above
 * @return the value returned by {@code mapReduceSerialization}
 */
@Override
public int run(String[] args) throws Exception {
  Configuration conf = new Configuration();

  if (args.length == 0) {
    // No class names supplied: use the factory overload without a data store class name.
    DataStore<String, WebPage> source = DataStoreFactory.getDataStore(String.class, WebPage.class, conf);
    DataStore<String, WebPage> sink = DataStoreFactory.getDataStore(String.class, WebPage.class, conf);
    return mapReduceSerialization(source, sink);
  }

  String inputStoreClass = args[0];
  DataStore<String, WebPage> source =
      DataStoreFactory.getDataStore(inputStoreClass, String.class, WebPage.class, conf);

  // The output store reuses the input class name unless a second argument overrides it.
  String outputStoreClass = (args.length > 1) ? args[1] : inputStoreClass;
  DataStore<String, WebPage> sink =
      DataStoreFactory.getDataStore(outputStoreClass, String.class, WebPage.class, conf);

  return mapReduceSerialization(source, sink);
}
use of org.apache.gora.examples.generated.WebPage in project gora by apache.
the class JCacheGoraDataStoreTest method testGetMissingValue.
/**
 * Verifies that looking up the key "missing" with an empty field list returns null.
 */
@Test
public void testGetMissingValue() throws IOException {
  DataStore<String, WebPage> pages = super.webPageStore;
  String[] noFields = new String[0];
  assertNull(pages.get("missing", noFields));
}
use of org.apache.gora.examples.generated.WebPage in project gora by apache.
the class TestHBaseStore method assertPutBytes.
/**
 * Asserts that writing bytes actually works at low level in HBase.
 * Checks writing null unions too.
 *
 * <p>Three states of the "content" field of row "com.example/http" are verified, each
 * through the data store and by reading the raw cell straight from the "WebPage" table:
 * the bytes passed in, a null value (the cell must be absent), and an empty byte array
 * (the cell must exist with length 0).
 *
 * @param contentBytes the bytes expected to be stored in the "content" cell on entry
 * @throws IOException if the HBase connection, a table read, or a data store call fails
 */
@Override
public void assertPutBytes(byte[] contentBytes) throws IOException {
  // One connection serves all raw reads; try-with-resources releases it even when an
  // assertion fails part-way through.
  try (Connection conn = ConnectionFactory.createConnection(conf)) {
    // Check first the parameter "contentBytes" if written+read right.
    byte[] actualBytes = readRawContentCell(conn);
    assertNotNull(actualBytes);
    assertTrue(Arrays.equals(contentBytes, actualBytes));

    // Since "content" is an optional field, we are forced to reopen the DataStore
    // to retrieve the union correctly
    // Test writing+reading a null value. FIELD in HBASE MUST become DELETED
    WebPage page = webPageStore.get("com.example/http");
    page.setContent(null);
    webPageStore.put("com.example/http", page);
    webPageStore.close();
    webPageStore = testDriver.createDataStore(String.class, WebPage.class);
    page = webPageStore.get("com.example/http");
    assertNull(page.getContent());
    // Check directly with HBase
    actualBytes = readRawContentCell(conn);
    assertNull(actualBytes);

    // Test writing+reading an empty bytes field. FIELD in HBASE MUST
    // become EMPTY (byte[0])
    page = webPageStore.get("com.example/http");
    page.setContent(ByteBuffer.wrap("".getBytes(Charset.defaultCharset())));
    webPageStore.put("com.example/http", page);
    webPageStore.close();
    webPageStore = testDriver.createDataStore(String.class, WebPage.class);
    page = webPageStore.get("com.example/http");
    assertTrue(Arrays.equals("".getBytes(Charset.defaultCharset()), page.getContent().array()));
    // Check directly with HBase (through the shared connection rather than a
    // separately constructed HTable, matching the two reads above)
    actualBytes = readRawContentCell(conn);
    assertNotNull(actualBytes);
    assertEquals(0, actualBytes.length);
  }
}

/**
 * Reads the raw "content" cell (null qualifier) of row "com.example/http" from the
 * "WebPage" table, closing the table handle before returning.
 *
 * @param conn an open HBase connection
 * @return the cell value, or null when the lookup yields no value
 * @throws IOException if the table cannot be opened or read
 */
private static byte[] readRawContentCell(Connection conn) throws IOException {
  try (Table table = conn.getTable(TableName.valueOf("WebPage"))) {
    Get get = new Get(Bytes.toBytes("com.example/http"));
    org.apache.hadoop.hbase.client.Result result = table.get(get);
    return result.getValue(Bytes.toBytes("content"), null);
  }
}
Aggregations