use of org.apache.avro.util.Utf8 in project gora by apache.
the class DataStoreTestUtil method assertWebPage.
/**
 * Asserts that {@code page} matches the fixture data stored at index {@code i}
 * of the {@code URLS}, {@code CONTENTS}, {@code LINKS} and {@code ANCHORS} arrays.
 *
 * <p>Checks performed:
 * <ul>
 *   <li>the page is non-null and its URL equals {@code URLS[i]};</li>
 *   <li>if the page has content, its bytes equal {@code CONTENTS[i]} encoded with the
 *       platform default charset, and every parsed-content token equals the
 *       corresponding space-separated token of {@code CONTENTS[i]};</li>
 *   <li>if the page has no content, {@code CONTENTS[i]} is null and the parsed
 *       content list is present but empty;</li>
 *   <li>if {@code LINKS[i]} is non-empty, each linked URL maps to the expected
 *       anchor text in the page's outlinks; otherwise outlinks is null or empty.</li>
 * </ul>
 *
 * @param page the page to verify
 * @param i    index of the expected fixture entry
 * @throws Exception declared by the original signature; propagated from the helpers used
 */
public static void assertWebPage(WebPage page, int i) throws Exception {
  assertNotNull(page);
  assertEquals(URLS[i], page.getUrl().toString());

  // 'content' is optional
  if (page.getContent() != null) {
    // Read the bytes once so the message and the comparison use the same data.
    byte[] actualContent = toByteArray(page.getContent());
    byte[] expectedContent = CONTENTS[i].getBytes(Charset.defaultCharset());
    assertTrue("content error: expected=" + CONTENTS[i]
        + " actual=" + new String(actualContent, Charset.defaultCharset())
        + " i=" + i,
        Arrays.equals(actualContent, expectedContent));

    List<CharSequence> parsedContent = page.getParsedContent();
    assertNotNull(parsedContent);
    assertTrue(parsedContent.size() > 0);

    int j = 0;
    String[] tokens = CONTENTS[i].split(" ");
    for (CharSequence token : parsedContent) {
      // Fail with a readable message instead of an ArrayIndexOutOfBoundsException
      // when the page carries more tokens than the fixture.
      assertTrue("parsed content has more tokens than expected (" + tokens.length + ") i=" + i,
          j < tokens.length);
      assertEquals(tokens[j++], token.toString());
    }
  } else {
    // when page.getContent() is null
    assertTrue(CONTENTS[i] == null);
    List<CharSequence> parsedContent = page.getParsedContent();
    assertNotNull(parsedContent);
    assertTrue(parsedContent.size() == 0);
  }

  if (LINKS[i].length > 0) {
    assertNotNull(page.getOutlinks());
    assertTrue(page.getOutlinks().size() > 0);
    for (int k = 0; k < LINKS[i].length; k++) {
      String linkedUrl = URLS[LINKS[i][k]];
      CharSequence anchor = page.getOutlinks().get(new Utf8(linkedUrl));
      // A missing outlink should be reported as an assertion failure, not an NPE.
      assertNotNull("missing outlink for " + linkedUrl + " i=" + i, anchor);
      assertEquals(ANCHORS[i][k], anchor.toString());
    }
  } else {
    assertTrue(page.getOutlinks() == null || page.getOutlinks().isEmpty());
  }
}
use of org.apache.avro.util.Utf8 in project gora by apache.
the class DataStoreTestUtil method createWebPage.
/**
 * Creates a small {@link WebPage} fixture.
 *
 * <p>The returned page has its URL set to {@code "url.."}, its content set to the
 * bytes of {@code "test content"} (platform default charset), an empty parsed-content
 * list, and a freshly built {@link Metadata} instance.
 *
 * @param <K> unused type parameter, retained from the original signature
 * @return the populated page
 */
private static <K> WebPage createWebPage() {
  final WebPage page = WebPage.newBuilder().build();

  page.setUrl(new Utf8("url.."));

  final byte[] contentBytes = "test content".getBytes(Charset.defaultCharset());
  page.setContent(ByteBuffer.wrap(contentBytes));

  page.setParsedContent(new ArrayList<CharSequence>());
  page.setMetadata(Metadata.newBuilder().build());

  return page;
}
use of org.apache.avro.util.Utf8 in project gora by apache.
the class DataStoreTestUtil method testPutArray.
/**
 * Stores a {@link WebPage} whose parsed content is a list of four tokens.
 *
 * <p>Creates the schema on {@code store}, builds a page whose parsed-content list
 * holds the tokens {@code "example"}, {@code "content"}, {@code "in"} and
 * {@code "example.com"} as {@link Utf8} values, puts it under the key
 * {@code "com.example/http"}, and then closes the store.
 *
 * @param store the data store to write to
 * @throws Exception if any store operation fails
 */
public static void testPutArray(DataStore<String, WebPage> store) throws Exception {
  store.createSchema();

  final WebPage webPage = WebPage.newBuilder().build();
  final String[] words = { "example", "content", "in", "example.com" };

  // Install the list first, then append through the page's own accessor.
  webPage.setParsedContent(new ArrayList<CharSequence>());
  for (int idx = 0; idx < words.length; idx++) {
    webPage.getParsedContent().add(new Utf8(words[idx]));
  }

  store.put("com.example/http", webPage);
  store.close();
}
use of org.apache.avro.util.Utf8 in project gora by apache.
the class TestFilterList method testOperatorMustPassOne.
/**
 * Exercises a filter list built with
 * {@link org.apache.gora.filter.FilterList.Operator#MUST_PASS_ONE}.
 *
 * <p>Both filters target the outlinks field with {@code EQUALS} and
 * filter-if-missing enabled. {@code filter1} uses map key {@code "example"} and
 * operand {@code "http://example.org"}; {@code filter2} has no map key set here and
 * uses operand {@code "http://example2.org"}. The page contains a single outlink
 * {@code "example" -> "http://example.org"}. The test expects
 * {@code filterList.filter(...)} to return {@code true}.
 */
@Test
public void testOperatorMustPassOne() {
  final String outlinksField = WebPage.Field.OUTLINKS.toString();

  // Page under test: one outlink keyed by "example".
  WebPage page = WebPage.newBuilder().build();
  page.getOutlinks().put(new Utf8("example"), new Utf8("http://example.org"));

  // First filter: keyed lookup whose operand equals the stored outlink value.
  filter1.setFieldName(outlinksField);
  filter1.setMapKey(new Utf8("example"));
  filter1.setFilterOp(FilterOp.EQUALS);
  filter1.setFilterIfMissing(true);
  filter1.getOperands().add(new Utf8("http://example.org"));

  // Second filter: same field, different operand, no map key configured.
  filter2.setFieldName(outlinksField);
  filter2.setFilterOp(FilterOp.EQUALS);
  filter2.setFilterIfMissing(true);
  filter2.getOperands().add(new Utf8("http://example2.org"));

  filterList = new FilterList<>(Operator.MUST_PASS_ONE, fList);
  boolean result = filterList.filter("irrelevant", page);
  assertTrue(result);
}
use of org.apache.avro.util.Utf8 in project gora by apache.
the class TestFilterList method testOperatorMustPassAll.
/**
 * Exercises a filter list built with
 * {@link org.apache.gora.filter.FilterList.Operator#MUST_PASS_ALL}.
 *
 * <p>Both filters target the outlinks field with {@code EQUALS},
 * filter-if-missing enabled, and operand {@code "http://example.org"};
 * only {@code filter1} has the map key {@code "example"} set here. The page
 * contains a single outlink {@code "example" -> "http://example.org"}. The test
 * expects {@code filterList.filter(...)} to return {@code true}.
 */
@Test
public void testOperatorMustPassAll() {
  final String outlinksField = WebPage.Field.OUTLINKS.toString();

  // Page under test: one outlink keyed by "example".
  WebPage page = WebPage.newBuilder().build();
  page.getOutlinks().put(new Utf8("example"), new Utf8("http://example.org"));

  // First filter: keyed lookup whose operand equals the stored outlink value.
  filter1.setFieldName(outlinksField);
  filter1.setMapKey(new Utf8("example"));
  filter1.setFilterOp(FilterOp.EQUALS);
  filter1.setFilterIfMissing(true);
  filter1.getOperands().add(new Utf8("http://example.org"));

  // Second filter: same field and operand, no map key configured.
  filter2.setFieldName(outlinksField);
  filter2.setFilterOp(FilterOp.EQUALS);
  filter2.setFilterIfMissing(true);
  filter2.getOperands().add(new Utf8("http://example.org"));

  filterList = new FilterList<>(Operator.MUST_PASS_ALL, fList);
  boolean result = filterList.filter("irrelevant", page);
  assertTrue(result);
}
Aggregations