Usage example of org.apache.flume.Channel in the Apache Phoenix project: the process method of the PhoenixSink class.
/**
 * Drains up to {@code batchSize} events from the configured channel and persists
 * them to HBase through the Phoenix serializer, all inside a single Flume
 * channel transaction.
 *
 * @return {@link Status#READY} when a full batch was processed; otherwise
 *         {@link Status#BACKOFF} (empty channel, underflow, or channel error).
 * @throws EventDeliveryException if persisting the batch fails; the channel
 *         transaction is rolled back so the events can be redelivered.
 */
@Override
public Status process() throws EventDeliveryException {
    Status status = Status.READY;
    Channel channel = getChannel();
    Transaction transaction = null;
    List<Event> events = Lists.newArrayListWithExpectedSize(this.batchSize);
    long startTime = System.nanoTime();
    try {
        transaction = channel.getTransaction();
        transaction.begin();
        for (long i = 0; i < this.batchSize; i++) {
            Event event = channel.take();
            if (event == null) {
                // Channel drained before a full batch was collected.
                status = Status.BACKOFF;
                if (i == 0) {
                    sinkCounter.incrementBatchEmptyCount();
                } else {
                    sinkCounter.incrementBatchUnderflowCount();
                }
                break;
            } else {
                events.add(event);
            }
        }
        if (!events.isEmpty()) {
            if (events.size() == this.batchSize) {
                sinkCounter.incrementBatchCompleteCount();
            } else {
                sinkCounter.incrementBatchUnderflowCount();
                status = Status.BACKOFF;
            }
            // save to Hbase
            serializer.upsertEvents(events);
            sinkCounter.addToEventDrainSuccessCount(events.size());
        } else {
            logger.debug("no events to process ");
            sinkCounter.incrementBatchEmptyCount();
            status = Status.BACKOFF;
        }
        transaction.commit();
    } catch (ChannelException e) {
        // Fix: this exception was previously swallowed without any logging.
        // Also guard the rollback: if channel.getTransaction() itself threw,
        // `transaction` is still null and an unguarded rollback() would NPE
        // and mask the original failure.
        logger.error("channel exception in Phoenix Sink", e);
        rollbackQuietly(transaction);
        status = Status.BACKOFF;
        sinkCounter.incrementConnectionFailedCount();
    } catch (SQLException e) {
        sinkCounter.incrementConnectionFailedCount();
        rollbackQuietly(transaction);
        logger.error("exception while persisting to Hbase ", e);
        throw new EventDeliveryException("Failed to persist message to Hbase", e);
    } catch (Throwable e) {
        rollbackQuietly(transaction);
        logger.error("exception while processing in Phoenix Sink", e);
        throw new EventDeliveryException("Failed to persist message", e);
    } finally {
        logger.info(String.format("Time taken to process [%s] events was [%s] seconds", events.size(), TimeUnit.SECONDS.convert(System.nanoTime() - startTime, TimeUnit.NANOSECONDS)));
        if (transaction != null) {
            transaction.close();
        }
    }
    return status;
}

/**
 * Rolls back the given transaction if one was actually started. A failed
 * rollback is deliberately suppressed so it cannot mask the exception that
 * triggered the rollback in the first place.
 */
private static void rollbackQuietly(Transaction transaction) {
    if (transaction != null) {
        try {
            transaction.rollback();
        } catch (Exception ignored) {
            // best effort: the original exception is the primary signal
        }
    }
}
Usage example of org.apache.flume.Channel in the Apache Phoenix project: the testBatchEvents method of the RegexEventSerializerIT class.
/**
 * Verifies that a batch of tab-separated events put on the channel is drained
 * by the sink and lands as one row per event in the Phoenix table.
 *
 * Bug fix: the event-building loop previously iterated over
 * {@code eventList.size()} of a freshly created (and therefore empty) list,
 * so no events were ever created or put on the channel and the final
 * assertion passed vacuously (0 == 0). The loop now runs {@code numEvents}
 * times as intended.
 */
@Test
public void testBatchEvents() throws EventDeliveryException, SQLException {
    final String fullTableName = generateUniqueName();
    initSinkContextWithDefaults(fullTableName);
    sink = new PhoenixSink();
    Configurables.configure(sink, sinkContext);
    assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
    final Channel channel = this.initChannel();
    sink.setChannel(channel);
    sink.start();
    int numEvents = 150;
    String col1 = "val1";
    String col2 = "val2";
    String eventBody = null;
    List<Event> eventList = Lists.newArrayListWithCapacity(numEvents);
    // Build numEvents distinct "val1<i>\tval2<i>" event bodies.
    for (int i = 0; i < numEvents; i++) {
        eventBody = (col1 + i) + "\t" + (col2 + i);
        Event event = EventBuilder.withBody(Bytes.toBytes(eventBody));
        eventList.add(event);
    }
    // put event in channel
    Transaction transaction = channel.getTransaction();
    transaction.begin();
    for (Event event : eventList) {
        channel.put(event);
    }
    transaction.commit();
    transaction.close();
    sink.process();
    int rowsInDb = countRows(fullTableName);
    assertEquals(eventList.size(), rowsInDb);
    sink.stop();
    assertEquals(LifecycleState.STOP, sink.getLifecycleState());
}
Usage example of org.apache.flume.Channel in the Apache Phoenix project: the testApacheLogRegex method of the RegexEventSerializerIT class.
/**
 * End-to-end test: configures the sink with an Apache access-log regex
 * serializer, puts two log-line events on the channel, processes them, and
 * verifies exactly two rows were written to the Phoenix table.
 *
 * Improvement: the JDBC Statement and ResultSet were never closed and the
 * Connection used a manual try/finally; all three are now managed with
 * try-with-resources. An assertFalse was added to pin that exactly two rows
 * exist (the table name is unique to this test, so no other rows can appear).
 */
@Test
public void testApacheLogRegex() throws Exception {
    sinkContext = new Context();
    final String fullTableName = generateUniqueName();
    final String logRegex = "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) \"([^ ]+) ([^ ]+)" + " ([^\"]+)\" (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\")" + " ([^ \"]*|\"[^\"]*\"))?";
    final String columns = "host,identity,user,time,method,request,protocol,status,size,referer,agent";
    String ddl = "CREATE TABLE " + fullTableName + " (uid VARCHAR NOT NULL, user VARCHAR, time varchar, host varchar , identity varchar, method varchar, request varchar , protocol varchar," + " status integer , size integer , referer varchar , agent varchar CONSTRAINT pk PRIMARY KEY (uid))\n";
    sinkContext.put(FlumeConstants.CONFIG_TABLE, fullTableName);
    sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
    sinkContext.put(FlumeConstants.CONFIG_SERIALIZER, EventSerializers.REGEX.name());
    sinkContext.put(FlumeConstants.CONFIG_TABLE_DDL, ddl);
    sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_REGULAR_EXPRESSION, logRegex);
    sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES, columns);
    sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR, DefaultKeyGenerator.UUID.name());
    String message1 = "33.22.11.00 - user1 [12/Dec/2013:07:01:19 +0000] " + "\"GET /wp-admin/css/install.css HTTP/1.0\" 200 813 " + "\"http://www.google.com\" \"Mozilla/5.0 (comp" + "atible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)\"";
    String message2 = "192.168.20.1 - user2 [13/Dec/2013:06:05:19 +0000] " + "\"GET /wp-admin/css/install.css HTTP/1.0\" 400 363 " + "\"http://www.salesforce.com/in/?ir=1\" \"Mozilla/5.0 (comp" + "atible;)\"";
    sink = new PhoenixSink();
    Configurables.configure(sink, sinkContext);
    assertEquals(LifecycleState.IDLE, sink.getLifecycleState());
    final Channel channel = this.initChannel();
    sink.setChannel(channel);
    sink.start();
    final Event event1 = EventBuilder.withBody(Bytes.toBytes(message1));
    final Event event2 = EventBuilder.withBody(Bytes.toBytes(message2));
    final Transaction transaction = channel.getTransaction();
    transaction.begin();
    channel.put(event1);
    channel.put(event2);
    transaction.commit();
    transaction.close();
    sink.process();
    final String query = " SELECT * FROM \n " + fullTableName;
    Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
    // try-with-resources guarantees the connection, statement and result set
    // are all closed even if an assertion fails.
    try (Connection conn = DriverManager.getConnection(getUrl(), props);
         Statement stmt = conn.createStatement();
         ResultSet rs = stmt.executeQuery(query)) {
        assertTrue(rs.next());
        assertTrue(rs.next());
        assertFalse(rs.next());
    }
    sink.stop();
    assertEquals(LifecycleState.STOP, sink.getLifecycleState());
}
Usage example of org.apache.flume.Channel in the Apache Phoenix project: the testInvalidTable method of the PhoenixSinkIT class.
/**
 * Starting the sink against a table that was never created (no DDL supplied)
 * must fail with Phoenix's "Table undefined" error.
 */
@Test
public void testInvalidTable() {
    // Point the sink at a non-existent table; note no CONFIG_TABLE_DDL is set.
    sinkContext = new Context();
    sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
    sinkContext.put(FlumeConstants.CONFIG_TABLE, "flume_test");
    sinkContext.put(FlumeConstants.CONFIG_SERIALIZER, EventSerializers.REGEX.name());
    sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR, DefaultKeyGenerator.TIMESTAMP.name());
    sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES, "col1,col2");
    sink = new PhoenixSink();
    Configurables.configure(sink, sinkContext);
    sink.setChannel(this.initChannel());
    // start() is expected to throw; capture the exception and verify its message.
    Exception caught = null;
    try {
        sink.start();
    } catch (Exception e) {
        caught = e;
    }
    if (caught == null) {
        fail();
    }
    assertTrue(caught.getMessage(), caught.getMessage().contains("ERROR 1012 (42M03): Table undefined."));
}
Usage example of org.apache.flume.Channel in the Apache Phoenix project: the testSinkLifecycle method of the PhoenixSinkIT class.
/**
 * Walks the sink through its full lifecycle — configure, start, stop — and
 * checks the expected LifecycleState after each step (IDLE, START, STOP).
 */
@Test
public void testSinkLifecycle() {
    String tableName = generateUniqueName();
    String createTableDdl = "CREATE TABLE " + tableName + " (flume_time timestamp not null, col1 varchar , col2 varchar" + " CONSTRAINT pk PRIMARY KEY (flume_time))\n";

    // Sink configuration: target table, JDBC URL, and a regex serializer that
    // splits each event body on a single tab into col1/col2.
    sinkContext = new Context();
    sinkContext.put(FlumeConstants.CONFIG_JDBC_URL, getUrl());
    sinkContext.put(FlumeConstants.CONFIG_TABLE, tableName);
    sinkContext.put(FlumeConstants.CONFIG_TABLE_DDL, createTableDdl);
    sinkContext.put(FlumeConstants.CONFIG_SERIALIZER, EventSerializers.REGEX.name());
    sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_COLUMN_NAMES, "col1,col2");
    sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_REGULAR_EXPRESSION, "^([^\t]+)\t([^\t]+)$");
    sinkContext.put(FlumeConstants.CONFIG_SERIALIZER_PREFIX + FlumeConstants.CONFIG_ROWKEY_TYPE_GENERATOR, DefaultKeyGenerator.TIMESTAMP.name());

    sink = new PhoenixSink();
    Configurables.configure(sink, sinkContext);
    Assert.assertEquals(LifecycleState.IDLE, sink.getLifecycleState());

    sink.setChannel(this.initChannel());
    sink.start();
    Assert.assertEquals(LifecycleState.START, sink.getLifecycleState());

    sink.stop();
    Assert.assertEquals(LifecycleState.STOP, sink.getLifecycleState());
}
Aggregations