Example 41 with Tuple

Use of org.apache.storm.tuple.Tuple in project storm by apache.

From the class AbstractHdfsBolt, the method execute:

@Override
public final void execute(Tuple tuple) {
    synchronized (this.writeLock) {
        boolean forceSync = false;
        AbstractHDFSWriter writer = null;
        String writerKey = null;
        if (TupleUtils.isTick(tuple)) {
            LOG.debug("TICK! forcing a file system flush");
            this.collector.ack(tuple);
            forceSync = true;
        } else {
            writerKey = getHashKeyForTuple(tuple);
            try {
                writer = getOrCreateWriter(writerKey, tuple);
                this.offset = writer.write(tuple);
                tupleBatch.add(tuple);
            } catch (IOException e) {
                //If the write failed, try to sync anything already written
                LOG.info("Tuple failed to write, forcing a flush of existing data.");
                this.collector.reportError(e);
                forceSync = true;
                this.collector.fail(tuple);
            }
        }
        if (this.syncPolicy.mark(tuple, this.offset) || (forceSync && tupleBatch.size() > 0)) {
            int attempts = 0;
            boolean success = false;
            IOException lastException = null;
            // Make every attempt to sync the data we have. If it can't be done then kill the bolt with
            // a runtime exception. The filesystem is presumably in a very bad state.
            while (success == false && attempts < fileRetryCount) {
                attempts += 1;
                try {
                    syncAllWriters();
                    LOG.debug("Data synced to filesystem. Ack'ing [{}] tuples", tupleBatch.size());
                    for (Tuple t : tupleBatch) {
                        this.collector.ack(t);
                    }
                    tupleBatch.clear();
                    syncPolicy.reset();
                    success = true;
                } catch (IOException e) {
                    LOG.warn("Data could not be synced to filesystem on attempt [{}]", attempts);
                    this.collector.reportError(e);
                    lastException = e;
                }
            }
            // If unsuccessful, fail the pending tuples
            if (success == false) {
                LOG.warn("Data could not be synced to filesystem, failing this batch of tuples");
                for (Tuple t : tupleBatch) {
                    this.collector.fail(t);
                }
                tupleBatch.clear();
                throw new RuntimeException("Sync failed [" + attempts + "] times.", lastException);
            }
        }
        if (writer != null && writer.needsRotation()) {
            doRotationAndRemoveWriter(writerKey, writer);
        }
    }
}
Also used: AbstractHDFSWriter (org.apache.storm.hdfs.common.AbstractHDFSWriter), IOException (java.io.IOException), Tuple (org.apache.storm.tuple.Tuple)
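
The TupleUtils.isTick(tuple) branch above only fires if the bolt has asked Storm for tick tuples. A minimal sketch of how that is typically requested, not shown in the example; the 10-second interval is an illustrative value, not one taken from the source:

import java.util.Map;
import org.apache.storm.Config;
import org.apache.storm.topology.base.BaseRichBolt;

public abstract class TickAwareBolt extends BaseRichBolt {

    @Override
    public Map<String, Object> getComponentConfiguration() {
        Config conf = new Config();
        // Ask Storm to deliver a system tick tuple to this bolt every 10 seconds
        // (illustrative interval); AbstractHdfsBolt uses such ticks to force a flush.
        conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 10);
        return conf;
    }
}

The storm-hdfs bolts expose a tick-interval setting of their own, so this sketch only illustrates the underlying mechanism.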

Example 42 with Tuple

Use of org.apache.storm.tuple.Tuple in project storm by apache.

From the class TestHdfsBolt, the method testPartitionedOutput:

@Test
public void testPartitionedOutput() throws IOException {
    HdfsBolt bolt = makeHdfsBolt(hdfsURI, 1, 1000f);
    Partitioner partitioner = new Partitioner() {

        @Override
        public String getPartitionPath(Tuple tuple) {
            return Path.SEPARATOR + tuple.getStringByField("city");
        }
    };
    bolt.prepare(new Config(), topologyContext, collector);
    bolt.withPartitioner(partitioner);
    bolt.execute(tuple1);
    bolt.execute(tuple2);
    verify(collector).ack(tuple1);
    verify(collector).ack(tuple2);
    Assert.assertEquals(1, countNonZeroLengthFiles(testRoot + "/SFO"));
    Assert.assertEquals(1, countNonZeroLengthFiles(testRoot + "/SJO"));
}
Also used: Config (org.apache.storm.Config), Partitioner (org.apache.storm.hdfs.common.Partitioner), Tuple (org.apache.storm.tuple.Tuple), Test (org.junit.Test)
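
The makeHdfsBolt(...) helper is not shown. A hedged sketch of what it presumably assembles: a full HdfsBolt needs a filesystem URL, record format, file-name format, and sync and rotation policies before withPartitioner(...) has anything to act on. The concrete delimiter, path, and thresholds below are assumptions, not values from the test:

import org.apache.storm.hdfs.bolt.HdfsBolt;
import org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat;
import org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat;
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy.Units;
import org.apache.storm.hdfs.bolt.sync.CountSyncPolicy;

public final class PartitionedHdfsBoltFactory {

    private PartitionedHdfsBoltFactory() {}

    public static HdfsBolt makeHdfsBolt(String hdfsUri) {
        return new HdfsBolt()
                .withFsUrl(hdfsUri)
                // One line per tuple, fields joined with a pipe (illustrative choice).
                .withRecordFormat(new DelimitedRecordFormat().withFieldDelimiter("|"))
                .withFileNameFormat(new DefaultFileNameFormat().withPath("/tmp/storm-test"))
                // Sync to HDFS every 1000 tuples; rotate files at 5 MB (illustrative values).
                .withSyncPolicy(new CountSyncPolicy(1000))
                .withRotationPolicy(new FileSizeRotationPolicy(5.0f, Units.MB));
    }
}

The test then attaches the Partitioner so that each tuple's "city" field selects a subdirectory under the configured path, which is what the /SFO and /SJO assertions verify.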

Example 43 with Tuple

Use of org.apache.storm.tuple.Tuple in project storm by apache.

From the class KafkaBoltTest, the method testSimple:

@SuppressWarnings({ "unchecked", "serial" })
@Test
public void testSimple() {
    final KafkaProducer<String, String> producer = mock(KafkaProducer.class);
    when(producer.send(any(), any())).thenAnswer(new Answer<Object>() {

        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
            Callback c = (Callback) invocation.getArguments()[1];
            c.onCompletion(null, null);
            return null;
        }
    });
    KafkaBolt<String, String> bolt = new KafkaBolt<String, String>() {

        @Override
        protected KafkaProducer<String, String> mkProducer(Properties props) {
            return producer;
        }
    };
    bolt.withTopicSelector("MY_TOPIC");
    OutputCollector collector = mock(OutputCollector.class);
    TopologyContext context = mock(TopologyContext.class);
    Map<String, Object> conf = new HashMap<>();
    bolt.prepare(conf, context, collector);
    MkTupleParam param = new MkTupleParam();
    param.setFields("key", "message");
    Tuple testTuple = Testing.testTuple(Arrays.asList("KEY", "VALUE"), param);
    bolt.execute(testTuple);
    verify(producer).send(argThat(new ArgumentMatcher<ProducerRecord<String, String>>() {

        @Override
        public boolean matches(Object argument) {
            LOG.info("GOT {} ->", argument);
            ProducerRecord<String, String> arg = (ProducerRecord<String, String>) argument;
            LOG.info("  {} {} {}", arg.topic(), arg.key(), arg.value());
            return "MY_TOPIC".equals(arg.topic()) && "KEY".equals(arg.key()) && "VALUE".equals(arg.value());
        }
    }), any(Callback.class));
    verify(collector).ack(testTuple);
}
Also used: OutputCollector (org.apache.storm.task.OutputCollector), HashMap (java.util.HashMap), Properties (java.util.Properties), Callback (org.apache.kafka.clients.producer.Callback), InvocationOnMock (org.mockito.invocation.InvocationOnMock), ArgumentMatcher (org.mockito.ArgumentMatcher), ProducerRecord (org.apache.kafka.clients.producer.ProducerRecord), MkTupleParam (org.apache.storm.testing.MkTupleParam), TopologyContext (org.apache.storm.task.TopologyContext), Tuple (org.apache.storm.tuple.Tuple), Test (org.junit.Test)
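
Outside of a test, mkProducer is not overridden; the producer is built from properties handed to the bolt. A minimal sketch of the usual wiring, with an illustrative broker address:

import java.util.Properties;
import org.apache.storm.kafka.bolt.KafkaBolt;
import org.apache.storm.kafka.bolt.mapper.FieldNameBasedTupleToKafkaMapper;
import org.apache.storm.kafka.bolt.selector.DefaultTopicSelector;

public final class KafkaBoltFactory {

    private KafkaBoltFactory() {}

    public static KafkaBolt<String, String> makeKafkaBolt() {
        Properties props = new Properties();
        // Illustrative broker address; serializers must match the bolt's type parameters.
        props.put("bootstrap.servers", "localhost:9092");
        props.put("acks", "1");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        return new KafkaBolt<String, String>()
                .withProducerProperties(props)
                .withTopicSelector(new DefaultTopicSelector("MY_TOPIC"))
                // By default this mapper reads the "key" and "message" fields, the same
                // names the test declares via MkTupleParam.
                .withTupleToKafkaMapper(new FieldNameBasedTupleToKafkaMapper<>());
    }
}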

Example 44 with Tuple

Use of org.apache.storm.tuple.Tuple in project storm by apache.

From the class TestHiveBolt, the method testWithTimeformat:

@Test
public void testWithTimeformat() throws Exception {
    String[] partNames1 = { "dt" };
    String timeFormat = "yyyy/MM/dd";
    HiveSetupUtil.dropDB(conf, dbName1);
    HiveSetupUtil.createDbAndTable(conf, dbName1, tblName1, null, colNames, colTypes, partNames1, dbLocation);
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper().withColumnFields(new Fields(colNames)).withTimeAsPartitionField(timeFormat);
    HiveOptions hiveOptions = new HiveOptions(metaStoreURI, dbName1, tblName1, mapper).withTxnsPerBatch(2).withBatchSize(1).withMaxOpenConnections(1);
    bolt = new HiveBolt(hiveOptions);
    bolt.prepare(config, null, collector);
    Integer id = 100;
    String msg = "test-123";
    Date d = new Date();
    SimpleDateFormat parseDate = new SimpleDateFormat(timeFormat);
    String today = parseDate.format(d.getTime());
    checkRecordCountInTable(tblName1, dbName1, 0);
    Set<Tuple> tupleSet = new HashSet<Tuple>();
    for (int i = 0; i < 2; i++) {
        Tuple tuple = generateTestTuple(id, msg, null, null);
        tupleSet.add(tuple);
        bolt.execute(tuple);
    }
    for (Tuple t : tupleSet) verify(collector).ack(t);
    checkDataWritten(tblName1, dbName1, "100,test-123," + today, "100,test-123," + today);
    bolt.cleanup();
}
Also used: DelimitedRecordHiveMapper (org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper), Date (java.util.Date), Fields (org.apache.storm.tuple.Fields), HiveOptions (org.apache.storm.hive.common.HiveOptions), SimpleDateFormat (java.text.SimpleDateFormat), Tuple (org.apache.storm.tuple.Tuple), HashSet (java.util.HashSet), Test (org.junit.Test)
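
withTimeAsPartitionField(timeFormat) makes the mapper derive the "dt" partition value from the current time, which is why the test predicts it with the same SimpleDateFormat pattern. In a topology the bolt is wired in directly; a hedged sketch, where the metastore URI, database, table, column names, and component ids are placeholders:

import org.apache.storm.hive.bolt.HiveBolt;
import org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper;
import org.apache.storm.hive.common.HiveOptions;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;

public final class HiveTopologySketch {

    private HiveTopologySketch() {}

    public static void addHiveBolt(TopologyBuilder builder) {
        DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper()
                .withColumnFields(new Fields("id", "msg"))
                // Partition rows by write time, formatted as yyyy/MM/dd (as in the test).
                .withTimeAsPartitionField("yyyy/MM/dd");
        HiveOptions options = new HiveOptions("thrift://metastore:9083", "testdb", "test_table", mapper)
                .withTxnsPerBatch(2)
                .withBatchSize(100);
        // "events" is a placeholder upstream component id.
        builder.setBolt("hive-bolt", new HiveBolt(options)).shuffleGrouping("events");
    }
}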

Example 45 with Tuple

Use of org.apache.storm.tuple.Tuple in project storm by apache.

From the class TestHiveWriter, the method testWriteMultiFlush:

@Test
public void testWriteMultiFlush() throws Exception {
    DelimitedRecordHiveMapper mapper = new DelimitedRecordHiveMapper().withColumnFields(new Fields(colNames)).withPartitionFields(new Fields(partNames));
    HiveEndPoint endPoint = new HiveEndPoint(metaStoreURI, dbName, tblName, Arrays.asList(partitionVals));
    HiveWriter writer = new HiveWriter(endPoint, 10, true, timeout, callTimeoutPool, mapper, ugi);
    Tuple tuple = generateTestTuple("1", "abc");
    writer.write(mapper.mapRecord(tuple));
    tuple = generateTestTuple("2", "def");
    writer.write(mapper.mapRecord(tuple));
    Assert.assertEquals(writer.getTotalRecords(), 2);
    checkRecordCountInTable(dbName, tblName, 0);
    writer.flush(true);
    Assert.assertEquals(writer.getTotalRecords(), 0);
    tuple = generateTestTuple("3", "ghi");
    writer.write(mapper.mapRecord(tuple));
    writer.flush(true);
    tuple = generateTestTuple("4", "klm");
    writer.write(mapper.mapRecord(tuple));
    writer.flush(true);
    writer.close();
    checkRecordCountInTable(dbName, tblName, 4);
}
Also used: Fields (org.apache.storm.tuple.Fields), HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint), DelimitedRecordHiveMapper (org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper), Tuple (org.apache.storm.tuple.Tuple), Test (org.junit.Test)
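
The generateTestTuple(...) helper is not shown. A hypothetical stand-in, assuming it builds a two-field tuple with the same Testing.testTuple/MkTupleParam utilities that Example 43 uses; the field names here are guesses:

import java.util.Arrays;
import org.apache.storm.Testing;
import org.apache.storm.testing.MkTupleParam;
import org.apache.storm.tuple.Tuple;

public final class HiveTestTuples {

    private HiveTestTuples() {}

    public static Tuple generateTestTuple(String id, String msg) {
        MkTupleParam param = new MkTupleParam();
        // Hypothetical field names; the real test aligns them with the Hive column names.
        param.setFields("id", "msg");
        return Testing.testTuple(Arrays.asList(id, msg), param);
    }
}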

Aggregations

Classes that co-occur with Tuple in the indexed examples, with usage counts:

Tuple (org.apache.storm.tuple.Tuple): 85
Test (org.junit.Test): 30
Fields (org.apache.storm.tuple.Fields): 13
OutputCollector (org.apache.storm.task.OutputCollector): 11
Values (org.apache.storm.tuple.Values): 11
ArrayList (java.util.ArrayList): 10
HiveOptions (org.apache.storm.hive.common.HiveOptions): 10
TupleWindow (org.apache.storm.windowing.TupleWindow): 9
HashMap (java.util.HashMap): 7
Test (org.testng.annotations.Test): 7
GlobalStreamId (org.apache.storm.generated.GlobalStreamId): 6
DelimitedRecordHiveMapper (org.apache.storm.hive.bolt.mapper.DelimitedRecordHiveMapper): 6
HashSet (java.util.HashSet): 5
JsonRecordHiveMapper (org.apache.storm.hive.bolt.mapper.JsonRecordHiveMapper): 5
TopologyContext (org.apache.storm.task.TopologyContext): 5
TupleImpl (org.apache.storm.tuple.TupleImpl): 5
BasicOutputCollector (org.apache.storm.topology.BasicOutputCollector): 4
Map (java.util.Map): 3
Callback (org.apache.kafka.clients.producer.Callback): 3
ProducerRecord (org.apache.kafka.clients.producer.ProducerRecord): 3