Use of org.apache.metron.integration.components.KafkaComponent in the Apache Metron project.
Class ElasticsearchIndexingIntegrationTest, method getProcessor.
/**
 * Builds the processor that polls the "search" component until the documents indexed under
 * {@code index}, together with any messages read from {@code ERROR_TOPIC}, account for every
 * input message.
 *
 * @param inputMessages the input messages whose count the indexed documents are compared against
 * @return a processor whose result carries the indexed documents and, when the error topic was
 *     read, the error messages
 */
@Override
public Processor<List<Map<String, Object>>> getProcessor(final List<byte[]> inputMessages) {
  return new Processor<List<Map<String, Object>>>() {
    List<Map<String, Object>> docs = null;
    List<byte[]> errors = null;

    /**
     * Reports READY once the index holds at least as many documents as there were inputs, or
     * once documents plus a non-empty set of error messages add up to exactly the input count.
     */
    @Override
    public ReadinessState process(ComponentRunner runner) {
      ElasticSearchComponent elasticSearchComponent = runner.getComponent("search", ElasticSearchComponent.class);
      KafkaComponent kafkaComponent = runner.getComponent("kafka", KafkaComponent.class);
      if (!elasticSearchComponent.hasIndex(index)) {
        return ReadinessState.NOT_READY;
      }
      try {
        docs = elasticSearchComponent.getAllIndexedDocs(index, testSensorType + "_doc");
      } catch (IOException e) {
        throw new IllegalStateException("Unable to retrieve indexed documents.", e);
      }
      if (docs.size() >= inputMessages.size()) {
        return ReadinessState.READY;
      }
      // Fewer documents than inputs: the remainder may have been routed to the error topic.
      errors = kafkaComponent.readMessages(ERROR_TOPIC);
      boolean allAccountedFor = errors.size() > 0 && errors.size() + docs.size() == inputMessages.size();
      return allAccountedFor ? ReadinessState.READY : ReadinessState.NOT_READY;
    }

    /** Packages the last documents and error messages seen by {@code process}. */
    @Override
    public ProcessorResult<List<Map<String, Object>>> getResult() {
      ProcessorResult.Builder<List<Map<String, Object>>> builder = new ProcessorResult.Builder<>();
      return builder.withResult(docs).withProcessErrors(errors).build();
    }
  };
}
Use of org.apache.metron.integration.components.KafkaComponent in the Apache Metron project.
Class PcapTopologyIntegrationTest, method testTopology.
/**
 * Starts the MR, ZooKeeper, Kafka and Flux ("pcap") components, sends the sample pcap entries
 * through the supplied callback, waits for the expected number of output files, and then checks
 * a series of {@code PcapJob} queries using both the fixed and the query filter configurators.
 *
 * @param updatePropertiesCallback applied to the topology properties before the components are created
 * @param sendPcapEntriesCallback invoked with the Kafka component and the pcap entries read from the sample file
 * @param withHeaders passed through to {@code readPcaps} and {@code filterPcaps}
 * @throws Exception if any setup, query or assertion step fails
 */
public void testTopology(Function<Properties, Void> updatePropertiesCallback, SendEntries sendPcapEntriesCallback, boolean withHeaders) throws Exception {
  if (!new File(topologiesDir).exists()) {
    topologiesDir = UnitTestHelper.findDir("topologies");
  }
  targetDir = UnitTestHelper.findDir("target");
  final File outDir = getOutDir(targetDir);
  final File queryDir = getQueryDir(targetDir);
  clearOutDir(outDir);
  clearOutDir(queryDir);
  File baseDir = new File(new File(targetDir), BASE_DIR);
  // Assert.assertEquals(0, numFiles(outDir));
  Assert.assertNotNull(topologiesDir);
  Assert.assertNotNull(targetDir);
  Path pcapFile = new Path("../metron-integration-test/src/main/sample/data/SampleInput/PCAPExampleOutput");
  final List<Map.Entry<byte[], byte[]>> pcapEntries = Lists.newArrayList(readPcaps(pcapFile, withHeaders));
  Assert.assertTrue(Iterables.size(pcapEntries) > 0);

  // Plain Properties instance: avoids an anonymous subclass that would capture this test instance.
  final Properties topologyProperties = new Properties();
  topologyProperties.setProperty("topology.workers", "1");
  topologyProperties.setProperty("topology.worker.childopts", "");
  topologyProperties.setProperty("spout.kafka.topic.pcap", KAFKA_TOPIC);
  topologyProperties.setProperty("kafka.pcap.start", "EARLIEST");
  topologyProperties.setProperty("kafka.pcap.out", outDir.getAbsolutePath());
  topologyProperties.setProperty("kafka.pcap.numPackets", "2");
  topologyProperties.setProperty("kafka.pcap.maxTimeMS", "200000000");
  topologyProperties.setProperty("kafka.pcap.ts_granularity", "NANOSECONDS");
  topologyProperties.setProperty("kafka.spout.parallelism", "1");
  topologyProperties.setProperty("topology.auto-credentials", "[]");
  topologyProperties.setProperty("kafka.security.protocol", "PLAINTEXT");
  topologyProperties.setProperty("hdfs.sync.every", "1");
  topologyProperties.setProperty("hdfs.replication.factor", "-1");
  updatePropertiesCallback.apply(topologyProperties);

  final ZKServerComponent zkServerComponent = getZKServerComponent(topologyProperties);
  final KafkaComponent kafkaComponent = getKafkaComponent(topologyProperties, Collections.singletonList(new KafkaComponent.Topic(KAFKA_TOPIC, 1)));
  final MRComponent mr = new MRComponent().withBasePath(baseDir.getAbsolutePath());
  FluxTopologyComponent fluxComponent = new FluxTopologyComponent.Builder()
      .withTopologyLocation(new File(topologiesDir + "/pcap/remote.yaml"))
      .withTopologyName("pcap")
      .withTopologyProperties(topologyProperties)
      .build();
  // UnitTestHelper.verboseLogging();
  ComponentRunner runner = new ComponentRunner.Builder()
      .withComponent("mr", mr)
      .withComponent("zk", zkServerComponent)
      .withComponent("kafka", kafkaComponent)
      .withComponent("storm", fluxComponent)
      .withMaxTimeMS(-1)
      .withMillisecondsBetweenAttempts(2000)
      .withNumRetries(10)
      .withCustomShutdownOrder(new String[] { "storm", "kafka", "zk", "mr" })
      .build();
  try {
    runner.start();
    fluxComponent.submitTopology();
    sendPcapEntriesCallback.send(kafkaComponent, pcapEntries);
    runner.process(new Processor<Void>() {
      @Override
      public ReadinessState process(ComponentRunner runner) {
        // Ready once the output directory holds half as many files as there are entries
        // (presumably because kafka.pcap.numPackets is 2 -- confirm against the topology).
        int numFiles = numFiles(outDir, mr.getConfiguration());
        int expectedNumFiles = pcapEntries.size() / 2;
        return numFiles == expectedNumFiles ? ReadinessState.READY : ReadinessState.NOT_READY;
      }

      @Override
      public ProcessorResult<Void> getResult() {
        return null;
      }
    });
    PcapJob job = new PcapJob();
    // The same input and output locations are used by every query below.
    final Path outPath = new Path(outDir.getAbsolutePath());
    final Path queryPath = new Path(queryDir.getAbsolutePath());
    {
      // Ensure that only two pcaps are returned when we look at 4 and 5
      Iterable<byte[]> results = job.query(outPath, queryPath,
          getTimestamp(4, pcapEntries), getTimestamp(5, pcapEntries), 10,
          new HashMap<>(), new Configuration(), FileSystem.get(new Configuration()), new FixedPcapFilter.Configurator());
      assertInOrder(results);
      Assert.assertEquals(2, Iterables.size(results));
    }
    {
      // Ensure that only two pcaps are returned when we look at 4 and 5
      // test with empty query filter
      Iterable<byte[]> results = job.query(outPath, queryPath,
          getTimestamp(4, pcapEntries), getTimestamp(5, pcapEntries), 10,
          "", new Configuration(), FileSystem.get(new Configuration()), new QueryPcapFilter.Configurator());
      assertInOrder(results);
      Assert.assertEquals(2, Iterables.size(results));
    }
    {
      // ensure that none get returned since that destination IP address isn't in the dataset
      Iterable<byte[]> results = job.query(outPath, queryPath,
          getTimestamp(0, pcapEntries), getTimestamp(1, pcapEntries), 10,
          new HashMap<String, String>() {
            {
              put(Constants.Fields.DST_ADDR.getName(), "207.28.210.1");
            }
          }, new Configuration(), FileSystem.get(new Configuration()), new FixedPcapFilter.Configurator());
      assertInOrder(results);
      Assert.assertEquals(0, Iterables.size(results));
    }
    {
      // ensure that none get returned since that destination IP address isn't in the dataset
      // test with query filter
      Iterable<byte[]> results = job.query(outPath, queryPath,
          getTimestamp(0, pcapEntries), getTimestamp(1, pcapEntries), 10,
          "ip_dst_addr == '207.28.210.1'", new Configuration(), FileSystem.get(new Configuration()), new QueryPcapFilter.Configurator());
      assertInOrder(results);
      Assert.assertEquals(0, Iterables.size(results));
    }
    {
      // same with protocol as before with the destination addr
      Iterable<byte[]> results = job.query(outPath, queryPath,
          getTimestamp(0, pcapEntries), getTimestamp(1, pcapEntries), 10,
          new HashMap<String, String>() {
            {
              put(Constants.Fields.PROTOCOL.getName(), "foo");
            }
          }, new Configuration(), FileSystem.get(new Configuration()), new FixedPcapFilter.Configurator());
      assertInOrder(results);
      Assert.assertEquals(0, Iterables.size(results));
    }
    {
      // same with protocol as before with the destination addr
      // test with query filter
      Iterable<byte[]> results = job.query(outPath, queryPath,
          getTimestamp(0, pcapEntries), getTimestamp(1, pcapEntries), 10,
          "protocol == 'foo'", new Configuration(), FileSystem.get(new Configuration()), new QueryPcapFilter.Configurator());
      assertInOrder(results);
      Assert.assertEquals(0, Iterables.size(results));
    }
    {
      // make sure I get them all.
      Iterable<byte[]> results = job.query(outPath, queryPath,
          getTimestamp(0, pcapEntries), getTimestamp(pcapEntries.size() - 1, pcapEntries) + 1, 10,
          new HashMap<>(), new Configuration(), FileSystem.get(new Configuration()), new FixedPcapFilter.Configurator());
      assertInOrder(results);
      Assert.assertEquals(pcapEntries.size(), Iterables.size(results));
    }
    {
      // make sure I get them all.
      // with query filter
      Iterable<byte[]> results = job.query(outPath, queryPath,
          getTimestamp(0, pcapEntries), getTimestamp(pcapEntries.size() - 1, pcapEntries) + 1, 10,
          "", new Configuration(), FileSystem.get(new Configuration()), new QueryPcapFilter.Configurator());
      assertInOrder(results);
      Assert.assertEquals(pcapEntries.size(), Iterables.size(results));
    }
    {
      // fixed filter on destination port 22, compared against the same filter applied locally
      Iterable<byte[]> results = job.query(outPath, queryPath,
          getTimestamp(0, pcapEntries), getTimestamp(pcapEntries.size() - 1, pcapEntries) + 1, 10,
          new HashMap<String, String>() {
            {
              put(Constants.Fields.DST_PORT.getName(), "22");
            }
          }, new Configuration(), FileSystem.get(new Configuration()), new FixedPcapFilter.Configurator());
      assertInOrder(results);
      Assert.assertTrue(Iterables.size(results) > 0);
      int expectedCount = Iterables.size(filterPcaps(pcapEntries, new Predicate<JSONObject>() {
        @Override
        public boolean apply(@Nullable JSONObject input) {
          Object prt = input.get(Constants.Fields.DST_PORT.getName());
          return prt != null && prt.toString().equals("22");
        }
      }, withHeaders));
      Assert.assertEquals(expectedCount, Iterables.size(results));
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      PcapMerger.merge(baos, Iterables.partition(results, 1).iterator().next());
      Assert.assertTrue(baos.toByteArray().length > 0);
    }
    {
      // test with query filter and byte array matching
      Iterable<byte[]> results = job.query(outPath, queryPath,
          getTimestamp(0, pcapEntries), getTimestamp(pcapEntries.size() - 1, pcapEntries) + 1, 10,
          "BYTEARRAY_MATCHER('2f56abd814bc56420489ca38e7faf8cec3d4', packet)", new Configuration(), FileSystem.get(new Configuration()), new QueryPcapFilter.Configurator());
      assertInOrder(results);
      Assert.assertEquals(1, Iterables.size(results));
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      PcapMerger.merge(baos, Iterables.partition(results, 1).iterator().next());
      Assert.assertTrue(baos.toByteArray().length > 0);
    }
    {
      // test with query filter
      Iterable<byte[]> results = job.query(outPath, queryPath,
          getTimestamp(0, pcapEntries), getTimestamp(pcapEntries.size() - 1, pcapEntries) + 1, 10,
          "ip_dst_port == 22", new Configuration(), FileSystem.get(new Configuration()), new QueryPcapFilter.Configurator());
      assertInOrder(results);
      Assert.assertTrue(Iterables.size(results) > 0);
      int expectedCount = Iterables.size(filterPcaps(pcapEntries, new Predicate<JSONObject>() {
        @Override
        public boolean apply(@Nullable JSONObject input) {
          Object prt = input.get(Constants.Fields.DST_PORT.getName());
          return prt != null && (Long) prt == 22;
        }
      }, withHeaders));
      Assert.assertEquals(expectedCount, Iterables.size(results));
      assertInOrder(results);
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      PcapMerger.merge(baos, Iterables.partition(results, 1).iterator().next());
      Assert.assertTrue(baos.toByteArray().length > 0);
    }
    {
      // test with query filter
      Iterable<byte[]> results = job.query(outPath, queryPath,
          getTimestamp(0, pcapEntries), getTimestamp(pcapEntries.size() - 1, pcapEntries) + 1, 10,
          "ip_dst_port > 20 and ip_dst_port < 55792", new Configuration(), FileSystem.get(new Configuration()), new QueryPcapFilter.Configurator());
      assertInOrder(results);
      Assert.assertTrue(Iterables.size(results) > 0);
      int expectedCount = Iterables.size(filterPcaps(pcapEntries, new Predicate<JSONObject>() {
        @Override
        public boolean apply(@Nullable JSONObject input) {
          Object prt = input.get(Constants.Fields.DST_PORT.getName());
          return prt != null && ((Long) prt > 20 && (Long) prt < 55792);
        }
      }, withHeaders));
      Assert.assertEquals(expectedCount, Iterables.size(results));
      assertInOrder(results);
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      PcapMerger.merge(baos, Iterables.partition(results, 1).iterator().next());
      Assert.assertTrue(baos.toByteArray().length > 0);
    }
    {
      // test with query filter
      Iterable<byte[]> results = job.query(outPath, queryPath,
          getTimestamp(0, pcapEntries), getTimestamp(pcapEntries.size() - 1, pcapEntries) + 1, 10,
          "ip_dst_port > 55790", new Configuration(), FileSystem.get(new Configuration()), new QueryPcapFilter.Configurator());
      assertInOrder(results);
      Assert.assertTrue(Iterables.size(results) > 0);
      int expectedCount = Iterables.size(filterPcaps(pcapEntries, new Predicate<JSONObject>() {
        @Override
        public boolean apply(@Nullable JSONObject input) {
          Object prt = input.get(Constants.Fields.DST_PORT.getName());
          return prt != null && (Long) prt > 55790;
        }
      }, withHeaders));
      Assert.assertEquals(expectedCount, Iterables.size(results));
      assertInOrder(results);
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      PcapMerger.merge(baos, Iterables.partition(results, 1).iterator().next());
      Assert.assertTrue(baos.toByteArray().length > 0);
    }
    System.out.println("Ended");
  } finally {
    // Always stop the components and remove the generated files, even when an assertion fails.
    runner.stop();
    clearOutDir(outDir);
    clearOutDir(queryDir);
  }
}
Use of org.apache.metron.integration.components.KafkaComponent in the Apache Metron project.
Class ProfilerIntegrationTest, method setupBeforeClass.
/**
 * Creates the topology properties, the mock HBase table and the ZooKeeper, Kafka, config-upload
 * and Flux ("profiler") components, then starts them all through a {@code ComponentRunner}.
 *
 * @throws UnableToStartException declared by this method; presumably raised when the runner
 *     cannot start a component -- confirm against {@code ComponentRunner.start()}
 */
@BeforeClass
public static void setupBeforeClass() throws UnableToStartException {
  columnBuilder = new ValueOnlyColumnBuilder(columnFamily);

  // storm topology properties (plain instance rather than an anonymous Properties subclass)
  final Properties topologyProperties = new Properties();

  // storm settings
  topologyProperties.setProperty("profiler.workers", "1");
  topologyProperties.setProperty("profiler.executors", "0");
  topologyProperties.setProperty("storm.auto.credentials", "[]");
  topologyProperties.setProperty("topology.auto-credentials", "[]");
  topologyProperties.setProperty("topology.message.timeout.secs", "60");
  topologyProperties.setProperty("topology.max.spout.pending", "100000");

  // kafka settings
  topologyProperties.setProperty("profiler.input.topic", inputTopic);
  topologyProperties.setProperty("profiler.output.topic", outputTopic);
  topologyProperties.setProperty("kafka.start", "UNCOMMITTED_EARLIEST");
  topologyProperties.setProperty("kafka.security.protocol", "PLAINTEXT");

  // hbase settings
  topologyProperties.setProperty("profiler.hbase.salt.divisor", Integer.toString(saltDivisor));
  topologyProperties.setProperty("profiler.hbase.table", tableName);
  topologyProperties.setProperty("profiler.hbase.column.family", columnFamily);
  topologyProperties.setProperty("profiler.hbase.batch", "10");
  topologyProperties.setProperty("profiler.hbase.flush.interval.seconds", "1");
  topologyProperties.setProperty("hbase.provider.impl", MockHBaseTableProvider.class.getName());

  // profile settings
  topologyProperties.setProperty("profiler.period.duration", Long.toString(periodDurationMillis));
  topologyProperties.setProperty("profiler.period.duration.units", "MILLISECONDS");
  topologyProperties.setProperty("profiler.ttl", Long.toString(profileTimeToLiveMillis));
  topologyProperties.setProperty("profiler.ttl.units", "MILLISECONDS");
  topologyProperties.setProperty("profiler.window.duration", Long.toString(windowDurationMillis));
  topologyProperties.setProperty("profiler.window.duration.units", "MILLISECONDS");
  topologyProperties.setProperty("profiler.window.lag", Long.toString(windowLagMillis));
  topologyProperties.setProperty("profiler.window.lag.units", "MILLISECONDS");
  topologyProperties.setProperty("profiler.max.routes.per.bolt", Long.toString(maxRoutesPerBolt));

  // create the mock table
  profilerTable = (MockHTable) MockHBaseTableProvider.addToCache(tableName, columnFamily);
  zkComponent = getZKServerComponent(topologyProperties);

  // create the input and output topics
  kafkaComponent = getKafkaComponent(topologyProperties, Arrays.asList(
      new KafkaComponent.Topic(inputTopic, 1),
      new KafkaComponent.Topic(outputTopic, 1)));

  // upload profiler configuration to zookeeper
  configUploadComponent = new ConfigUploadComponent().withTopologyProperties(topologyProperties);

  // load flux definition for the profiler topology
  fluxComponent = new FluxTopologyComponent.Builder()
      .withTopologyLocation(new File(FLUX_PATH))
      .withTopologyName("profiler")
      .withTopologyProperties(topologyProperties)
      .build();

  // start all components
  runner = new ComponentRunner.Builder()
      .withComponent("zk", zkComponent)
      .withComponent("kafka", kafkaComponent)
      .withComponent("config", configUploadComponent)
      .withComponent("storm", fluxComponent)
      .withMillisecondsBetweenAttempts(15000)
      .withNumRetries(10)
      .withCustomShutdownOrder(new String[] { "storm", "config", "kafka", "zk" })
      .build();
  runner.start();
}
Use of org.apache.metron.integration.components.KafkaComponent in the Apache Metron project.
Class WriterBoltIntegrationTest, method test.
/**
 * Writes three messages ("valid,foo", "invalid,foo", "error") to the sensor topic and checks
 * what the processor collected: one message on the enrichment topic with action "valid", and
 * two on the error topic -- a PARSER_INVALID entry whose raw message decodes to
 * dummy=foo / action=invalid, followed by a PARSER_ERROR entry whose raw message is "error".
 */
@Test
public void test() throws UnableToStartException, IOException, ParseException {
  UnitTestHelper.setLog4jLevel(CSVParser.class, org.apache.log4j.Level.FATAL);
  final String sensorType = "dummy";

  final List<byte[]> inputMessages = new ArrayList<>();
  inputMessages.add(Bytes.toBytes("valid,foo"));
  inputMessages.add(Bytes.toBytes("invalid,foo"));
  inputMessages.add(Bytes.toBytes("error"));

  final Properties topologyProperties = new Properties();
  final ZKServerComponent zkServerComponent = getZKServerComponent(topologyProperties);

  final List<KafkaComponent.Topic> topics = new ArrayList<>();
  topics.add(new KafkaComponent.Topic(sensorType, 1));
  topics.add(new KafkaComponent.Topic(ERROR_TOPIC, 1));
  topics.add(new KafkaComponent.Topic(Constants.ENRICHMENT_TOPIC, 1));
  final KafkaComponent kafkaComponent = getKafkaComponent(topologyProperties, topics);
  topologyProperties.setProperty("kafka.broker", kafkaComponent.getBrokerList());

  ConfigUploadComponent configUploadComponent = new ConfigUploadComponent()
      .withTopologyProperties(topologyProperties)
      .withGlobalConfig(globalConfig)
      .withParserSensorConfig(sensorType, JSONUtils.INSTANCE.load(parserConfig, SensorParserConfig.class));
  ParserTopologyComponent parserTopologyComponent = new ParserTopologyComponent.Builder()
      .withSensorType(sensorType)
      .withTopologyProperties(topologyProperties)
      .withBrokerUrl(kafkaComponent.getBrokerList())
      .build();
  // UnitTestHelper.verboseLogging();
  ComponentRunner runner = new ComponentRunner.Builder()
      .withComponent("zk", zkServerComponent)
      .withComponent("kafka", kafkaComponent)
      .withComponent("config", configUploadComponent)
      .withComponent("org/apache/storm", parserTopologyComponent)
      .withMillisecondsBetweenAttempts(5000)
      .withNumRetries(10)
      .withCustomShutdownOrder(new String[] { "org/apache/storm", "config", "kafka", "zk" })
      .build();
  try {
    runner.start();
    kafkaComponent.writeMessages(sensorType, inputMessages);
    ProcessorResult<Map<String, List<JSONObject>>> result = runner.process(getProcessor());
    Map<String, List<JSONObject>> outputMessages = result.getResult();

    // Two keys are expected in the result map: the enrichment topic and the error topic.
    Assert.assertEquals(2, outputMessages.size());
    Assert.assertEquals(1, outputMessages.get(Constants.ENRICHMENT_TOPIC).size());
    Assert.assertEquals("valid", outputMessages.get(Constants.ENRICHMENT_TOPIC).get(0).get("action"));
    Assert.assertEquals(2, outputMessages.get(ERROR_TOPIC).size());

    JSONObject invalidMessage = outputMessages.get(ERROR_TOPIC).get(0);
    Assert.assertEquals(Constants.ErrorType.PARSER_INVALID.getType(), invalidMessage.get(Constants.ErrorFields.ERROR_TYPE.getName()));
    JSONObject rawMessage = JSONUtils.INSTANCE.load((String) invalidMessage.get(Constants.ErrorFields.RAW_MESSAGE.getName()), JSONObject.class);
    Assert.assertEquals("foo", rawMessage.get("dummy"));
    Assert.assertEquals("invalid", rawMessage.get("action"));

    JSONObject errorMessage = outputMessages.get(ERROR_TOPIC).get(1);
    Assert.assertEquals(Constants.ErrorType.PARSER_ERROR.getType(), errorMessage.get(Constants.ErrorFields.ERROR_TYPE.getName()));
    Assert.assertEquals("error", errorMessage.get(Constants.ErrorFields.RAW_MESSAGE.getName()));
    // It's unclear if we need a rawMessageBytes field so commenting out for now
    // Assert.assertTrue(Arrays.equals(listToBytes(errorMessage.get(Constants.ErrorFields.RAW_MESSAGE_BYTES.getName())), "error".getBytes()));
  } finally {
    // runner is assigned before the try block, so no null check is needed here.
    runner.stop();
  }
}
Use of org.apache.metron.integration.components.KafkaComponent in the Apache Metron project.
Class SolrIndexingIntegrationTest, method getProcessor.
/**
 * Builds the processor that polls the "search" component until the documents indexed in
 * {@code collection}, together with any messages read from {@code ERROR_TOPIC}, account for
 * every input message.
 *
 * @param inputMessages the input messages whose count the indexed documents are compared against
 * @return a processor whose result carries the indexed documents and, when the error topic was
 *     read, the error messages
 */
@Override
public Processor<List<Map<String, Object>>> getProcessor(final List<byte[]> inputMessages) {
  return new Processor<List<Map<String, Object>>>() {
    List<Map<String, Object>> docs = null;
    List<byte[]> errors = null;

    /**
     * Reports READY once the collection holds at least as many documents as there were inputs,
     * or once documents plus a non-empty set of error messages add up to exactly the input count.
     */
    @Override
    public ReadinessState process(ComponentRunner runner) {
      SolrComponent solrComponent = runner.getComponent("search", SolrComponent.class);
      KafkaComponent kafkaComponent = runner.getComponent("kafka", KafkaComponent.class);
      if (!solrComponent.hasCollection(collection)) {
        return ReadinessState.NOT_READY;
      }
      docs = solrComponent.getAllIndexedDocs(collection);
      if (docs.size() >= inputMessages.size()) {
        return ReadinessState.READY;
      }
      // Fewer documents than inputs: the remainder may have been routed to the error topic.
      errors = kafkaComponent.readMessages(ERROR_TOPIC);
      boolean allAccountedFor = errors.size() > 0 && errors.size() + docs.size() == inputMessages.size();
      return allAccountedFor ? ReadinessState.READY : ReadinessState.NOT_READY;
    }

    /** Packages the last documents and error messages seen by {@code process}. */
    @Override
    public ProcessorResult<List<Map<String, Object>>> getResult() {
      ProcessorResult.Builder<List<Map<String, Object>>> builder = new ProcessorResult.Builder<>();
      return builder.withResult(docs).withProcessErrors(errors).build();
    }
  };
}
Aggregations