Search in sources :

Example 1 with LocalSummarizer

Use of org.apache.metron.dataloads.nonbulk.flatfile.importer.LocalSummarizer in the project metron by apache.

From the class SimpleFlatFileSummarizerTest, method testLineByLine:

/**
 * Runs the summarizer over a single mocked input file named "input.csv" using the
 * line-by-line extractor configuration, then checks that the Stellar lookup expression
 * evaluates to true for every entry of {@code domains} against the captured output object.
 *
 * @param numThreads value passed to the summarizer as the NUM_THREADS option
 * @throws IOException propagated from the calls made in this method
 * @throws InvalidWriterOutput propagated from the calls made in this method
 */
public void testLineByLine(final int numThreads) throws IOException, InvalidWriterOutput {
    ExtractorHandler handler = ExtractorHandler.load(stellarExtractorConfigLineByLine);
    LocalSummarizer summarizer = new MockSummarizer(ImmutableMap.of("input.csv", generateData()));
    // Handed to the PeekingWriter below and read back after the import via finalObj.get().
    final AtomicReference<Object> finalObj = new AtomicReference<>(null);

    // A plain EnumMap filled with put() calls: double-brace initialization would create an
    // anonymous EnumMap subclass holding a hidden reference to this test instance.
    EnumMap<SummarizeOptions, Optional<Object>> options = new EnumMap<>(SummarizeOptions.class);
    options.put(SummarizeOptions.INPUT, Optional.of("input.csv"));
    options.put(SummarizeOptions.BATCH_SIZE, Optional.of(5));
    options.put(SummarizeOptions.QUIET, Optional.of(true));
    options.put(SummarizeOptions.OUTPUT_MODE, Optional.of(new PeekingWriter(finalObj)));
    options.put(SummarizeOptions.OUTPUT, Optional.of("out"));
    options.put(SummarizeOptions.NUM_THREADS, Optional.of(numThreads));

    summarizer.importData(options, handler, new Configuration());

    String expr = "MAP_GET(DOMAIN_REMOVE_TLD(domain), s) > 0";
    Object summary = finalObj.get();
    for (String domain : domains) {
        Object found = StellarProcessorUtils.run(expr, ImmutableMap.of("s", summary, "domain", domain));
        // Boolean.TRUE.equals(...) turns a null or non-Boolean result into a regular assertion
        // failure carrying the domain name, rather than an NPE/ClassCastException.
        Assert.assertTrue("Can't find " + domain, Boolean.TRUE.equals(found));
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), ExtractorHandler (org.apache.metron.dataloads.extractor.ExtractorHandler), AtomicReference (java.util.concurrent.atomic.AtomicReference), LocalSummarizer (org.apache.metron.dataloads.nonbulk.flatfile.importer.LocalSummarizer)

Example 2 with LocalSummarizer

Use of org.apache.metron.dataloads.nonbulk.flatfile.importer.LocalSummarizer in the project metron by apache.

From the class SimpleFlatFileSummarizerTest, method testWholeFile:

/**
 * Runs the summarizer over a mocked set of inputs (one entry per element of {@code domains},
 * keyed by the domain with the content {@code "1," + domain}) using the whole-file extractor
 * configuration, then checks that the Stellar lookup expression evaluates to true for every
 * domain against the captured output object.
 *
 * @param numThreads value passed to the summarizer as the NUM_THREADS option
 * @throws IOException propagated from the calls made in this method
 * @throws InvalidWriterOutput propagated from the calls made in this method
 */
public void testWholeFile(final int numThreads) throws IOException, InvalidWriterOutput {
    ExtractorHandler handler = ExtractorHandler.load(stellarExtractorConfigWholeFile);

    // Built with an ordinary loop instead of double-brace initialization, which would create
    // an anonymous HashMap subclass holding a hidden reference to this test instance.
    HashMap<String, String> mockInputs = new HashMap<>();
    for (String domain : domains) {
        mockInputs.put(domain, "1," + domain);
    }
    LocalSummarizer summarizer = new MockSummarizer(mockInputs);

    // Handed to the PeekingWriter below and read back after the import via finalObj.get().
    final AtomicReference<Object> finalObj = new AtomicReference<>(null);

    EnumMap<SummarizeOptions, Optional<Object>> options = new EnumMap<>(SummarizeOptions.class);
    options.put(SummarizeOptions.INPUT, Optional.of("."));
    options.put(SummarizeOptions.BATCH_SIZE, Optional.of(5));
    options.put(SummarizeOptions.QUIET, Optional.of(true));
    options.put(SummarizeOptions.OUTPUT_MODE, Optional.of(new PeekingWriter(finalObj)));
    options.put(SummarizeOptions.OUTPUT, Optional.of("out"));
    options.put(SummarizeOptions.NUM_THREADS, Optional.of(numThreads));

    summarizer.importData(options, handler, new Configuration());

    String expr = "MAP_GET(DOMAIN_REMOVE_TLD(domain), s) > 0";
    Object summary = finalObj.get();
    for (String domain : domains) {
        Object found = StellarProcessorUtils.run(expr, ImmutableMap.of("s", summary, "domain", domain));
        // Boolean.TRUE.equals(...) turns a null or non-Boolean result into a regular assertion
        // failure carrying the domain name, rather than an NPE/ClassCastException.
        Assert.assertTrue("Can't find " + domain, Boolean.TRUE.equals(found));
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), ExtractorHandler (org.apache.metron.dataloads.extractor.ExtractorHandler), AtomicReference (java.util.concurrent.atomic.AtomicReference), LocalSummarizer (org.apache.metron.dataloads.nonbulk.flatfile.importer.LocalSummarizer)

Aggregations

AtomicReference (java.util.concurrent.atomic.AtomicReference): 2, Configuration (org.apache.hadoop.conf.Configuration): 2, ExtractorHandler (org.apache.metron.dataloads.extractor.ExtractorHandler): 2, LocalSummarizer (org.apache.metron.dataloads.nonbulk.flatfile.importer.LocalSummarizer): 2