Search in sources :

Example 6 with Comparator

use of java.util.Comparator in project hbase by apache.

the class TestThriftHBaseServiceHandler method assertTColumnValuesEqual.

/**
 * Asserts that two lists of column values hold pairwise-equal entries, regardless of the
 * order in which the entries appear in each list.
 *
 * <p>Both lists are first checked for equal size, then sorted copies are walked in lockstep
 * and each pair is handed to {@code assertTColumnValueEqual}. The argument lists themselves
 * are not reordered.
 *
 * @param columnValuesA the first list of column values
 * @param columnValuesB the second list of column values
 */
public void assertTColumnValuesEqual(List<TColumnValue> columnValuesA, List<TColumnValue> columnValuesB) {
    assertEquals(columnValuesA.size(), columnValuesB.size());
    Comparator<TColumnValue> comparator = new Comparator<TColumnValue>() {

        @Override
        public int compare(TColumnValue o1, TColumnValue o2) {
            // Order by family first and qualifier second. Comparing the concatenation of the
            // two would treat ("a", "bc") and ("ab", "c") as the same key, so entries could be
            // paired with the wrong counterpart after sorting.
            int byFamily = Bytes.compareTo(o1.getFamily(), o2.getFamily());
            if (byFamily != 0) {
                return byFamily;
            }
            return Bytes.compareTo(o1.getQualifier(), o2.getQualifier());
        }
    };
    // Sort copies so that an assertion helper does not reorder (or fail on) the caller's lists.
    List<TColumnValue> sortedA = new java.util.ArrayList<TColumnValue>(columnValuesA);
    List<TColumnValue> sortedB = new java.util.ArrayList<TColumnValue>(columnValuesB);
    Collections.sort(sortedA, comparator);
    Collections.sort(sortedB, comparator);
    for (int i = 0; i < sortedA.size(); i++) {
        assertTColumnValueEqual(sortedA.get(i), sortedB.get(i));
    }
}
Also used : TColumnValue(org.apache.hadoop.hbase.thrift2.generated.TColumnValue) Comparator(java.util.Comparator)

Example 7 with Comparator

use of java.util.Comparator in project learning-spark by databricks.

the class Functions method contentSizeStats.

/**
 * Computes summary statistics over the content sizes of the given access logs.
 *
 * <p>Each log entry is mapped to a double via {@code GetContentSize}; the resulting RDD is
 * cached because it is traversed several times below.
 *
 * @param accessLogRDD the access logs to summarize
 * @return a tuple of (entry count, {@code SumReducer} reduction, minimum, maximum), with the
 *         last three converted from {@code Double} via {@code longValue()}; {@code null} when
 *         the RDD contains no entries
 */
@Nullable
public static final Tuple4<Long, Long, Long, Long> contentSizeStats(JavaRDD<ApacheAccessLog> accessLogRDD) {
    JavaDoubleRDD sizes = accessLogRDD.mapToDouble(new GetContentSize()).cache();
    final long numEntries = sizes.count();
    if (numEntries == 0) {
        // Nothing to reduce over; callers are expected to handle the null result.
        return null;
    }
    // The natural ordering is routed through Object before being viewed as a Comparator<Double>.
    Object naturalOrder = Ordering.natural();
    final Comparator<Double> byValue = (Comparator<Double>) naturalOrder;
    long reducedSize = sizes.reduce(new SumReducer()).longValue();
    long smallestSize = sizes.min(byValue).longValue();
    long largestSize = sizes.max(byValue).longValue();
    return new Tuple4<>(numEntries, reducedSize, smallestSize, largestSize);
}
Also used : Tuple4(scala.Tuple4) JavaDoubleRDD(org.apache.spark.api.java.JavaDoubleRDD) Comparator(java.util.Comparator) Nullable(javax.annotation.Nullable)

Example 8 with Comparator

use of java.util.Comparator in project learning-spark by databricks.

the class LogAnalyzerTotal method processAccessLogs.

/**
 * Builds the cumulative log-analysis pipeline on top of the given access-log stream.
 *
 * <p>The method registers the following on {@code accessLogsDStream}:
 * <ul>
 *   <li>running content-size statistics (count, sum, min, max) folded into the
 *       {@code running*} counters,</li>
 *   <li>running response-code counts, of which up to 100 are kept in
 *       {@code currentResponseCodeCounts},</li>
 *   <li>per-IP request counts, saved as Hadoop files under {@code outDir} with the
 *       prefix {@code "pandas"},</li>
 *   <li>running endpoint counts, of which 10 are kept in {@code currentTopEndpoints}.</li>
 * </ul>
 *
 * @param outDir            prefix passed to {@code saveAsHadoopFiles} for the per-IP counts
 * @param accessLogsDStream the stream of parsed access-log entries to analyze
 */
public void processAccessLogs(String outDir, JavaDStream<ApacheAccessLog> accessLogsDStream) {
    // Calculate statistics based on the content size, and update the running counters to track this.
    accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {

        public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
            Tuple4<Long, Long, Long, Long> stats = Functions.contentSizeStats(accessLogs);
            // contentSizeStats returns null for an empty batch; leave the counters untouched then.
            if (stats != null) {
                runningCount.getAndAdd(stats._1());
                runningSum.getAndAdd(stats._2());
                runningMin.set(Math.min(runningMin.get(), stats._3()));
                runningMax.set(Math.max(runningMax.get(), stats._4()));
            }
            return null;
        }
    });
    // A DStream of Response Code Counts;
    JavaPairDStream<Integer, Long> responseCodeCountDStream = accessLogsDStream.transformToPair(new Function<JavaRDD<ApacheAccessLog>, JavaPairRDD<Integer, Long>>() {

        public JavaPairRDD<Integer, Long> call(JavaRDD<ApacheAccessLog> rdd) {
            return Functions.responseCodeCount(rdd);
        }
    }).updateStateByKey(new Functions.ComputeRunningSum());
    responseCodeCountDStream.foreachRDD(new Function<JavaPairRDD<Integer, Long>, Void>() {

        public Void call(JavaPairRDD<Integer, Long> rdd) {
            currentResponseCodeCounts = rdd.take(100);
            return null;
        }
    });
    // A DStream of ipAddressCounts.
    JavaPairDStream<String, Long> ipRawDStream = accessLogsDStream.transformToPair(new Function<JavaRDD<ApacheAccessLog>, JavaPairRDD<String, Long>>() {

        public JavaPairRDD<String, Long> call(JavaRDD<ApacheAccessLog> rdd) {
            return Functions.ipAddressCount(rdd);
        }
    });
    JavaPairDStream<String, Long> ipCumDStream = ipRawDStream.updateStateByKey(new Functions.ComputeRunningSum());
    // A DStream of ipAddressCounts without transform
    JavaPairDStream<String, Long> ipDStream = accessLogsDStream.mapToPair(new Functions.IpTuple());
    JavaPairDStream<String, Long> ipCountsDStream = ipDStream.reduceByKey(new Functions.LongSumReducer());
    // and joining it with the transfer amount
    JavaPairDStream<String, Long> ipBytesDStream = accessLogsDStream.mapToPair(new Functions.IpContentTuple());
    JavaPairDStream<String, Long> ipBytesSumDStream = ipBytesDStream.reduceByKey(new Functions.LongSumReducer());
    JavaPairDStream<String, Tuple2<Long, Long>> ipBytesRequestCountDStream = ipBytesSumDStream.join(ipCountsDStream);
    // Save our dstream of ip address request counts
    JavaPairDStream<Text, LongWritable> writableDStream = ipDStream.mapToPair(new PairFunction<Tuple2<String, Long>, Text, LongWritable>() {

        public Tuple2<Text, LongWritable> call(Tuple2<String, Long> e) {
            // Convert to Hadoop writables so the pairs can be written as a sequence file.
            return new Tuple2<Text, LongWritable>(new Text(e._1()), new LongWritable(e._2()));
        }
    });
    // Named subclass that pins the generic parameters so a Class object can be passed below.
    class OutFormat extends SequenceFileOutputFormat<Text, LongWritable> {
    }
    writableDStream.saveAsHadoopFiles(outDir, "pandas", Text.class, LongWritable.class, OutFormat.class);
    // All ips more than 10
    JavaDStream<String> ipAddressDStream = ipCumDStream.transform(new Function<JavaPairRDD<String, Long>, JavaRDD<String>>() {

        public JavaRDD<String> call(JavaPairRDD<String, Long> rdd) {
            return Functions.filterIPAddress(rdd);
        }
    });
    ipAddressDStream.foreachRDD(new Function<JavaRDD<String>, Void>() {

        public Void call(JavaRDD<String> rdd) {
            // NOTE(review): this result lands in a local and is discarded, unlike the sibling
            // callbacks that assign to fields. Possibly meant to update a field - confirm.
            List<String> currentIPAddresses = rdd.take(100);
            return null;
        }
    });
    // A DStream of endpoint to count.
    JavaPairDStream<String, Long> endpointCountsDStream = accessLogsDStream.transformToPair(new Function<JavaRDD<ApacheAccessLog>, JavaPairRDD<String, Long>>() {

        public JavaPairRDD<String, Long> call(JavaRDD<ApacheAccessLog> rdd) {
            return Functions.endpointCount(rdd);
        }
    }).updateStateByKey(new Functions.ComputeRunningSum());
    // The natural ordering is routed through Object before being viewed as a Comparator<Long>.
    Object ordering = Ordering.natural();
    final Comparator<Long> cmp = (Comparator<Long>) ordering;
    endpointCountsDStream.foreachRDD(new Function<JavaPairRDD<String, Long>, Void>() {

        public Void call(JavaPairRDD<String, Long> rdd) {
            currentTopEndpoints = rdd.takeOrdered(10, new Functions.ValueComparator<String, Long>(cmp));
            return null;
        }
    });
}
Also used : SequenceFileOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat) Comparator(java.util.Comparator) VoidFunction(org.apache.spark.api.java.function.VoidFunction) Function(org.apache.spark.api.java.function.Function) PairFunction(org.apache.spark.api.java.function.PairFunction) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) List(java.util.List) LongWritable(org.apache.hadoop.io.LongWritable) Text(org.apache.hadoop.io.Text) JavaRDD(org.apache.spark.api.java.JavaRDD) Tuple4(scala.Tuple4) Tuple2(scala.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong)

Example 9 with Comparator

use of java.util.Comparator in project learning-spark by databricks.

the class LogAnalyzerWindowed method processAccessLogs.

/**
 * Builds the windowed log-analysis pipeline on top of the given access-log stream.
 *
 * <p>The window length and slide interval are read from {@code Flags.getInstance()}. The
 * method demonstrates several windowed operations ({@code reduceByWindow},
 * {@code reduceByKeyAndWindow}, {@code countByWindow}, {@code countByValueAndWindow}), prints
 * their results, and for every windowed RDD stores a fresh {@code LogStatistics} in
 * {@code logStatistics}.
 *
 * @param outDir            not used by this method
 * @param accessLogsDStream the stream of parsed access-log entries to analyze
 */
public void processAccessLogs(String outDir, JavaDStream<ApacheAccessLog> accessLogsDStream) {
    JavaDStream<ApacheAccessLog> windowDStream = accessLogsDStream.window(Flags.getInstance().getWindowLength(), Flags.getInstance().getSlideInterval());
    JavaDStream<String> ip = accessLogsDStream.map(new Function<ApacheAccessLog, String>() {

        public String call(ApacheAccessLog entry) {
            return entry.getIpAddress();
        }
    });
    // reduceByWindow
    JavaDStream<Long> requestCountRBW = accessLogsDStream.map(new Function<ApacheAccessLog, Long>() {

        public Long call(ApacheAccessLog entry) {
            return 1L;
        }
    }).reduceByWindow(new Function2<Long, Long, Long>() {

        // Adding elements in the new slice
        public Long call(Long v1, Long v2) {
            return v1 + v2;
        }
    }, new Function2<Long, Long, Long>() {

        // Removing elements from the oldest slice
        public Long call(Long v1, Long v2) {
            return v1 - v2;
        }
    }, Flags.getInstance().getWindowLength(), Flags.getInstance().getSlideInterval());
    requestCountRBW.print();
    // reducebykeyandwindow
    JavaPairDStream<String, Long> ipAddressPairDStream = accessLogsDStream.mapToPair(new PairFunction<ApacheAccessLog, String, Long>() {

        public Tuple2<String, Long> call(ApacheAccessLog entry) {
            return new Tuple2<String, Long>(entry.getIpAddress(), 1L);
        }
    });
    JavaPairDStream<String, Long> ipCountDStream = ipAddressPairDStream.reduceByKeyAndWindow(// Adding elements in the new slice
    new Function2<Long, Long, Long>() {

        public Long call(Long v1, Long v2) {
            return v1 + v2;
        }
    }, // Removing elements from the oldest slice
    new Function2<Long, Long, Long>() {

        public Long call(Long v1, Long v2) {
            return v1 - v2;
        }
    }, Flags.getInstance().getWindowLength(), Flags.getInstance().getSlideInterval());
    ipCountDStream.print();
    // Use countByWindow
    JavaDStream<Long> requestCount = accessLogsDStream.countByWindow(Flags.getInstance().getWindowLength(), Flags.getInstance().getSlideInterval());
    JavaPairDStream<String, Long> ipAddressRequestCount = ip.countByValueAndWindow(Flags.getInstance().getWindowLength(), Flags.getInstance().getSlideInterval());
    requestCount.print();
    ipAddressRequestCount.print();
    // use a transform for the response code count
    JavaPairDStream<Integer, Long> responseCodeCountTransform = accessLogsDStream.transformToPair(new Function<JavaRDD<ApacheAccessLog>, JavaPairRDD<Integer, Long>>() {

        public JavaPairRDD<Integer, Long> call(JavaRDD<ApacheAccessLog> logs) {
            return Functions.responseCodeCount(logs);
        }
    });
    // Recompute the full statistics snapshot for every windowed RDD.
    windowDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {

        public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
            Tuple4<Long, Long, Long, Long> contentSizeStats = Functions.contentSizeStats(accessLogs);
            List<Tuple2<Integer, Long>> responseCodeToCount = Functions.responseCodeCount(accessLogs).take(100);
            JavaPairRDD<String, Long> ipAddressCounts = Functions.ipAddressCount(accessLogs);
            // Named distinctly from the enclosing method's `ip` stream to avoid shadowing it.
            List<String> ipAddresses = Functions.filterIPAddress(ipAddressCounts).take(100);
            // The natural ordering is routed through Object before being viewed as a Comparator<Long>.
            Object ordering = Ordering.natural();
            Comparator<Long> cmp = (Comparator<Long>) ordering;
            List<Tuple2<String, Long>> topEndpoints = Functions.endpointCount(accessLogs).top(10, new Functions.ValueComparator<String, Long>(cmp));
            logStatistics = new LogStatistics(contentSizeStats, responseCodeToCount, ipAddresses, topEndpoints);
            return null;
        }
    });
}
Also used : Comparator(java.util.Comparator) Function(org.apache.spark.api.java.function.Function) PairFunction(org.apache.spark.api.java.function.PairFunction) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) List(java.util.List) JavaRDD(org.apache.spark.api.java.JavaRDD) Tuple4(scala.Tuple4) Tuple2(scala.Tuple2)

Example 10 with Comparator

use of java.util.Comparator in project druid by druid-io.

the class CountAggregatorTest method testComparator.

/**
 * Checks the comparator supplied by {@code CountAggregatorFactory} against aggregator values
 * taken before and after a single {@code aggregate()} call.
 */
@Test
public void testComparator() {
    CountAggregator aggregator = new CountAggregator();
    // Snapshot the value before any aggregation has happened.
    Object initialValue = aggregator.get();
    aggregator.aggregate();
    Comparator comparator = new CountAggregatorFactory("null").getComparator();

    // The pre-aggregation value is expected to order before the post-aggregation one.
    Assert.assertEquals(-1, comparator.compare(initialValue, aggregator.get()));
    // A value compared with itself, or with a fresh read of the same state, is equal.
    Assert.assertEquals(0, comparator.compare(initialValue, initialValue));
    Assert.assertEquals(0, comparator.compare(aggregator.get(), aggregator.get()));
    // Swapping the arguments flips the sign.
    Assert.assertEquals(1, comparator.compare(aggregator.get(), initialValue));
}
Also used : Comparator(java.util.Comparator) Test(org.junit.Test)

Aggregations

Comparator (java.util.Comparator): 322, ArrayList (java.util.ArrayList): 123, List (java.util.List): 58, Test (org.junit.Test): 58, HashMap (java.util.HashMap): 50, IOException (java.io.IOException): 36, Map (java.util.Map): 35, File (java.io.File): 24, HashSet (java.util.HashSet): 23, TreeSet (java.util.TreeSet): 20, Set (java.util.Set): 18, Iterator (java.util.Iterator): 15, Method (java.lang.reflect.Method): 14, Collections (java.util.Collections): 14, Date (java.util.Date): 14, TreeMap (java.util.TreeMap): 14, ArrayMap (android.util.ArrayMap): 12, Collection (java.util.Collection): 11, LinkedList (java.util.LinkedList): 11, SimpleDateFormat (java.text.SimpleDateFormat): 10