Search in sources :

Example 1 with Files

use of au.gov.amsa.util.Files in project risky by amsa-code.

the class EffectiveSpeedFailuresMain method main.

/**
 * For every {@code .track} file found under the 2015 five-minute directory
 * (except {@code 0.track}), counts the fixes that fail the effective speed
 * check. Files with at least 1000 failures are ordered by descending failure
 * count and the MMSI recorded for each is printed, followed by the number of
 * MMSIs printed.
 */
public static void main(String[] args) {
    Pattern trackFiles = Pattern.compile(".*\\.track");
    List<File> tracks = Files.find(new File("/media/an/binary-fixes-5-minute/2015"), trackFiles);
    int offenders = Observable.from(tracks)
            .filter(track -> !track.getName().equals("0.track"))
            .flatMap(track -> BinaryFixes.from(track)
                    // a fresh checker (and options) is built for every file
                    .lift(new OperatorEffectiveSpeedChecker(SegmentOptions.builder().acceptAnyFixHours(12L).maxSpeedKnots(50).build()))
                    // keep only the checks that failed
                    .filter(check -> !check.isOk())
                    // fold the failures into a running count carrying the mmsi of the latest failed fix
                    .reduce(new MmsiCount(0, 0), (tally, failed) -> new MmsiCount(failed.fix().mmsi(), tally.count + 1))
                    .filter(tally -> tally.count >= 1000))
            // highest failure count first
            .toSortedList((left, right) -> Long.compare(right.count, left.count))
            .flatMapIterable(sorted -> sorted)
            .doOnNext(tally -> System.out.println(tally.mmsi))
            .count()
            .toBlocking()
            .single();
    System.out.println(offenders);
}
Also used : List(java.util.List) BinaryFixes(au.gov.amsa.risky.format.BinaryFixes) SegmentOptions(au.gov.amsa.geo.model.SegmentOptions) OperatorEffectiveSpeedChecker(au.gov.amsa.geo.distance.OperatorEffectiveSpeedChecker) Pattern(java.util.regex.Pattern) Files(au.gov.amsa.util.Files) File(java.io.File) Observable(rx.Observable) Pattern(java.util.regex.Pattern) OperatorEffectiveSpeedChecker(au.gov.amsa.geo.distance.OperatorEffectiveSpeedChecker) File(java.io.File)

Example 2 with Files

use of au.gov.amsa.util.Files in project risky by amsa-code.

the class VoyageDatasetProducer method produce.

/**
 * Reads the given binary fix files, discards fixes that fail the effective
 * speed check, persists the remaining fixes under {@code fixesOutput} and
 * writes the derived legs (sorted by mmsi then leg start time) to
 * {@code output}. Summary statistics are printed to stdout and written to
 * {@code target/info.txt}; per-mmsi rejection counts are written to
 * {@code target/failures.txt}.
 *
 * @param output      file the legs are written to; deleted first if present
 * @param fixesOutput directory handed to the {@code Persister}; deleted first
 * @param list        binary fix files to process
 * @throws Exception if resetting the outputs, loading reference data or writing fails
 */
public static void produce(File output, File fixesOutput, List<File> list) throws Exception {
    // reset output directories
    output.delete();
    FileUtils.deleteDirectory(fixesOutput);
    int numFiles = list.size();
    // the separator was missing, which printed e.g. "12binary fix files"
    System.out.println(numFiles + " binary fix files");
    AtomicInteger fileNumber = new AtomicInteger(0);
    Collection<Port> ports = loadPorts();
    Collection<EezWaypoint> eezWaypoints = readEezWaypoints();
    Shapefile eezLine = Eez.loadEezLine();
    Shapefile eezPolygon = Eez.loadEezPolygon();
    System.out.println("loaded eez shapefiles");
    long t = System.currentTimeMillis();
    AtomicLong failedCheck = new AtomicLong();
    AtomicLong fixCount = new AtomicLong();
    Map<Integer, Integer> mmsisWithFailedChecks = new TreeMap<>();
    Persister persister = new Persister(fixesOutput);
    try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output)))) {
        // Note that in the observable below we don't employ parallel techniques
        // this is because the runtime is acceptable
        Observable.from(list)
                // one group of files per mmsi (taken from the file name)
                .groupBy(f -> mmsiFromFilename(f))
                .flatMap(files -> {
                    String mmsi = files.getKey();
                    if (!isShipMmsi(mmsi)) {
                        return Observable.empty();
                    } else {
                        return files
                                .compose(o -> logPercentCompleted(numFiles, t, o, fileNumber))
                                .concatMap(BinaryFixes::from)
                                .lift(new OperatorEffectiveSpeedChecker(SegmentOptions.builder().acceptAnyFixHours(24L).maxSpeedKnots(50).build()))
                                // record totals before the failed checks are dropped
                                .doOnNext(check -> updatedCounts(failedCheck, fixCount, mmsisWithFailedChecks, check))
                                .filter(check -> check.isOk())
                                .map(check -> check.fix())
                                .doOnNext(fix -> persister.persist(fix))
                                .compose(o -> toLegs(eezLine, eezPolygon, ports, eezWaypoints, o))
                                .filter(x -> includeLeg(x));
                    }
                })
                .sorted((a, b) -> compareByMmsiThenLegStartTime(a, b))
                .doOnNext(x -> write(writer, x))
                // close the persister whether the stream completes or errors
                .doOnTerminate(Checked.a0(() -> persister.close()))
                .toBlocking()
                .subscribe();
        System.out.println((System.currentTimeMillis() - t) + "ms");

        // the pipeline has finished, so the counters are stable; build each summary line once
        String totalFixes = "total fixes=" + fixCount.get();
        String rejectedFixes = "num fixes rejected due failed effective speed check=" + failedCheck.get();
        String failedMmsis = "num mmsis with failed effective speed checks=" + mmsisWithFailedChecks.size();
        System.out.println(totalFixes);
        System.out.println(rejectedFixes);
        System.out.println(failedMmsis);
        try (PrintStream p = new PrintStream("target/info.txt")) {
            p.println(totalFixes);
            p.println(rejectedFixes);
            p.println(failedMmsis);
        }
        try (PrintStream p = new PrintStream("target/failures.txt")) {
            p.println("failures mmsi <TAB> number of rejected fixes");
            // iterate entries directly rather than looking each key up again
            for (Map.Entry<Integer, Integer> entry : mmsisWithFailedChecks.entrySet()) {
                p.println(entry.getKey() + "\t" + entry.getValue());
            }
        }
    }
}
Also used : SegmentOptions(au.gov.amsa.geo.model.SegmentOptions) ZonedDateTime(java.time.ZonedDateTime) OperatorEffectiveSpeedChecker(au.gov.amsa.geo.distance.OperatorEffectiveSpeedChecker) Preconditions(com.github.davidmoten.guavamini.Preconditions) BufferedOutputStream(java.io.BufferedOutputStream) ArrayList(java.util.ArrayList) Observable(rx.Observable) BinaryFixes(au.gov.amsa.risky.format.BinaryFixes) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Fix(au.gov.amsa.risky.format.Fix) OutputStreamWriter(java.io.OutputStreamWriter) ZoneOffset(java.time.ZoneOffset) OutputStream(java.io.OutputStream) PrintStream(java.io.PrintStream) BufferedWriter(java.io.BufferedWriter) Collection(java.util.Collection) DecimalFormat(java.text.DecimalFormat) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) EffectiveSpeedCheck(au.gov.amsa.geo.distance.EffectiveSpeedCheck) Reader(java.io.Reader) Checked(com.github.davidmoten.rx.Checked) Instant(java.time.Instant) InputStreamReader(java.io.InputStreamReader) Files(au.gov.amsa.util.Files) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) BinaryFixesFormat(au.gov.amsa.risky.format.BinaryFixesFormat) Strings(au.gov.amsa.streams.Strings) Position(com.github.davidmoten.grumpy.core.Position) VisibleForTesting(com.github.davidmoten.guavamini.annotations.VisibleForTesting) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) TreeMap(java.util.TreeMap) Closeable(java.io.Closeable) DateTimeFormatter(java.time.format.DateTimeFormatter) Optional(java.util.Optional) Shapefile(au.gov.amsa.gt.Shapefile) Pattern(java.util.regex.Pattern) PrintStream(java.io.PrintStream) OperatorEffectiveSpeedChecker(au.gov.amsa.geo.distance.OperatorEffectiveSpeedChecker) TreeMap(java.util.TreeMap) BufferedWriter(java.io.BufferedWriter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) 
AtomicLong(java.util.concurrent.atomic.AtomicLong) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) FileOutputStream(java.io.FileOutputStream) Shapefile(au.gov.amsa.gt.Shapefile) BinaryFixes(au.gov.amsa.risky.format.BinaryFixes) OutputStreamWriter(java.io.OutputStreamWriter)

Example 3 with Files

use of au.gov.amsa.util.Files in project risky by amsa-code.

the class Formats method transform.

/**
 * Finds the files under {@code input} matching {@code pattern} and returns an
 * observable that, for each file, reads its fixes, applies {@code transformer},
 * and hands the resulting list to {@code fixesWriter} together with a target
 * file located under {@code output} (same relative location, name passed
 * through {@code renamer}). Progress is logged as each file is started.
 *
 * @param input       root directory searched for input files
 * @param output      root directory the input paths are rebased onto
 * @param pattern     pattern passed to {@code Files.find} to select files
 * @param transformer transformation applied to the fixes of each file
 * @param fixesWriter receives the transformed fixes and the destination file
 * @param renamer     maps the rebased output file name to the final file name
 * @return an observable emitting one count per input file
 */
public static Observable<Integer> transform(final File input, final File output, Pattern pattern, final Transformer<HasFix, HasFix> transformer, final Action2<List<HasFix>, File> fixesWriter, final Func1<String, String> renamer) {
    Preconditions.checkNotNull(input);
    Preconditions.checkNotNull(output);
    Preconditions.checkNotNull(pattern);
    Preconditions.checkNotNull(transformer);
    final List<File> files = Files.find(input, pattern);
    long n = 0;
    for (File file : files) {
        n += file.length();
    }
    final long totalSizeBytes = n;
    log.info("transforming " + new DecimalFormat("0.000").format(totalSizeBytes / 1000000.0) + "MB");
    final Action1<File> logger = new Action1<File>() {

        final AtomicInteger count = new AtomicInteger();

        final long startTime = System.currentTimeMillis();

        final AtomicLong size = new AtomicLong();

        @Override
        public void call(File f) {
            long now = System.currentTimeMillis();
            int fileNumber = count.incrementAndGet();
            // getAndAdd returns the total BEFORE this file is added, i.e. the
            // bytes of the files announced earlier
            long bytesDone = size.getAndAdd(f.length());
            double timeToFinishMins;
            // an estimate needs at least one earlier file and a non-zero byte
            // count; dividing by zero bytes would log infinity or NaN, so fall
            // back to the -1 "unknown" sentinel instead
            if (fileNumber > 1 && bytesDone > 0) {
                timeToFinishMins = (now - startTime) / (double) (bytesDone) * (totalSizeBytes - bytesDone) / 1000.0 / 60.0;
            } else {
                timeToFinishMins = -1;
            }
            DecimalFormat df = new DecimalFormat("0.000");
            log.info("transforming " + fileNumber + " of " + files.size() + ":" + f + ", sizeMB=" + df.format(f.length() / 1000000.0) + ", finish in mins=" + df.format(timeToFinishMins));
        }
    };
    log.info("converting " + files.size() + " files" + " in " + input);
    return Observable.from(files).flatMap(file -> {
        final File outputFile = rebase(file, input, output);
        outputFile.getParentFile().mkdirs();
        logger.call(file);
        return BinaryFixes.from(file, true, BinaryFixesFormat.WITHOUT_MMSI)
                .toList()
                .flatMapIterable(Functions.<List<Fix>>identity())
                .compose(transformer)
                .toList()
                .doOnNext(list -> {
                    File f = new File(outputFile.getParentFile(), renamer.call(outputFile.getName()));
                    fixesWriter.call(list, f);
                })
                .count();
    });
}
Also used : Logger(org.slf4j.Logger) Transformer(rx.Observable.Transformer) DecimalFormat(java.text.DecimalFormat) LoggerFactory(org.slf4j.LoggerFactory) Action1(rx.functions.Action1) Files(au.gov.amsa.util.Files) Action2(rx.functions.Action2) File(java.io.File) Observable(rx.Observable) VisibleForTesting(com.github.davidmoten.guavamini.annotations.VisibleForTesting) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) Func1(rx.functions.Func1) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Preconditions(com.github.davidmoten.util.Preconditions) Pattern(java.util.regex.Pattern) Functions(com.github.davidmoten.rx.Functions) AtomicLong(java.util.concurrent.atomic.AtomicLong) Action1(rx.functions.Action1) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DecimalFormat(java.text.DecimalFormat) File(java.io.File)

Example 4 with Files

use of au.gov.amsa.util.Files in project risky by amsa-code.

the class BinaryFixesMain method main.

/**
 * Speed test for loading BinaryFixes from disk: counts the records in every
 * {@code .track} file under the 2014 five-minute downsample directory, reading
 * batches of files on the computation scheduler, then prints the total and the
 * throughput.
 */
public static void main(String[] args) {
    FixImpl.validate = false;
    final ConcurrentHashMap<Long, List<FixImpl>> map = new ConcurrentHashMap<>();
    File root = new File("/media/an/binary-fixes/2014-year-downsample-5-mins");
    List<File> files = Files.find(root, Pattern.compile(".*\\.track"));
    long startMs = System.currentTimeMillis();
    long total = Observable.from(files)
            // roughly one batch of files per available processor (at least one file each)
            .buffer(Math.max(1, files.size() / Runtime.getRuntime().availableProcessors()))
            // files within a batch are read sequentially; batches run on the computation scheduler
            .flatMap(batch -> Observable.from(batch)
                    .concatMap(file -> BinaryFixes.from(file).countLong())
                    .subscribeOn(Schedulers.computation()))
            // running total of records across all files
            .scan(0L, (sum, fileCount) -> sum + fileCount)
            .lift(Logging.<Long>logger().showCount().prefix("records=").showMemory().every(1000).log())
            .last()
            .toBlocking()
            .single();
    long elapsedMs = System.currentTimeMillis() - startMs;
    // nothing in this method adds to the map; its size is still reported
    System.out.println("Map size = " + map.size());
    System.out.println("Total records = " + total + ", numPerSecond=" + total * 1000.0 / elapsedMs + ", timeMs=" + elapsedMs);
}
Also used : List(java.util.List) Logging(com.github.davidmoten.rx.slf4j.Logging) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Schedulers(rx.schedulers.Schedulers) Pattern(java.util.regex.Pattern) Files(au.gov.amsa.util.Files) File(java.io.File) Observable(rx.Observable) List(java.util.List) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) File(java.io.File)

Example 5 with Files

use of au.gov.amsa.util.Files in project risky by amsa-code.

the class VesselsInGbrMain method main.

/**
 * Collects the {@code .track} files for 2014, 2015 and 2016, spreads them over
 * as many groups as there are available processors, passes each group to
 * {@code vesselsInGbr} and writes the distinct (mmsi + AIS class) fixes that
 * have a valid mmsi, ordered by mmsi, to {@code target/mmsi.txt}. The elapsed
 * time is printed at the end.
 *
 * @throws IOException if the output file cannot be opened or closed
 */
public static void main(String[] args) throws IOException {
    long startMs = System.currentTimeMillis();
    File out = new File("target/mmsi.txt");
    out.delete();
    try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(out)))) {
        Pattern trackFiles = Pattern.compile(".*\\.track");
        String[] yearDirectories = {
            "/media/an/binary-fixes-5-minute/2014",
            "/media/an/binary-fixes-5-minute/2015",
            "/media/an/binary-fixes-5-minute/2016"
        };
        List<File> list = new ArrayList<File>();
        for (String directory : yearDirectories) {
            list.addAll(Files.find(new File(directory), trackFiles));
        }
        AtomicInteger count = new AtomicInteger();
        Observable.from(list)
                // round-robin the files into one group per available processor
                .groupBy(f -> count.getAndIncrement() % Runtime.getRuntime().availableProcessors())
                .flatMap(files -> vesselsInGbr(files, Schedulers.computation()))
                // one entry per mmsi / AIS class combination
                .distinct(fix -> fix.mmsi() + fix.aisClass().name())
                .sorted((left, right) -> Integer.compare(left.mmsi(), right.mmsi()))
                .filter(fix -> MmsiValidator2.INSTANCE.isValid((long) fix.mmsi()))
                .doOnNext(fix -> write(writer, fix))
                .toBlocking()
                .subscribe();
    }
    System.out.println((System.currentTimeMillis() - startMs) + "ms");
}
Also used : BufferedWriter(java.io.BufferedWriter) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) Scheduler(rx.Scheduler) Files(au.gov.amsa.util.Files) File(java.io.File) ArrayList(java.util.ArrayList) Observable(rx.Observable) List(java.util.List) GroupedObservable(rx.observables.GroupedObservable) BinaryFixes(au.gov.amsa.risky.format.BinaryFixes) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Fix(au.gov.amsa.risky.format.Fix) Schedulers(rx.schedulers.Schedulers) OutputStreamWriter(java.io.OutputStreamWriter) MmsiValidator2(au.gov.amsa.util.identity.MmsiValidator2) Pattern(java.util.regex.Pattern) Pattern(java.util.regex.Pattern) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) FileOutputStream(java.io.FileOutputStream) ArrayList(java.util.ArrayList) OutputStreamWriter(java.io.OutputStreamWriter) File(java.io.File) BufferedWriter(java.io.BufferedWriter)

Aggregations

Files (au.gov.amsa.util.Files)13 File (java.io.File)13 Pattern (java.util.regex.Pattern)13 Observable (rx.Observable)13 List (java.util.List)12 BinaryFixes (au.gov.amsa.risky.format.BinaryFixes)10 Fix (au.gov.amsa.risky.format.Fix)7 IOException (java.io.IOException)7 Schedulers (rx.schedulers.Schedulers)6 FileOutputStream (java.io.FileOutputStream)5 Instant (java.time.Instant)5 DateTimeFormatter (java.time.format.DateTimeFormatter)5 ArrayList (java.util.ArrayList)5 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)5 Logger (org.slf4j.Logger)5 LoggerFactory (org.slf4j.LoggerFactory)5 BufferedWriter (java.io.BufferedWriter)4 OutputStreamWriter (java.io.OutputStreamWriter)4 DecimalFormat (java.text.DecimalFormat)4 ZonedDateTime (java.time.ZonedDateTime)4