use of au.gov.amsa.util.Files in project risky by amsa-code.
the class EffectiveSpeedFailuresMain method main.
/**
 * Reports the vessels whose track file under the 2015 directory contains at least
 * 1000 fixes that fail the effective speed check. The mmsi of each such vessel is
 * printed (largest failure count first), followed by the number of vessels printed.
 *
 * @param args not used
 */
public static void main(String[] args) {
    // every *.track file found below the 2015 directory is a candidate
    List<File> files = Files.find(new File("/media/an/binary-fixes-5-minute/2015"),
            Pattern.compile(".*\\.track"));

    // the file named 0.track is excluded from the analysis
    Observable<File> trackFiles = Observable.from(files)
            .filter(file -> !"0.track".equals(file.getName()));

    // one MmsiCount per file: the number of failed checks in that file, tagged with
    // the mmsi of the last failing fix; files with fewer than 1000 failures are dropped
    Observable<MmsiCount> offenders = trackFiles.flatMap(file -> {
        SegmentOptions options = SegmentOptions.builder()
                .acceptAnyFixHours(12L)
                .maxSpeedKnots(50)
                .build();
        // a new operator instance is created for every file
        return BinaryFixes.from(file)
                .lift(new OperatorEffectiveSpeedChecker(options))
                .filter(check -> !check.isOk())
                .reduce(new MmsiCount(0, 0),
                        (soFar, failed) -> new MmsiCount(failed.fix().mmsi(), soFar.count + 1))
                .filter(total -> total.count >= 1000);
    });

    // sort descending by failure count, print each mmsi, then count what was printed
    int count = offenders
            .toSortedList((left, right) -> Long.compare(right.count, left.count))
            .flatMapIterable(sorted -> sorted)
            .doOnNext(entry -> System.out.println(entry.mmsi))
            .count()
            .toBlocking()
            .single();
    System.out.println(count);
}
use of au.gov.amsa.util.Files in project risky by amsa-code.
the class VoyageDatasetProducer method produce.
/**
 * Produces the voyage (leg) dataset from a list of binary fix files.
 *
 * <p>What this method does, in order:
 * <ol>
 * <li>deletes {@code output} and recursively deletes {@code fixesOutput};</li>
 * <li>loads ports, EEZ waypoints and the EEZ line/polygon shapefiles;</li>
 * <li>groups the files by the mmsi derived from each filename and discards groups
 * whose mmsi is rejected by {@code isShipMmsi};</li>
 * <li>for each remaining group reads the fixes file by file (in order), runs the
 * effective speed check (accept any fix after 24 hours, max speed 50 knots), records
 * counts, keeps only fixes that pass, persists them and converts them to legs;</li>
 * <li>sorts all included legs with {@code compareByMmsiThenLegStartTime} and writes
 * them to {@code output};</li>
 * <li>prints summary statistics to stdout and writes them to
 * {@code target/info.txt} and {@code target/failures.txt}.</li>
 * </ol>
 *
 * @param output file that receives the written legs; deleted before processing
 * @param fixesOutput directory handed to the {@code Persister}; deleted recursively
 *        before processing
 * @param list binary fix files to process
 * @throws Exception if deleting, reading, or writing fails
 */
public static void produce(File output, File fixesOutput, List<File> list) throws Exception {
    // reset output directories
    output.delete();
    FileUtils.deleteDirectory(fixesOutput);
    int numFiles = list.size();
    // the separating space was missing, which printed e.g. "12binary fix files"
    System.out.println(numFiles + " binary fix files");
    AtomicInteger fileNumber = new AtomicInteger(0);
    Collection<Port> ports = loadPorts();
    Collection<EezWaypoint> eezWaypoints = readEezWaypoints();
    Shapefile eezLine = Eez.loadEezLine();
    Shapefile eezPolygon = Eez.loadEezPolygon();
    System.out.println("loaded eez shapefiles");
    long t = System.currentTimeMillis();
    AtomicLong failedCheck = new AtomicLong();
    AtomicLong fixCount = new AtomicLong();
    // sorted by key so that the failures report below is written in key order
    Map<Integer, Integer> mmsisWithFailedChecks = new TreeMap<>();
    Persister persister = new Persister(fixesOutput);
    try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output)))) {
        // Note that in the observable below we don't employ parallel techniques
        // this is because the runtime is acceptable
        Observable.from(list)
                .groupBy(f -> mmsiFromFilename(f))
                .flatMap(files -> {
                    String mmsi = files.getKey();
                    if (!isShipMmsi(mmsi)) {
                        // groups that are not ship mmsis contribute nothing
                        return Observable.empty();
                    } else {
                        return files
                                .compose(o -> logPercentCompleted(numFiles, t, o, fileNumber))
                                // concatMap keeps the fixes of one file together, in file order
                                .concatMap(BinaryFixes::from)
                                .lift(new OperatorEffectiveSpeedChecker(
                                        SegmentOptions.builder().acceptAnyFixHours(24L).maxSpeedKnots(50).build()))
                                // counts are updated for every check, passed or failed
                                .doOnNext(check -> updatedCounts(failedCheck, fixCount, mmsisWithFailedChecks, check))
                                .filter(check -> check.isOk())
                                .map(check -> check.fix())
                                .doOnNext(fix -> persister.persist(fix))
                                .compose(o -> toLegs(eezLine, eezPolygon, ports, eezWaypoints, o))
                                .filter(x -> includeLeg(x));
                    }
                })
                .sorted((a, b) -> compareByMmsiThenLegStartTime(a, b))
                .doOnNext(x -> write(writer, x))
                // the persister is closed when the stream terminates
                .doOnTerminate(Checked.a0(() -> persister.close()))
                .toBlocking()
                .subscribe();
        System.out.println((System.currentTimeMillis() - t) + "ms");
        System.out.println("total fixes=" + fixCount.get());
        System.out.println("num fixes rejected due failed effective speed check=" + failedCheck.get());
        System.out.println("num mmsis with failed effective speed checks=" + mmsisWithFailedChecks.size());
        try (PrintStream p = new PrintStream("target/info.txt")) {
            p.println("total fixes=" + fixCount.get());
            p.println("num fixes rejected due failed effective speed check=" + failedCheck.get());
            p.println("num mmsis with failed effective speed checks=" + mmsisWithFailedChecks.size());
        }
        try (PrintStream p = new PrintStream("target/failures.txt")) {
            p.println("failures mmsi <TAB> number of rejected fixes");
            // iterate entries directly instead of looking each key up again
            for (Map.Entry<Integer, Integer> entry : mmsisWithFailedChecks.entrySet()) {
                p.println(entry.getKey() + "\t" + entry.getValue());
            }
        }
    }
}
use of au.gov.amsa.util.Files in project risky by amsa-code.
the class Formats method transform.
/**
 * Reads every file returned by {@code Files.find(input, pattern)}, passes its fixes
 * through {@code transformer} and hands the transformed list to {@code fixesWriter}
 * together with the destination file.
 *
 * <p>The file search and the total-size calculation happen when this method is
 * called. The per-file work (creating the destination directory, progress logging,
 * reading, transforming, writing) happens only when the returned observable is
 * subscribed to.
 *
 * <p>The destination of each file is {@code rebase(file, input, output)} with its
 * name replaced by {@code renamer.call(name)}.
 *
 * @param input directory (or file) searched for input files; must not be null
 * @param output base location for the written files; must not be null
 * @param pattern pattern passed to {@code Files.find}; must not be null
 * @param transformer applied to the stream of fixes of each file; must not be null
 * @param fixesWriter called once per input file with the transformed fixes and the
 *        destination file
 * @param renamer maps the rebased file name to the name of the destination file
 * @return an observable emitting one integer per input file, namely the count of
 *         items emitted after {@code toList()} for that file (not the number of fixes)
 */
public static Observable<Integer> transform(final File input, final File output, Pattern pattern, final Transformer<HasFix, HasFix> transformer, final Action2<List<HasFix>, File> fixesWriter, final Func1<String, String> renamer) {
    Preconditions.checkNotNull(input);
    Preconditions.checkNotNull(output);
    Preconditions.checkNotNull(pattern);
    Preconditions.checkNotNull(transformer);
    final List<File> files = Files.find(input, pattern);
    long n = 0;
    for (File file : files) {
        n += file.length();
    }
    final long totalSizeBytes = n;
    log.info("transforming " + new DecimalFormat("0.000").format(totalSizeBytes / 1000000.0) + "MB");
    // progress logger: called once per file, just before that file is processed
    final Action1<File> logger = new Action1<File>() {
        final AtomicInteger count = new AtomicInteger();
        final long startTime = System.currentTimeMillis();
        final AtomicLong size = new AtomicLong();

        @Override
        public void call(File f) {
            long now = System.currentTimeMillis();
            int fileIndex = count.incrementAndGet();
            // getAndAdd returns the total BEFORE this file is added, i.e. the
            // number of bytes belonging to the files logged earlier
            long bytesBefore = size.getAndAdd(f.length());
            double timeToFinishMins;
            if (fileIndex > 1 && bytesBefore > 0) {
                // elapsed ms per byte so far, scaled to the remaining bytes, in minutes
                timeToFinishMins = (now - startTime) / (double) (bytesBefore) * (totalSizeBytes - bytesBefore) / 1000.0 / 60.0;
            } else {
                // no estimate is possible for the first file, nor while all earlier
                // files were empty (the estimate would divide by zero bytes)
                timeToFinishMins = -1;
            }
            DecimalFormat df = new DecimalFormat("0.000");
            log.info("transforming " + fileIndex + " of " + files.size() + ":" + f + ", sizeMB=" + df.format(f.length() / 1000000.0) + ", finish in mins=" + df.format(timeToFinishMins));
        }
    };
    log.info("converting " + files.size() + " files" + " in " + input);
    return Observable.from(files).flatMap(file -> {
        final File outputFile = rebase(file, input, output);
        outputFile.getParentFile().mkdirs();
        logger.call(file);
        return BinaryFixes.from(file, true, BinaryFixesFormat.WITHOUT_MMSI)
                // the whole file is collected into a list before being transformed
                .toList()
                .flatMapIterable(Functions.<List<Fix>>identity())
                .compose(transformer)
                .toList()
                .doOnNext(list -> {
                    File f = new File(outputFile.getParentFile(), renamer.call(outputFile.getName()));
                    fixesWriter.call(list, f);
                })
                .count();
    });
}
use of au.gov.amsa.util.Files in project risky by amsa-code.
the class BinaryFixesMain method main.
/**
 * Speed test for loading BinaryFixes from disk: counts the records of every
 * {@code .track} file below the 2014 downsampled directory and prints the total,
 * the records per second and the elapsed time.
 *
 * @param args not used
 */
public static void main(String[] args) {
    // perform a speed test for loading BinaryFixes from disk
    FixImpl.validate = false;
    // nothing in this method adds to the map; only its size is printed at the end
    final ConcurrentHashMap<Long, List<FixImpl>> map = new ConcurrentHashMap<Long, List<FixImpl>>();

    File root = new File("/media/an/binary-fixes/2014-year-downsample-5-mins");
    List<File> files = Files.find(root, Pattern.compile(".*\\.track"));

    long startMs = System.currentTimeMillis();

    // split the files into roughly one batch per available processor (at least 1 file each)
    int batchSize = Math.max(1, files.size() / Runtime.getRuntime().availableProcessors());

    // each batch is subscribed on the computation scheduler; within a batch the files
    // are counted one after another
    Observable<Long> perFileCounts = Observable.from(files)
            .buffer(batchSize)
            .flatMap(batch -> Observable.from(batch)
                    .concatMap(file -> BinaryFixes.from(file).countLong())
                    .subscribeOn(Schedulers.computation()));

    // running total of records, starting at 0
    Observable<Long> runningTotal = perFileCounts.scan(0L, (sum, fileCount) -> sum + fileCount);

    long count = runningTotal
            .lift(Logging.<Long>logger().showCount().prefix("records=").showMemory().every(1000).log())
            .last()
            .toBlocking()
            .single();

    long elapsed = System.currentTimeMillis() - startMs;
    System.out.println("Map size = " + map.size());
    System.out.println("Total records = " + count + ", numPerSecond=" + count * 1000.0 / elapsed + ", timeMs=" + elapsed);
}
use of au.gov.amsa.util.Files in project risky by amsa-code.
the class VesselsInGbrMain method main.
/**
 * Writes to {@code target/mmsi.txt} the distinct (mmsi, AIS class) combinations
 * produced by {@code vesselsInGbr} for the 2014, 2015 and 2016 track files, sorted
 * by mmsi and restricted to mmsis accepted by {@code MmsiValidator2}. The elapsed
 * time is printed at the end.
 *
 * @param args not used
 * @throws IOException if the output file cannot be opened or closed
 */
public static void main(String[] args) throws IOException {
    long startMs = System.currentTimeMillis();
    File out = new File("target/mmsi.txt");
    out.delete();
    try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(out)))) {
        Pattern trackPattern = Pattern.compile(".*\\.track");

        // gather the track files of the three years, in year order
        List<File> list = new ArrayList<File>(
                Files.find(new File("/media/an/binary-fixes-5-minute/2014"), trackPattern));
        list.addAll(Files.find(new File("/media/an/binary-fixes-5-minute/2015"), trackPattern));
        list.addAll(Files.find(new File("/media/an/binary-fixes-5-minute/2016"), trackPattern));

        AtomicInteger count = new AtomicInteger();
        Observable<File> source = Observable.from(list);

        source
                // deal the files round-robin into one group per available processor
                .groupBy(file -> {
                    int index = count.getAndIncrement();
                    return index % Runtime.getRuntime().availableProcessors();
                })
                .flatMap(group -> vesselsInGbr(group, Schedulers.computation()))
                // keep the first fix seen for each mmsi + AIS class combination
                .distinct(fix -> fix.mmsi() + fix.aisClass().name())
                .sorted((left, right) -> Integer.compare(left.mmsi(), right.mmsi()))
                .filter(fix -> MmsiValidator2.INSTANCE.isValid((long) fix.mmsi()))
                .doOnNext(fix -> write(writer, fix))
                .toBlocking()
                .subscribe();
    }
    System.out.println((System.currentTimeMillis() - startMs) + "ms");
}
Aggregations