Use of org.opennms.newts.gsod.FileIterable.KeyedIterable in the project newts by OpenNMS.
From the class FileIterableTest, method testCombine.
/**
 * Smoke test that prints, to standard error, every group returned by
 * {@code FileIterable.groupFilesByDir} for the local 1988 GSOD directory.
 *
 * <p>Each group is announced with a "Next Iterable" marker line followed by one line per
 * path in the group. The test makes no assertions; it only exercises the iteration.
 */
@Test
public void testCombine() {
    final Path gsodRoot = new File("ftp.ncdc.noaa.gov/pub/data/gsod/1988").toPath();

    for (KeyedIterable<Path, Path> group : FileIterable.groupFilesByDir(gsodRoot)) {
        // Marker line so the output shows where one group ends and the next begins.
        System.err.println("Next Iterable");
        for (Path file : group) {
            System.err.println(file);
        }
    }
}
Use of org.opennms.newts.gsod.FileIterable.KeyedIterable in the project newts by OpenNMS.
From the class MergeSort, method execute.
/**
 * Parses the command line and then, for every directory group found under the source
 * directory, writes the group's file contents out as a series of sorted batch files in a
 * mirrored directory below the target directory.
 *
 * <p>Progress is tracked with four meters ("lines", "files", "dirs", "batches") that a
 * console reporter prints to standard error every 10 seconds while the method runs. The
 * reporter is stopped before the method returns, whether it completes or throws.
 *
 * <p>If no target directory was configured, a temporary directory is created and its
 * location is printed to standard error.
 *
 * @param args the command-line arguments; on a parse error the message and the usage text
 *             are printed to standard error and the method returns without doing any work
 * @throws IOException if an output directory cannot be created, or if an I/O error is
 *                     raised while opening or writing a batch file
 */
public void execute(String... args) throws IOException {
    CmdLineParser parser = createCmdLineParser();
    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        // Invalid arguments: report the problem and the usage text, then give up.
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return;
    }

    final MetricRegistry metrics = new MetricRegistry();
    ConsoleReporter reporter = ConsoleReporter.forRegistry(metrics)
            .outputTo(System.err)
            .convertRatesTo(SECONDS)
            .convertDurationsTo(MILLISECONDS)
            .build();
    reporter.start(10, SECONDS);

    try {
        Meter linesMeter = metrics.meter("lines");
        Meter filesMeter = metrics.meter("files");
        Meter dirsMeter = metrics.meter("dirs");
        Meter batchMeter = metrics.meter("batches");

        Path root = m_source.toPath();
        if (m_targetDir == null) {
            m_targetDir = Files.createTempDir();
            System.err.println("Working Directory: " + m_targetDir);
        }

        LOG.debug("Scanning {} for GSOD data files...", root);
        FluentIterable<KeyedIterable<Path, Path>> dirs = FileIterable.groupFilesByDir(root);

        for (KeyedIterable<Path, Path> filesInDir : dirs) {
            // Mirror the source layout: the output directory has the same path relative to
            // the target directory as the input directory has relative to the source root.
            Path subdir = root.relativize(filesInDir.getKey());
            String dirName = subdir.getFileName().toString();
            System.err.println("Sorted dir: " + subdir);

            // Pipeline: count each file, read its lines, group the files into batches of
            // m_mergeCount, convert each batch to GSODLine sequences and merge them.
            // NOTE(review): presumably lines("YEARMODA") drops the header line and the
            // pipeline is evaluated lazily -- confirm against the helper implementations.
            FluentIterable<Iterable<String>> contentIterables =
                    filesInDir.transform(this.<Path>meter(filesMeter)).transform(lines("YEARMODA"));
            FluentIterable<List<Iterable<String>>> batches =
                    FluentIterable.from(Iterables.partition(contentIterables, m_mergeCount));
            FluentIterable<Iterable<GSODLine>> sortedBatches =
                    batches.transform(lift2GsodLines()).transform(mergeSorter());

            Path sortedDir = m_targetDir.toPath().resolve(subdir);
            // Fail right here if the directory cannot be created, instead of ignoring the
            // result and failing later when the first batch file is opened. The class name
            // is fully qualified because the simple name Files already refers, in this
            // file, to the class that provides createTempDir().
            java.nio.file.Files.createDirectories(sortedDir);

            int count = 1;
            for (Iterable<GSODLine> batch : sortedBatches) {
                Path sortedFile = sortedDir.resolve(dirName + "-batch-" + (count++) + ".gz");
                System.err.println("Creating " + sortedFile);
                try (PrintStream out = open(sortedFile)) {
                    out.println(HDR);
                    for (GSODLine line : batch) {
                        out.println(line);
                        linesMeter.mark();
                    }
                }
                batchMeter.mark();
            }
            dirsMeter.mark();
        }
    } finally {
        // Shut down the reporter's scheduler so it does not outlive this call.
        reporter.stop();
    }
}
Aggregations