use of org.gridfour.gvrs.GvrsFileSpecification in project gridfour by gwlucastrig.
the class ExperimentalImageStorage method main.
/**
 * Process the specified file, storing it in different image formats
 * as a way of testing data processing concepts for GVRS.
 * <p>
 * The image is loaded once and its pixels are then written, in order, to:
 * a reference PNG, a reference JPEG, an uncompressed GVRS file, a compressed
 * GVRS file, a compressed GVRS file with separate r, g, b elements, and a
 * compressed GVRS file with YCoCg-R elements. Finally, the YCoCg-R file is
 * read back, mapped to RGB, and written to a JPEG for visual inspection.
 * The start and end times for each step are passed to the report method.
 * All output files are created using relative paths.
 *
 * @param args the command line arguments, the first of which gives the path
 * to the input file
 * @throws java.io.IOException in the event of an unhandled IO exception
 * @throws org.apache.commons.imaging.ImageReadException in the event of an
 * unhandled exception reading an image
 */
public static void main(String[] args) throws IOException, ImageReadException {
  // Report a missing argument explicitly rather than failing with an
  // ArrayIndexOutOfBoundsException on args[0].
  if (args.length == 0) {
    System.out.println("No input file specified");
    return;
  }

  File input = new File(args[0]);
  Date date = new Date();
  SimpleDateFormat sdFormat = new SimpleDateFormat("dd MMM yyyy HH:mm", Locale.getDefault());
  sdFormat.setTimeZone(new SimpleTimeZone(0, "UTC"));
  System.out.format("Processing image from %s%n", input.getName());
  System.out.format("Date/time of test: %s (UTC)%n", sdFormat.format(date));
  System.out.println("");

  long time0, time1;

  // -------------------------------------------------------------
  // Load the specified file to obtain sample data for processing.
  time0 = System.currentTimeMillis();
  BufferedImage bImage = Imaging.getBufferedImage(input);
  time1 = System.currentTimeMillis();

  int width = bImage.getWidth();
  int height = bImage.getHeight();
  int nPixels = width * height;
  // GVRS API uses row, column as grid coordinates
  int nRows = height;
  int nCols = width;

  System.out.println("Image loaded");
  System.out.format(" Width: %7d%n", width);
  System.out.format(" Height: %7d%n", height);
  report(time0, time1, input, nPixels);

  // Copy the pixels into a row-major array and rebuild the image as
  // TYPE_INT_RGB so that the reference writers operate on a known layout.
  int[] argb = new int[width * height];
  bImage.getRGB(0, 0, width, height, argb, 0, width);
  bImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
  bImage.setRGB(0, 0, width, height, argb, 0, width);

  // ---------------------------------------------------------------
  // As a basis for comparison, store the image as a PNG
  // and report the time required to do so.
  File refPNG = new File("ReferenceImage.png");
  if (refPNG.exists()) {
    refPNG.delete();
  }
  time0 = System.currentTimeMillis();
  ImageIO.write(bImage, "PNG", refPNG);
  time1 = System.currentTimeMillis();
  System.out.println("ImageIO writing PNG");
  report(time0, time1, refPNG, nPixels);

  // ---------------------------------------------------------------
  // Also store the image as a JPEG and report the time required.
  File refJPEG = new File("ReferenceImage.jpg");
  if (refJPEG.exists()) {
    refJPEG.delete();
  }
  time0 = System.currentTimeMillis();
  ImageIO.write(bImage, "JPEG", refJPEG);
  time1 = System.currentTimeMillis();
  System.out.println("ImageIO writing JPEG");
  report(time0, time1, refJPEG, nPixels);

  // Note:
  // In the following code blocks, there are references to the GvrsFile
  // summarize() method. These are commented out because they would
  // interfere with timing measurements and also because they produce
  // considerable output text which would clutter the report.
  // Note also that calls to flush() are not normally required since
  // GvrsFile performs a flush as part of its close() operation.
  // ---------------------------------------------------------------
  GvrsFileSpecification gvrsFileSpec;

  // The first test stores the specified data in an uncompressed format.
  // This is the fastest option for processing pixel data and is recommended
  // for high-performance processing.
  System.out.println("Storing pixels as integers in uncompressed GVRS file");
  gvrsFileSpec = new GvrsFileSpecification(nRows, nCols, 200, 200);
  GvrsElementSpecification pSpec = new GvrsElementSpecificationInt("pixel");
  gvrsFileSpec.addElementSpecification(pSpec);
  File output0 = new File("IntPixelNoComp.gvrs");
  time0 = System.currentTimeMillis();
  try (GvrsFile gvrs = new GvrsFile(output0, gvrsFileSpec)) {
    gvrs.setTileCacheSize(GvrsCacheSize.Large);
    GvrsElement pixel = gvrs.getElement("pixel");
    for (int iRow = 0; iRow < nRows; iRow++) {
      for (int iCol = 0; iCol < nCols; iCol++) {
        int index = iRow * nCols + iCol;
        pixel.writeValueInt(iRow, iCol, argb[index]);
      }
    }
  }
  time1 = System.currentTimeMillis();
  report(time0, time1, output0, nPixels);

  // ---------------------------------------------------------------
  // Store the pixels in compressed format, but do not make any special
  // processing to improve the results. In most cases, compression using
  // this approach will not yield a substantial saving in storage.
  // The specification from the previous test is reused, with compression
  // switched on and the LSOP codec added.
  System.out.println("Storing pixels as integers in compressed GVRS file");
  File output1 = new File("IntPixelComp.gvrs");
  gvrsFileSpec.setDataCompressionEnabled(true);
  LsCodecUtility.addLsopToSpecification(gvrsFileSpec, true);
  time0 = System.currentTimeMillis();
  try (GvrsFile gvrs = new GvrsFile(output1, gvrsFileSpec)) {
    gvrs.setTileCacheSize(GvrsCacheSize.Large);
    GvrsElement pixel = gvrs.getElement("pixel");
    for (int iRow = 0; iRow < nRows; iRow++) {
      for (int iCol = 0; iCol < nCols; iCol++) {
        int index = iRow * nCols + iCol;
        pixel.writeValueInt(iRow, iCol, argb[index]);
      }
    }
    // gvrs.flush();
    // gvrs.summarize(System.out, true);
  }
  time1 = System.currentTimeMillis();
  report(time0, time1, output1, nPixels);

  // ---------------------------------------------------------------
  // Separate the pixels into separate RGB components, store each component
  // in a separate GVRS Element. This approach should improve compression
  // ratios.
  System.out.println("Storing RGB components in compressed GVRS file");
  gvrsFileSpec = new GvrsFileSpecification(nRows, nCols, 200, 200);
  gvrsFileSpec.setDataCompressionEnabled(true);
  LsCodecUtility.addLsopToSpecification(gvrsFileSpec, true);
  gvrsFileSpec.addElementSpecification(new GvrsElementSpecificationInt("r"));
  gvrsFileSpec.addElementSpecification(new GvrsElementSpecificationInt("g"));
  gvrsFileSpec.addElementSpecification(new GvrsElementSpecificationInt("b"));
  File output2 = new File("PixelsCompRGB.gvrs");
  time0 = System.currentTimeMillis();
  try (GvrsFile gvrs = new GvrsFile(output2, gvrsFileSpec)) {
    gvrs.setTileCacheSize(GvrsCacheSize.Large);
    GvrsElement rElem = gvrs.getElement("r");
    GvrsElement gElem = gvrs.getElement("g");
    GvrsElement bElem = gvrs.getElement("b");
    for (int iRow = 0; iRow < nRows; iRow++) {
      for (int iCol = 0; iCol < nCols; iCol++) {
        int rgb = argb[iRow * nCols + iCol];
        int r = (rgb >> 16) & 0xff;
        int g = (rgb >> 8) & 0xff;
        int b = rgb & 0xff;
        rElem.writeValueInt(iRow, iCol, r);
        gElem.writeValueInt(iRow, iCol, g);
        bElem.writeValueInt(iRow, iCol, b);
      }
    }
    // gvrs.flush();
    // gvrs.summarize(System.out, true);
  }
  time1 = System.currentTimeMillis();
  report(time0, time1, output2, nPixels);

  // ---------------------------------------------------------------
  // Convert RGB color values to the YCoCg-R color space before storage.
  // For photographic images, this approach should further reduce storage
  // size. For charts, graphs, line drawings, and other such graphic art
  // this approach will usually not produce a gain and sometimes degrades
  // compression.
  System.out.println("Storing YCoCg-R components in compressed GVRS file");
  gvrsFileSpec = new GvrsFileSpecification(nRows, nCols, 200, 200);
  gvrsFileSpec.setDataCompressionEnabled(true);
  LsCodecUtility.addLsopToSpecification(gvrsFileSpec, true);
  gvrsFileSpec.addElementSpecification(new GvrsElementSpecificationInt("Y"));
  gvrsFileSpec.addElementSpecification(new GvrsElementSpecificationInt("Co"));
  gvrsFileSpec.addElementSpecification(new GvrsElementSpecificationInt("Cg"));
  File output3 = new File("PixelsCompYCoCg.gvrs");
  time0 = System.currentTimeMillis();
  try (GvrsFile gvrs = new GvrsFile(output3, gvrsFileSpec)) {
    gvrs.setTileCacheSize(GvrsCacheSize.Large);
    GvrsElement YElem = gvrs.getElement("Y");
    GvrsElement CoElem = gvrs.getElement("Co");
    GvrsElement CgElem = gvrs.getElement("Cg");
    for (int iRow = 0; iRow < nRows; iRow++) {
      for (int iCol = 0; iCol < nCols; iCol++) {
        int rgb = argb[iRow * nCols + iCol];
        int r = (rgb >> 16) & 0xff;
        int g = (rgb >> 8) & 0xff;
        int b = rgb & 0xff;
        int Co = r - b;
        // Co and Cg may be negative; the arithmetic shift rounds toward
        // negative infinity (a floor, not a truncating division). The
        // read-back code below applies the same shifts, so the
        // transform is exactly reversible.
        int tmp = b + (Co >> 1);
        int Cg = g - tmp;
        int Y = tmp + (Cg >> 1);
        YElem.writeValueInt(iRow, iCol, Y);
        CoElem.writeValueInt(iRow, iCol, Co);
        CgElem.writeValueInt(iRow, iCol, Cg);
      }
    }
    // gvrs.flush();
    // gvrs.summarize(System.out, true);
  }
  time1 = System.currentTimeMillis();
  report(time0, time1, output3, nPixels);

  // ---------------------------------------------------------------
  // Finally, test the time required to load a YCoCg image
  // Then write the results to a JPEG file for inspection.
  // This test code also illustrates how the YCoCg values may be
  // mapped back to RGB.
  System.out.println("Testing time to read the YCoCg compressed file");
  time0 = System.currentTimeMillis();
  try (GvrsFile gvrs = new GvrsFile(output3, "r")) {
    gvrs.setTileCacheSize(GvrsCacheSize.Large);
    GvrsElement YElem = gvrs.getElement("Y");
    GvrsElement CoElem = gvrs.getElement("Co");
    GvrsElement CgElem = gvrs.getElement("Cg");
    for (int iRow = 0; iRow < nRows; iRow++) {
      for (int iCol = 0; iCol < nCols; iCol++) {
        int Y = YElem.readValueInt(iRow, iCol);
        int Co = CoElem.readValueInt(iRow, iCol);
        int Cg = CgElem.readValueInt(iRow, iCol);
        // Inverse of the forward transform used when the file was written.
        int tmp = Y - (Cg >> 1);
        int g = Cg + tmp;
        int b = tmp - (Co >> 1);
        int r = b + Co;
        // Pack as opaque ARGB (alpha forced to 0xff).
        argb[iRow * nCols + iCol] = 0xff000000 | (r << 16) | (g << 8) | b;
      }
    }
  }
  time1 = System.currentTimeMillis();
  report(time0, time1, output3, nPixels);

  File resultsJPEG = new File("ResultsForInspection.jpg");
  if (resultsJPEG.exists()) {
    resultsJPEG.delete();
  }
  bImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
  bImage.setRGB(0, 0, width, height, argb, 0, width);
  time0 = System.currentTimeMillis();
  ImageIO.write(bImage, "JPEG", resultsJPEG);
  time1 = System.currentTimeMillis();
  System.out.println("ImageIO writing JPEG");
  // The write was timed; report it like every other step.
  report(time0, time1, resultsJPEG, nPixels);
}
use of org.gridfour.gvrs.GvrsFileSpecification in project gridfour by gwlucastrig.
the class ReadGvrs method main.
/**
 * Opens the GVRS file named by the first command-line argument, prints
 * its dimensions, summary and metadata, and then times several passes
 * that read samples from the first element of the file.
 * <p>
 * When more than one argument is supplied, each timing pass steps through
 * the grid by the tile dimensions (one sample per tile) rather than
 * reading every cell.
 *
 * @param args the command line arguments; the first gives the path to the
 * GVRS file, the presence of a second selects one-sample-per-tile mode
 * @throws IOException in the event of an unhandled I/O exception
 */
public static void main(String[] args) throws IOException {
  PrintStream ps = System.out;
  long time0, time1;
  if (args.length == 0) {
    System.out.println("No input file specified");
    System.exit(0);
  }
  File file = new File(args[0]);
  System.out.println("Reading file " + file.getPath());
  boolean oneTestPerTile = args.length > 1;

  // Values obtained from the first open; they remain in use after the
  // file has been closed.
  GvrsFileSpecification spec;
  int nRows;
  int nCols;

  // Open the file. The time required to open the file depends, in part,
  // on whether a supplemental index file (.gvrx) is available. To test the
  // difference, simply delete the .gvrx file. Deleting the index file
  // will also allow you to test whether the .gvrs file can be opened
  // successfully when an index file is not available.
  time0 = System.nanoTime();
  // try-with-resources ensures the file is closed even if one of the
  // read or summarize calls below throws.
  try (GvrsFile gvrs = new GvrsFile(file, "r")) {
    time1 = System.nanoTime();
    double timeForOpeningFile = (time1 - time0) / 1.0e+6;

    // GvrsFile implements a method that allows an application to obtain
    // a safe copy of the specification that was used to create the
    // original GVRS file. The specification element is the primary
    // method for obtaining descriptive metadata about the organization
    // of the file. The example that follows demonstrates the use of
    // the specification to get some descriptive data.
    // Of course, if an application just wants to print that
    // metadata, the summarize function is the most efficient way of
    // doing so.
    spec = gvrs.getSpecification();
    nRows = spec.getRowsInGrid();
    nCols = spec.getColumnsInGrid();
    int nRowsOfTiles = spec.getRowsOfTilesInGrid();
    int nColsOfTiles = spec.getColumnsOfTilesInGrid();
    int nTiles = nRowsOfTiles * nColsOfTiles;
    ps.format("File dimensions%n");
    ps.format(" Rows: %8d%n", nRows);
    ps.format(" Columns: %8d%n", nCols);
    ps.format(" Tiles: %8d%n", nTiles);
    ps.format("Time to read header and index %10.1f ms%n", timeForOpeningFile);
    gvrs.summarize(ps, true);

    // Variable length records can contain either binary or text data.
    // The VLR's are read during initial access, though their payload
    // (which may be quite large) is not read until requested by the
    // application code.
    ps.println("\n\nGVRS Metadata");
    ps.println("------------------------------------------------");
    List<GvrsMetadata> metadataList = gvrs.readMetadata();
    for (GvrsMetadata metadata : metadataList) {
      String description = metadata.getDescription();
      ps.format(" %-24.24s %6d: %s%n", metadata.getName(), metadata.getRecordID(), description == null ? "" : description);
    }
  }

  // we collect a sum of the samples. we don't really care about
  // this value, but we collect it to ensure that Java doesn't optimize
  // away the actions inside the loop by telling it that we want a
  // computed value.
  int nTest = 4;
  double sumSample = 0;
  long nSample = 0;
  int rowStep = 1;
  int colStep = 1;
  if (oneTestPerTile) {
    rowStep = spec.getRowsInTile();
    colStep = spec.getColumnsInTile();
  }

  for (int iTest = 0; iTest < nTest; iTest++) {
    // The timed interval deliberately includes the cost of opening the file.
    time0 = System.nanoTime();
    try (GvrsFile gvrs = new GvrsFile(file, "r")) {
      List<GvrsElement> elementList = gvrs.getElements();
      GvrsElement zElement = elementList.get(0);
      gvrs.setTileCacheSize(GvrsCacheSize.Large);
      for (int iRow = 0; iRow < nRows; iRow += rowStep) {
        for (int iCol = 0; iCol < nCols; iCol += colStep) {
          double sample = zElement.readValue(iRow, iCol);
          sumSample += sample;
          nSample++;
        }
      }
      time1 = System.nanoTime();
      double timeForReadingFile = (time1 - time0) / 1.0e+6;
      System.out.format("Time to read all tiles %10.1f ms%n", timeForReadingFile);
      if (iTest == nTest - 1) {
        // on the last test, summarize
        gvrs.summarize(ps, false);
      }
    }
  }
  ps.println("Avg Samples " + sumSample / (double) nSample);
}
use of org.gridfour.gvrs.GvrsFileSpecification in project gridfour by gwlucastrig.
the class PackageData method process.
/**
 * Packages the elevation/bathymetry grid from a NetCDF-format global DEM
 * file into a GVRS file and, when verification is enabled in the options,
 * reads the GVRS file back and compares every cell with the source.
 * <p>
 * The NetCDF input is closed when this method exits, whether it completes
 * normally, returns early, or throws.
 *
 * @param ps the print stream that receives progress and summary output
 * @param options supplies the input and output files and the packaging
 * settings (tile size, z-scale, compression, checksums, verification)
 * @param args the command-line arguments, scanned here for the
 * <code>-lsop</code> option
 * @throws IOException in the event of an unrecoverable I/O error,
 * including an invalid range reported while reading the NetCDF source
 * @throws IllegalArgumentException if the input does not provide usable
 * latitude, longitude and elevation variables
 */
void process(PrintStream ps, TestOptions options, String[] args) throws IOException {
  // The packaging of data in a Gvrs file can be thought of in terms of
  // the steps shown below.
  //
  // 0. Obtain descriptive parameters about source data. In this
  // case, the application is packing data from a NetCDF source
  // and most of the descriptive parameters follow the pattern
  // established in the earlier ExtractData.java demonstration
  //
  // 1. Define the fixed metadata about the file (its dimensions,
  // data type, tile organization, etc.) using a GvrsFileSpecification
  // object.
  //
  // 2. Open a new GvrsFile object using the settings created in step 1.
  // Adjust any run-time parameters (such as the tile-cache size)
  // according to the needs of the application.
  //
  // 3. Extract the data from its source and store in the Gvrs file.
  //
  ps.format("%nGvrs Packaging Application for NetCDF-format Global DEM files%n");
  Locale locale = Locale.getDefault();
  Date date = new Date();
  SimpleDateFormat sdFormat = new SimpleDateFormat("dd MMM yyyy HH:mm z", locale);
  ps.format("Date of Execution: %s%n", sdFormat.format(date));

  String inputPath = options.getInputFile().getPath();
  File outputFile = options.getOutputFile();
  if (outputFile == null) {
    ps.format("Missing specification for output file%n");
    ps.format("Packaging application terminated%n");
    return;
  }
  ps.format("Input file: %s%n", inputPath);
  ps.format("Output file: %s%n", outputFile.getPath());
  boolean[] matched = new boolean[args.length];
  boolean useLsop = options.scanBooleanOption(args, "-lsop", matched, false);

  // Open the NetCDF file -----------------------------------
  ps.println("Opening NetCDF input file");
  // try-with-resources guarantees the NetCDF file is closed on every exit
  // path, including the early return and the exceptions thrown below.
  try (NetcdfFile ncfile = NetcdfFile.open(inputPath)) {
    // Identify which Variable instances carry information about the
    // geographic (latitude/longitude) coordinate system and also which
    // carry information for elevation and bathymetry.
    // the Variable that carries row-latitude information
    Variable lat = ncfile.findVariable("lat");
    // the Variable that carries column-longitude information
    Variable lon = ncfile.findVariable("lon");
    // the variable that carries elevation and bathymetry
    Variable z = ncfile.findVariable("elevation");

    // Use the input file name to format a product label
    File inputFile = new File(inputPath);
    String productLabel = inputFile.getName();
    if (productLabel.toLowerCase().endsWith(".nc")) {
      productLabel = productLabel.substring(0, productLabel.length() - 3);
    }

    if (lat == null) {
      // ETOPO1 specification uses x, y, z as variable names
      lat = ncfile.findVariable("y");
      lon = ncfile.findVariable("x");
      z = ncfile.findVariable("z");
    }
    // The same default tile size is used for both naming conventions.
    int[] tileSize = options.getTileSize(90, 120);

    if (lat == null || lon == null || z == null) {
      throw new IllegalArgumentException("Input does not contain valid lat,lon, and elevation Variables");
    }

    // using the variables from above, extract coordinate system
    // information for the product and print it to the output.
    ExtractionCoordinates extractionCoords = new ExtractionCoordinates(lat, lon);
    extractionCoords.summarizeCoordinates(ps);

    // Get the dimensions of the raster (grid) elevation/bathymetry data.
    // The code below indexes the shape as [row, column], so the variable
    // must have at least two dimensions.
    int rank = z.getRank();
    int[] shape = z.getShape();
    if (rank < 2 || shape.length < 2) {
      throw new IllegalArgumentException("Elevation Variable must have at least two dimensions, found rank " + rank);
    }
    int nRows = shape[0];
    int nCols = shape[1];
    ps.format("Rows: %8d%n", nRows);
    ps.format("Columns: %8d%n", nCols);
    int nRowsInTile = tileSize[0];
    int nColsInTile = tileSize[1];

    // Initialize the specification used to initialize the Gvrs file -------
    GvrsFileSpecification spec = new GvrsFileSpecification(nRows, nCols, nRowsInTile, nColsInTile);
    spec.setLabel(productLabel);

    // Initialize the data type. If a zScale option was specified,
    // use integer-coded floats. Otherwise, pick the data type
    // based on whether the NetCDF file gives integral or floating point
    // data.
    boolean isZScaleSpecified = options.isZScaleSpecified();
    float zScale = (float) options.getZScale();
    float zOffset = (float) options.getZOffset();
    // data type from NetCDF file
    DataType sourceDataType = z.getDataType();
    GvrsElementSpecification elementSpec = null;
    GvrsElementType gvrsDataType;
    if (isZScaleSpecified) {
      // the options define our data type
      int encodedLimitDepth = (int) ((LIMIT_DEPTH - zOffset) * zScale);
      int encodedLimitElev = (int) ((LIMIT_ELEVATION - zOffset) * zScale);
      elementSpec = new GvrsElementSpecificationIntCodedFloat("z", zScale, zOffset, encodedLimitDepth, encodedLimitElev, Integer.MIN_VALUE, true);
      spec.addElementSpecification(elementSpec);
      gvrsDataType = GvrsElementType.INT_CODED_FLOAT;
    } else if (sourceDataType.isIntegral()) {
      elementSpec = new GvrsElementSpecificationShort("z", LIMIT_DEPTH, LIMIT_ELEVATION, FILL_VALUE);
      spec.addElementSpecification(elementSpec);
      gvrsDataType = GvrsElementType.SHORT;
    } else {
      elementSpec = new GvrsElementSpecificationFloat("z", LIMIT_DEPTH, LIMIT_ELEVATION, Float.NaN);
      spec.addElementSpecification(elementSpec);
      gvrsDataType = GvrsElementType.FLOAT;
    }
    elementSpec.setDescription("Elevation (positive values) or depth (negative), in meters");
    elementSpec.setUnitOfMeasure("m");
    // Example with special character
    elementSpec.setLabel("die H\u00f6henlage");

    ps.println("Source date type " + sourceDataType + ", stored as " + gvrsDataType);
    ps.println("");

    // Determine whether data compression is used -------------------
    boolean compressionEnabled = options.isCompressionEnabled();
    spec.setDataCompressionEnabled(compressionEnabled);
    boolean checksumsEnabled = options.isChecksumComputationEnabled();
    spec.setChecksumEnabled(checksumsEnabled);
    boolean bigAddressSpaceEnabled = options.isBigAddressSpaceEnabled();
    spec.setExtendedFileSizeEnabled(bigAddressSpaceEnabled);

    double[] geoCoords = extractionCoords.getGeographicCoordinateBounds();
    spec.setGeographicCoordinates(geoCoords[0], geoCoords[1], geoCoords[2], geoCoords[3]);

    // Check to verify that the geographic coordinates and grid coordinate
    // are correctly implemented. This test is not truly part of the packaging
    // process (since it should always work), but is included here as a
    // diagnostic.
    extractionCoords.checkSpecificationTransform(ps, spec);

    // Add the LSOP codec to the specification when the -lsop option
    // was given on the command line.
    if (useLsop) {
      LsCodecUtility.addLsopToSpecification(spec, false);
    }

    // Create the output file and store the content from the input file.
    if (outputFile.exists()) {
      ps.println("Output file exists. Removing old file");
      boolean status = outputFile.delete();
      if (!status) {
        ps.println("Removal attempt failed");
        return;
      }
    }

    ps.println("Begin processing");
    double zMin = Double.POSITIVE_INFINITY;
    double zMax = Double.NEGATIVE_INFINITY;
    double zSum = 0;
    long nSum = 0;
    try (GvrsFile gvrs = new GvrsFile(outputFile, spec)) {
      gvrs.writeMetadata(GvrsMnc.Copyright, "This data is in the public domain and may be used free of charge");
      gvrs.writeMetadata(GvrsMnc.TermsOfUse, "This data should not be used for navigation");
      GvrsElement zElement = gvrs.getElement("z");
      gvrs.setTileCacheSize(GvrsCacheSize.Large);
      storeGeoreferencingInformation(gvrs);

      // Initialize data-statistics collection ---------------------------
      // we happen to know the range of values for the global DEM a-priori.
      // it ranges from about -11000 to 8650. This allows us to tabulate counts
      // of which values we find in the data source. We can use this information
      // to estimate the entropy of the source data and make a realistic
      // assessment of how many bytes would be needed to store them.
      InputDataStatCollector stats = new InputDataStatCollector(-11000, 8650, zScale);

      int[] readOrigin = new int[rank];
      int[] readShape = new int[rank];

      // -----------------------------------------------------------------
      // Package the data
      long time0 = System.currentTimeMillis();
      for (int iRow = 0; iRow < nRows; iRow++) {
        if (iRow % 1000 == 999) {
          long time1 = System.currentTimeMillis();
          double deltaT = time1 - time0;
          // rows per millis
          double rate = (iRow + 1) / deltaT;
          int nRemaining = nRows - iRow;
          long remainingT = (long) (nRemaining / rate);
          Date d = new Date(time1 + remainingT);
          ps.format("Completed %d rows, %4.1f%% of total, est completion at %s%n", iRow + 1, 100.0 * (double) iRow / (nRows - 1.0), d);
          ps.flush();
        }

        // Read one full row of the source grid at a time.
        readOrigin[0] = iRow;
        readOrigin[1] = 0;
        readShape[0] = 1;
        readShape[1] = nCols;
        // The requested range is derived from the variable's own shape,
        // so an InvalidRangeException is not expected here; if one does
        // occur it is reported as an IOException.
        try {
          Array array = z.read(readOrigin, readShape);
          // Copy the row into the Gvrs file, collecting statistics as we go.
          switch (gvrsDataType) {
            case INTEGER:
            case SHORT:
              for (int iCol = 0; iCol < nCols; iCol++) {
                int sample = array.getInt(iCol);
                zElement.writeValueInt(iRow, iCol, sample);
                stats.addSample(sample);
                if (sample < zMin) {
                  zMin = sample;
                }
                if (sample > zMax) {
                  zMax = sample;
                }
                zSum += sample;
                nSum++;
              }
              break;
            case INT_CODED_FLOAT:
            case FLOAT:
            default:
              for (int iCol = 0; iCol < nCols; iCol++) {
                float sample = array.getFloat(iCol);
                zElement.writeValue(iRow, iCol, sample);
                stats.addSample(sample);
                if (sample < zMin) {
                  zMin = sample;
                }
                if (sample > zMax) {
                  zMax = sample;
                }
                zSum += sample;
                nSum++;
              }
          }
        } catch (InvalidRangeException irex) {
          throw new IOException(irex.getMessage(), irex);
        }
      }

      gvrs.flush();
      long time1 = System.currentTimeMillis();
      double timeToProcess = (time1 - time0) / 1000.0;
      ps.format("Finished processing file in %4.1f seconds%n", timeToProcess);
      ps.format("Entropy for input data %4.1f bits/sample%n", stats.getEntropy());
      long outputSize = outputFile.length();
      long nCells = (long) nRows * (long) nCols;
      double bitsPerSymbol = 8.0 * (double) outputSize / (double) nCells;
      ps.format("Storage used (including overhead) %6.4f bits/sample%n", bitsPerSymbol);
      ps.format("%nSummary of file content and packaging actions------------%n");
      gvrs.summarize(ps, true);
      ps.format("Range of z values:%n");
      ps.format(" Min z: %8.3f%n", zMin);
      ps.format(" Max z: %8.3f%n", zMax);
      ps.format(" Avg z: %8.3f%n", zSum / (nSum > 0 ? nSum : 1));
    }

    // If verification is enabled, re-open the Gvrs file and compare
    // its content, cell by cell, to the values of the source data.
    if (options.isVerificationEnabled()) {
      int[] readOrigin = new int[rank];
      int[] readShape = new int[rank];
      ps.println("\nTesting product for data consistency with source");
      ps.println("Opening gvrs file for reading");
      long time0 = System.currentTimeMillis();
      try (GvrsFile gvrs = new GvrsFile(outputFile, "r")) {
        long time1 = System.currentTimeMillis();
        ps.println("Opening complete in " + (time1 - time0) + " ms");
        GvrsFileSpecification testSpec = gvrs.getSpecification();
        String testLabel = testSpec.getLabel();
        ps.println("Label: " + testLabel);
        GvrsMetadata m = gvrs.readMetadata("Copyright", 0);
        if (m != null) {
          ps.println("Copyright: " + m.getString());
        }
        GvrsElement zElement = gvrs.getElement("z");
        ps.println("Element: " + zElement.getName() + ", " + zElement.getDescription());
        gvrs.setTileCacheSize(GvrsCacheSize.Large);

        for (int iRow = 0; iRow < nRows; iRow++) {
          if (iRow % 10000 == 9999) {
            time1 = System.currentTimeMillis();
            double deltaT = time1 - time0;
            // rows per millis
            double rate = (iRow + 1) / deltaT;
            int nRemaining = nRows - iRow;
            long remainingT = (long) (nRemaining / rate);
            Date d = new Date(time1 + remainingT);
            ps.format("Completed %d rows, %4.1f%% of total, est completion at %s%n", iRow + 1, 100.0 * (double) iRow / (nRows - 1.0), d);
            ps.flush();
          }

          readOrigin[0] = iRow;
          readOrigin[1] = 0;
          readShape[0] = 1;
          readShape[1] = nCols;
          try {
            Array array = z.read(readOrigin, readShape);
            switch (gvrsDataType) {
              case INTEGER:
              case SHORT:
                // Integral types are compared exactly, matching the
                // integer path used when the data was written.
                for (int iCol = 0; iCol < nCols; iCol++) {
                  int sample = array.getInt(iCol);
                  int test = zElement.readValueInt(iRow, iCol);
                  if (sample != test) {
                    ps.println("Failure at " + iRow + ", " + iCol);
                    System.exit(-1);
                  }
                }
                break;
              case INT_CODED_FLOAT:
                // Integer-coded floats are quantized by zScale, so the
                // comparison allows a tolerance of about one coded step.
                for (int iCol = 0; iCol < nCols; iCol++) {
                  double sample = array.getDouble(iCol);
                  int iSample = (int) ((sample - zOffset) * zScale + 0.5);
                  float fSample = iSample / zScale + zOffset;
                  float test = zElement.readValue(iRow, iCol);
                  double delta = Math.abs(fSample - test);
                  if (delta > 1.01 / zScale) {
                    ps.println("Failure at " + iRow + ", " + iCol);
                    System.exit(-1);
                  }
                }
                break;
              case FLOAT:
              default:
                for (int iCol = 0; iCol < nCols; iCol++) {
                  float sample = array.getFloat(iCol);
                  float test = zElement.readValue(iRow, iCol);
                  if (sample != test) {
                    ps.println("Failure at " + iRow + ", " + iCol);
                    System.exit(-1);
                  }
                }
            }
          } catch (InvalidRangeException irex) {
            throw new IOException(irex.getMessage(), irex);
          }
        }

        time1 = System.currentTimeMillis();
        ps.println("Exhaustive cross check complete in " + (time1 - time0) + " ms");
        gvrs.summarize(ps, false);
      }
    }
  }
}
use of org.gridfour.gvrs.GvrsFileSpecification in project gridfour by gwlucastrig.
the class EntropyTabulator method process.
/**
 * Tabulates the entropy of the first element of a GVRS file and writes a
 * report to the supplied print stream.
 * <p>
 * Each sample is treated as a 32-bit symbol (floating-point samples are
 * taken by their raw bit pattern). Occurrence counts are accumulated in a
 * temporary 65536-by-65536 GVRS file created in the input file's parent
 * directory; the upper 16 bits of the symbol select the row and the lower
 * 16 bits select the column. The temporary file is deleted before the
 * method returns.
 * <p>
 * If configured to do so, this method also writes progress reports to the
 * print stream while it works.
 *
 * @param ps a valid print stream, System.out is a valid candidate
 * @param inputFile a reference to a GVRS file
 * @param showProgress indicates if progress reports are to be printed during
 * processing
 * @return on successful completion, the entropy in bits per sample; if an
 * IOException occurs, Double.NaN.
 */
public double process(PrintStream ps, File inputFile, boolean showProgress) {
  double result = Double.NaN;

  ps.format("%nEntropy tabulation for GVRS files%n");
  Locale reportLocale = Locale.getDefault();
  Date runDate = new Date();
  SimpleDateFormat stamp = new SimpleDateFormat("dd MMM yyyy HH:mm z", reportLocale);
  ps.format("Date of Execution: %s%n", stamp.format(runDate));
  ps.format("Input file: %s%n", inputFile.getPath());

  File tallyFile = new File(inputFile.getParentFile(), TEMP_COUNT_FILE_NAME);

  // Specification for the temporary file that holds one counter
  // for every possible 32-bit symbol.
  final int tallyGridSize = 65536;
  GvrsFileSpecification tallySpec
    = new GvrsFileSpecification(tallyGridSize, tallyGridSize, 256, 256);
  tallySpec.setDataCompressionEnabled(false);
  GvrsElementSpecificationInt tallyElementSpec = new GvrsElementSpecificationInt("counts", 0);
  tallySpec.addElementSpecification(tallyElementSpec);

  try (GvrsFile srcGvrs = new GvrsFile(inputFile, "r");
    GvrsFile tallyGvrs = new GvrsFile(tallyFile, tallySpec)) {

    GvrsFileSpecification srcSpec = srcGvrs.getSpecification();
    int srcRows = srcSpec.getRowsInGrid();
    int srcCols = srcSpec.getColumnsInGrid();
    int tileRowCount = srcSpec.getRowsOfTilesInGrid();
    int tileColCount = srcSpec.getColumnsOfTilesInGrid();
    int tileHeight = srcSpec.getRowsInTile();
    int tileWidth = srcSpec.getColumnsInTile();

    GvrsElement srcElement = srcGvrs.getElements().get(0);
    boolean srcIsFloat = srcElement.getDataType() == GvrsElementType.FLOAT;
    GvrsElement tallyElement = tallyGvrs.getElement("counts");

    long sampleTotal = 0;
    long symbolTotal = 0;

    ps.println("Source File " + inputFile.getName());
    ps.format(" Rows: %8d%n", srcRows);
    ps.format(" Columns: %8d%n", srcCols);

    srcGvrs.setTileCacheSize(GvrsCacheSize.Small);
    tallyGvrs.setTileCacheSize(2000);

    long clock = System.currentTimeMillis();
    if (showProgress) {
      ps.format("Initializing temporary entropy tabulation file %s%n", tallyFile.getPath());
      ps.flush();
      ps.format("Initialization done in %d ms%n", System.currentTimeMillis() - clock);
      ps.println("Beginning tabulation");
    }

    // Survey pass: walk the source one tile at a time and bump the
    // counter that corresponds to each sample's bit pattern.
    long surveyStart = System.currentTimeMillis();
    for (int tileRow = 0; tileRow < tileRowCount; tileRow++) {
      if (showProgress && tileRow > 0) {
        long now = System.currentTimeMillis();
        double elapsedMs = now - surveyStart;
        // tile rows per millisecond
        double tileRowsPerMs = (tileRow + 1) / elapsedMs;
        int tileRowsLeft = tileRowCount - tileRow;
        long msLeft = (long) (tileRowsLeft / tileRowsPerMs);
        Date eta = new Date(now + msLeft);
        double percentDone = 100.0 * (double) tileRow / (tileRowCount - 1.0);
        ps.format("Surveyed %d rows, %4.1f%% of total, est completion at %s%n", tileRow * tileHeight, percentDone, eta);
        ps.flush();
      }

      int rowBegin = tileRow * tileHeight;
      int rowEnd = Math.min(rowBegin + tileHeight, srcRows);
      for (int tileCol = 0; tileCol < tileColCount; tileCol++) {
        int colBegin = tileCol * tileWidth;
        int colEnd = Math.min(colBegin + tileWidth, srcCols);
        for (int r = rowBegin; r < rowEnd; r++) {
          for (int c = colBegin; c < colEnd; c++) {
            int symbol = srcIsFloat
              ? Float.floatToRawIntBits(srcElement.readValue(r, c))
              : srcElement.readValueInt(r, c);
            // Treat the symbol as unsigned: high half is the row of
            // the counter grid, low half is the column.
            int tallyRow = symbol >>> 16;
            int tallyCol = symbol & 0xffff;
            int previous = tallyElement.readValueInt(tallyRow, tallyCol);
            tallyElement.writeValueInt(tallyRow, tallyCol, previous + 1);
            sampleTotal++;
            if (previous == 0) {
              symbolTotal++;
            }
          }
        }
      }
    }

    tallyGvrs.flush();
    long surveyEnd = System.currentTimeMillis();
    double surveySeconds = (surveyEnd - surveyStart) / 1000.0;
    if (showProgress) {
      ps.format("Finished surveying source file in %4.1f seconds%n", surveySeconds);
      ps.format("Performing tabulation of count data%n");
      ps.flush();
    }

    // Tabulation pass: visit every counter and accumulate -p*ln(p)
    // for each symbol that was actually observed.
    long tabulateStart = System.currentTimeMillis();
    double sampleTotalAsDouble = (double) sampleTotal;
    int largestCount = 0;
    long singletons = 0;
    long repeats = 0;
    KahanSummation entropySum = new KahanSummation();
    for (int r = 0; r < tallyGridSize; r++) {
      if (showProgress && r > 0 && (r & 1023) == 0) {
        long now = System.currentTimeMillis();
        double elapsedMs = now - tabulateStart;
        // rows per millisecond
        double rowsPerMs = (r + 1) / elapsedMs;
        int rowsLeft = tallyGridSize - r;
        long msLeft = (long) (rowsLeft / rowsPerMs);
        Date eta = new Date(now + msLeft);
        double percentDone = 100.0 * (double) r / 65536.0;
        ps.format("Tabulated %d rows, %4.1f%% of total, est completion at %s%n", r, percentDone, eta);
        ps.flush();
      }
      for (int c = 0; c < tallyGridSize; c++) {
        int tally = tallyElement.readValueInt(r, c);
        if (tally <= 0) {
          continue;
        }
        double probability = (double) tally / sampleTotalAsDouble;
        entropySum.add(-probability * Math.log(probability));
        if (tally > largestCount) {
          largestCount = tally;
        }
        if (tally == 1) {
          singletons++;
        } else {
          repeats++;
        }
      }
    }

    // The sum was computed with natural logarithms;
    // divide by ln(2) to express it in bits.
    result = entropySum.getSum() / Math.log(2.0);

    long tabulateEnd = System.currentTimeMillis();
    double tabulateSeconds = (tabulateEnd - tabulateStart) / 1000.0;
    ps.format("Finished processing file in %4.1f seconds%n", tabulateSeconds);
    ps.format("Size of Counts File %12d%n", tallyFile.length());
    ps.format("Samples: %12d%n", sampleTotal);
    ps.format("Unique Symbols: %12d%n", singletons);
    ps.format("Repeated Symbols: %12d%n", repeats);
    ps.format("Total symbols: %12d%n", symbolTotal);
    ps.format("Max count: %12d%n", largestCount);
    ps.format("Entropy: %9.5f%n ", result);
  } catch (IOException ioex) {
    ps.println("IOException accessing " + inputFile.getPath() + ", " + ioex.getMessage());
    ioex.printStackTrace(ps);
  }

  // The counts file is only scratch storage; remove it whether or not
  // the tabulation succeeded.
  tallyFile.delete();
  return result;
}
Aggregations