use of org.gridfour.gvrs.GvrsFile in project gridfour by gwlucastrig.
the class ReadGvrs method main.
/**
 * Opens the GVRS file named by the first command-line argument, prints its
 * dimensions, summary and metadata, and then times several complete passes
 * over the grid using the first element in the file.
 * <p>
 * Every GvrsFile opened by this method is managed with try-with-resources
 * so that the file is closed even when a read fails part way through.
 *
 * @param args args[0] is the path of the GVRS file to read; if any further
 * argument is supplied, only one sample per tile is read in the timing
 * passes instead of every grid cell
 * @throws IOException if the file cannot be opened or read
 */
public static void main(String[] args) throws IOException {
  PrintStream ps = System.out;
  long time0, time1;
  if (args.length == 0) {
    System.out.println("No input file specified");
    System.exit(0);
  }

  File file = new File(args[0]);
  System.out.println("Reading file " + file.getPath());
  boolean oneTestPerTile = args.length > 1;

  // Values obtained from the first (summary) pass that are needed later
  // by the timing passes.
  GvrsFileSpecification spec;
  int nRows;
  int nCols;

  // Open the file. The time required to open the file depends, in part,
  // on whether a supplemental index file (.gvrx) is available. To test the
  // difference, simply delete the .gvrx file. Deleting the index file
  // will also allow you to test whether the .gvrs file can be opened
  // successfully when an index file is not available.
  time0 = System.nanoTime();
  try (GvrsFile gvrs = new GvrsFile(file, "r")) {
    time1 = System.nanoTime();
    double timeForOpeningFile = (time1 - time0) / 1.0e+6;

    // GvrsFile implements a method that allows an application to obtain
    // a safe copy of the specification that was used to create the
    // original GVRS file. The specification element is the primary
    // method for obtaining descriptive metadata about the organization
    // of the file. The example that follows demonstrates the use of
    // the specification to get some descriptive data.
    // Of course, if an application just wants to print that
    // metadata, the summarize function is the most efficient way of
    // doing so.
    spec = gvrs.getSpecification();
    nRows = spec.getRowsInGrid();
    nCols = spec.getColumnsInGrid();
    int nRowsOfTiles = spec.getRowsOfTilesInGrid();
    int nColsOfTiles = spec.getColumnsOfTilesInGrid();
    int nTiles = nRowsOfTiles * nColsOfTiles;
    ps.format("File dimensions%n");
    ps.format(" Rows: %8d%n", nRows);
    ps.format(" Columns: %8d%n", nCols);
    ps.format(" Tiles: %8d%n", nTiles);
    ps.format("Time to read header and index %10.1f ms%n", timeForOpeningFile);
    gvrs.summarize(ps, true);

    // Variable length records can contain either binary or text data.
    // The VLR's are read during initial access, though their payload
    // (which may be quite large) is not read until requested by the
    // application code.
    ps.println("\n\nGVRS Metadata");
    ps.println("------------------------------------------------");
    List<GvrsMetadata> metadataList = gvrs.readMetadata();
    for (GvrsMetadata metadata : metadataList) {
      String description = metadata.getDescription();
      ps.format(" %-24.24s %6d: %s%n", metadata.getName(), metadata.getRecordID(), description == null ? "" : description);
    }
  }

  // We collect a sum of the samples. We don't really care about
  // this value, but we collect it to ensure that Java doesn't optimize
  // away the actions inside the loop by telling it that we want a
  // computed value.
  int nTest = 4;
  double sumSample = 0;
  long nSample = 0;
  int rowStep = 1;
  int colStep = 1;
  if (oneTestPerTile) {
    // stepping by the tile dimensions touches each tile exactly once
    rowStep = spec.getRowsInTile();
    colStep = spec.getColumnsInTile();
  }

  for (int iTest = 0; iTest < nTest; iTest++) {
    // the measured interval deliberately includes the time to open the file
    time0 = System.nanoTime();
    try (GvrsFile gvrs = new GvrsFile(file, "r")) {
      List<GvrsElement> elementList = gvrs.getElements();
      GvrsElement zElement = elementList.get(0);
      gvrs.setTileCacheSize(GvrsCacheSize.Large);
      for (int iRow = 0; iRow < nRows; iRow += rowStep) {
        for (int iCol = 0; iCol < nCols; iCol += colStep) {
          double sample = zElement.readValue(iRow, iCol);
          sumSample += sample;
          nSample++;
        }
      }
      time1 = System.nanoTime();
      double timeForReadingFile = (time1 - time0) / 1.0e+6;
      System.out.format("Time to read all tiles %10.1f ms%n", timeForReadingFile);
      if (iTest == nTest - 1) {
        // on the last test, summarize
        gvrs.summarize(ps, false);
      }
    }
  }
  ps.println("Avg Samples " + sumSample / (double) nSample);
}
use of org.gridfour.gvrs.GvrsFile in project gridfour by gwlucastrig.
the class PackageData method process.
/**
 * Packages the elevation/bathymetry grid of a NetCDF global DEM file into a
 * new GVRS file and, when verification is enabled in the options, re-reads
 * the GVRS file and compares every stored value against the source.
 * <p>
 * The NetCDF input is held in a try-with-resources statement so that it is
 * closed on every exit path, including the early return taken when an
 * existing output file cannot be removed and any exception raised while
 * reading or writing.
 *
 * @param ps the print stream that receives progress and summary output
 * @param options supplies the input and output files and the packaging
 * settings (tile size, z scale/offset, compression, checksums, extended
 * file size, verification)
 * @param args the command-line arguments, scanned here for the -lsop option
 * @throws IOException if the NetCDF source or the GVRS output cannot be
 * read or written
 * @throws IllegalArgumentException if the input lacks the expected
 * latitude, longitude, and elevation variables, or if the elevation
 * variable has fewer than two dimensions
 */
void process(PrintStream ps, TestOptions options, String[] args) throws IOException {
  // The packaging of data in a Gvrs file can be thought of in terms of
  // the steps shown below.
  //
  // 0. Obtain descriptive parameters about source data. In this
  // case, the application is packing data from a NetCDF source
  // and most of the descriptive parameters follow the pattern
  // established in the earlier ExtractData.java demonstration
  //
  // 1. Define the fixed metadata about the file (its dimensions,
  // data type, tile organization, etc.) using a GvrsFileSpecification
  // object.
  //
  // 2. Open a new GvrsFile object using the settings created in step 1.
  // Adjust any run-time parameters (such as the tile-cache size)
  // according to the needs of the application.
  //
  // 3. Extract the data from its source and store in the Gvrs file.
  //
  ps.format("%nGvrs Packaging Application for NetCDF-format Global DEM files%n");
  Locale locale = Locale.getDefault();
  Date date = new Date();
  SimpleDateFormat sdFormat = new SimpleDateFormat("dd MMM yyyy HH:mm z", locale);
  ps.format("Date of Execution: %s%n", sdFormat.format(date));

  String inputPath = options.getInputFile().getPath();
  File outputFile = options.getOutputFile();
  if (outputFile == null) {
    ps.format("Missing specification for output file%n");
    ps.format("Packaging application terminated%n");
    return;
  }
  ps.format("Input file: %s%n", inputPath);
  ps.format("Output file: %s%n", outputFile.getPath());
  boolean[] matched = new boolean[args.length];
  boolean useLsop = options.scanBooleanOption(args, "-lsop", matched, false);

  // Open the NetCDF file -----------------------------------
  ps.println("Opening NetCDF input file");
  try (NetcdfFile ncfile = NetcdfFile.open(inputPath)) {
    // Identify which Variable instances carry information about the
    // geographic (latitude/longitude) coordinate system and also which
    // carry information for elevation and bathymetry.
    // lat: the Variable that carries row-latitude information
    // lon: the Variable that carries column-longitude information
    // z:   the Variable that carries elevation and bathymetry
    Variable lat = ncfile.findVariable("lat");
    Variable lon = ncfile.findVariable("lon");
    Variable z = ncfile.findVariable("elevation");

    // Use the input file name to format a product label
    File inputFile = new File(inputPath);
    String productLabel = inputFile.getName();
    if (productLabel.toLowerCase().endsWith(".nc")) {
      productLabel = productLabel.substring(0, productLabel.length() - 3);
    }

    if (lat == null) {
      // ETOPO1 specification: the variables are named x, y, and z
      lat = ncfile.findVariable("y");
      lon = ncfile.findVariable("x");
      z = ncfile.findVariable("z");
    }
    // the same default tile size applies to both variable-naming schemes
    int[] tileSize = options.getTileSize(90, 120);

    if (lat == null || lon == null || z == null) {
      throw new IllegalArgumentException("Input does not contain valid lat,lon, and elevation Variables");
    }

    // using the variables from above, extract coordinate system
    // information for the product and print it to the output.
    ExtractionCoordinates extractionCoords = new ExtractionCoordinates(lat, lon);
    extractionCoords.summarizeCoordinates(ps);

    // Get the dimensions of the raster (grid) elevation/bathymetry data.
    // The code below indexes the first two dimensions (rows, columns),
    // so anything of lower rank cannot be processed.
    int rank = z.getRank();
    if (rank < 2) {
      throw new IllegalArgumentException("Elevation variable must have at least two dimensions, found rank " + rank);
    }
    int[] shape = z.getShape();
    int nRows = shape[0];
    int nCols = shape[1];
    ps.format("Rows: %8d%n", nRows);
    ps.format("Columns: %8d%n", nCols);
    int nRowsInTile = tileSize[0];
    int nColsInTile = tileSize[1];

    // Initialize the specification used to initialize the Gvrs file -------
    GvrsFileSpecification spec = new GvrsFileSpecification(nRows, nCols, nRowsInTile, nColsInTile);
    spec.setLabel(productLabel);

    // Initialize the data type. If a zScale option was specified,
    // use integer-coded floats. Otherwise, pick the data type
    // based on whether the NetCDF file gives integral or floating point
    // data.
    boolean isZScaleSpecified = options.isZScaleSpecified();
    float zScale = (float) options.getZScale();
    float zOffset = (float) options.getZOffset();
    // data type from NetCDF file
    DataType sourceDataType = z.getDataType();
    GvrsElementSpecification elementSpec;
    GvrsElementType gvrsDataType;
    if (isZScaleSpecified) {
      // the options define our data type
      int encodedLimitDepth = (int) ((LIMIT_DEPTH - zOffset) * zScale);
      int encodedLimitElev = (int) ((LIMIT_ELEVATION - zOffset) * zScale);
      elementSpec = new GvrsElementSpecificationIntCodedFloat("z", zScale, zOffset, encodedLimitDepth, encodedLimitElev, Integer.MIN_VALUE, true);
      spec.addElementSpecification(elementSpec);
      gvrsDataType = GvrsElementType.INT_CODED_FLOAT;
    } else if (sourceDataType.isIntegral()) {
      elementSpec = new GvrsElementSpecificationShort("z", LIMIT_DEPTH, LIMIT_ELEVATION, FILL_VALUE);
      spec.addElementSpecification(elementSpec);
      gvrsDataType = GvrsElementType.SHORT;
    } else {
      elementSpec = new GvrsElementSpecificationFloat("z", LIMIT_DEPTH, LIMIT_ELEVATION, Float.NaN);
      spec.addElementSpecification(elementSpec);
      gvrsDataType = GvrsElementType.FLOAT;
    }
    elementSpec.setDescription("Elevation (positive values) or depth (negative), in meters");
    elementSpec.setUnitOfMeasure("m");
    // Example with special character
    elementSpec.setLabel("die H\u00f6henlage");
    ps.println("Source date type " + sourceDataType + ", stored as " + gvrsDataType);
    ps.println("");

    // Determine whether data compression is used -------------------
    boolean compressionEnabled = options.isCompressionEnabled();
    spec.setDataCompressionEnabled(compressionEnabled);
    boolean checksumsEnabled = options.isChecksumComputationEnabled();
    spec.setChecksumEnabled(checksumsEnabled);
    boolean bigAddressSpaceEnabled = options.isBigAddressSpaceEnabled();
    spec.setExtendedFileSizeEnabled(bigAddressSpaceEnabled);
    double[] geoCoords = extractionCoords.getGeographicCoordinateBounds();
    spec.setGeographicCoordinates(geoCoords[0], geoCoords[1], geoCoords[2], geoCoords[3]);

    // Check to verify that the geographic coordinates and grid coordinate
    // are correctly implemented. This test is not truly part of the packaging
    // process (since it should always work), but is included here as a
    // diagnostic.
    extractionCoords.checkSpecificationTransform(ps, spec);

    // Register the LSOP codec with the specification when the -lsop
    // option was given on the command line.
    if (useLsop) {
      LsCodecUtility.addLsopToSpecification(spec, false);
    }

    // Create the output file and store the content from the input file.
    if (outputFile.exists()) {
      ps.println("Output file exists. Removing old file");
      boolean status = outputFile.delete();
      if (!status) {
        ps.println("Removal attempt failed");
        return;
      }
    }

    ps.println("Begin processing");
    double zMin = Double.POSITIVE_INFINITY;
    double zMax = Double.NEGATIVE_INFINITY;
    double zSum = 0;
    long nSum = 0;
    try (GvrsFile gvrs = new GvrsFile(outputFile, spec)) {
      gvrs.writeMetadata(GvrsMnc.Copyright, "This data is in the public domain and may be used free of charge");
      gvrs.writeMetadata(GvrsMnc.TermsOfUse, "This data should not be used for navigation");
      GvrsElement zElement = gvrs.getElement("z");
      gvrs.setTileCacheSize(GvrsCacheSize.Large);
      storeGeoreferencingInformation(gvrs);

      // Initialize data-statistics collection ---------------------------
      // we happen to know the range of values for the global DEM a-priori.
      // it ranges from about -11000 to 8650. This allows us to tabulate counts
      // of which values we find in the data source. We can use this information
      // to estimate the entropy of the source data and make a realistic
      // assessment of how many bytes would be needed to store them.
      InputDataStatCollector stats = new InputDataStatCollector(-11000, 8650, zScale);
      int[] readOrigin = new int[rank];
      int[] readShape = new int[rank];

      // -----------------------------------------------------------------
      // Package the data
      long time0 = System.currentTimeMillis();
      for (int iRow = 0; iRow < nRows; iRow++) {
        if (iRow % 1000 == 999) {
          long time1 = System.currentTimeMillis();
          double deltaT = time1 - time0;
          // rows per millis
          double rate = (iRow + 1) / deltaT;
          int nRemaining = nRows - iRow;
          long remainingT = (long) (nRemaining / rate);
          Date d = new Date(time1 + remainingT);
          ps.format("Completed %d rows, %4.1f%% of total, est completion at %s%n", iRow + 1, 100.0 * (double) iRow / (nRows - 1.0), d);
          ps.flush();
        }

        // Read one full row of the source grid per iteration.
        int row0 = iRow;
        int col0 = 0;
        readOrigin[0] = row0;
        readOrigin[1] = col0;
        readShape[0] = 1;
        readShape[1] = nCols;
        // The NetCDF read may report an invalid range; that is converted
        // to an IOException so callers see a single failure type.
        try {
          Array array = z.read(readOrigin, readShape);
          // Transfer the row to the Gvrs file, accumulating statistics.
          switch (gvrsDataType) {
            case INTEGER:
            case SHORT:
              for (int iCol = 0; iCol < nCols; iCol++) {
                int sample = array.getInt(iCol);
                zElement.writeValueInt(iRow, iCol, sample);
                stats.addSample(sample);
                if (sample < zMin) {
                  zMin = sample;
                }
                if (sample > zMax) {
                  zMax = sample;
                }
                zSum += sample;
                nSum++;
              }
              break;
            case INT_CODED_FLOAT:
            case FLOAT:
            default:
              for (int iCol = 0; iCol < nCols; iCol++) {
                float sample = array.getFloat(iCol);
                zElement.writeValue(iRow, iCol, sample);
                stats.addSample(sample);
                if (sample < zMin) {
                  zMin = sample;
                }
                if (sample > zMax) {
                  zMax = sample;
                }
                zSum += sample;
                nSum++;
              }
          }
        } catch (InvalidRangeException irex) {
          throw new IOException(irex.getMessage(), irex);
        }
      }

      gvrs.flush();
      long time1 = System.currentTimeMillis();
      double timeToProcess = (time1 - time0) / 1000.0;
      ps.format("Finished processing file in %4.1f seconds%n", timeToProcess);
      ps.format("Entropy for input data %4.1f bits/sample%n", stats.getEntropy());
      long outputSize = outputFile.length();
      long nCells = (long) nRows * (long) nCols;
      double bitsPerSymbol = 8.0 * (double) outputSize / (double) nCells;
      ps.format("Storage used (including overhead) %6.4f bits/sample%n", bitsPerSymbol);
      ps.format("%nSummary of file content and packaging actions------------%n");
      gvrs.summarize(ps, true);
      ps.format("Range of z values:%n");
      ps.format(" Min z: %8.3f%n", zMin);
      ps.format(" Max z: %8.3f%n", zMax);
      ps.format(" Avg z: %8.3f%n", zSum / (nSum > 0 ? nSum : 1));
    }

    // When verification is enabled, re-open the output file and compare
    // every stored value to the corresponding value in the source data.
    if (options.isVerificationEnabled()) {
      int[] readOrigin = new int[rank];
      int[] readShape = new int[rank];
      ps.println("\nTesting product for data consistency with source");
      ps.println("Opening gvrs file for reading");
      long time0 = System.currentTimeMillis();
      try (GvrsFile gvrs = new GvrsFile(outputFile, "r")) {
        long time1 = System.currentTimeMillis();
        ps.println("Opening complete in " + (time1 - time0) + " ms");
        GvrsFileSpecification testSpec = gvrs.getSpecification();
        String testLabel = testSpec.getLabel();
        ps.println("Label: " + testLabel);
        GvrsMetadata m = gvrs.readMetadata("Copyright", 0);
        if (m != null) {
          ps.println("Copyright: " + m.getString());
        }
        GvrsElement zElement = gvrs.getElement("z");
        ps.println("Element: " + zElement.getName() + ", " + zElement.getDescription());
        gvrs.setTileCacheSize(GvrsCacheSize.Large);
        for (int iRow = 0; iRow < nRows; iRow++) {
          if (iRow % 10000 == 9999) {
            time1 = System.currentTimeMillis();
            double deltaT = time1 - time0;
            // rows per millis
            double rate = (iRow + 1) / deltaT;
            int nRemaining = nRows - iRow;
            long remainingT = (long) (nRemaining / rate);
            Date d = new Date(time1 + remainingT);
            ps.format("Completed %d rows, %4.1f%% of total, est completion at %s%n", iRow + 1, 100.0 * (double) iRow / (nRows - 1.0), d);
            ps.flush();
          }
          int row0 = iRow;
          int col0 = 0;
          readOrigin[0] = row0;
          readOrigin[1] = col0;
          readShape[0] = 1;
          readShape[1] = nCols;
          try {
            Array array = z.read(readOrigin, readShape);
            switch (gvrsDataType) {
              case INTEGER:
              case SHORT:
                // integral types were written with writeValueInt, so they
                // are compared as integers here as well
                for (int iCol = 0; iCol < nCols; iCol++) {
                  int sample = array.getInt(iCol);
                  int test = zElement.readValueInt(iRow, iCol);
                  if (sample != test) {
                    ps.println("Failure at " + iRow + ", " + iCol);
                    // NOTE(review): the repeated read appears to exist only
                    // as a debugging aid; its result is not used.
                    test = zElement.readValueInt(iRow, iCol);
                    System.exit(-1);
                  }
                }
                break;
              case INT_CODED_FLOAT:
                for (int iCol = 0; iCol < nCols; iCol++) {
                  double sample = array.getDouble(iCol);
                  // reproduce the scale/offset quantization, then allow a
                  // difference of about one quantization step
                  int iSample = (int) ((sample - zOffset) * zScale + 0.5);
                  float fSample = iSample / zScale + zOffset;
                  float test = zElement.readValue(iRow, iCol);
                  double delta = Math.abs(fSample - test);
                  if (delta > 1.01 / zScale) {
                    ps.println("Failure at " + iRow + ", " + iCol);
                    System.exit(-1);
                  }
                }
                break;
              case FLOAT:
              default:
                for (int iCol = 0; iCol < nCols; iCol++) {
                  float sample = array.getFloat(iCol);
                  float test = zElement.readValue(iRow, iCol);
                  if (sample != test) {
                    ps.println("Failure at " + iRow + ", " + iCol);
                    // NOTE(review): the repeated read appears to exist only
                    // as a debugging aid; its result is not used.
                    test = zElement.readValueInt(iRow, iCol);
                    System.exit(-1);
                  }
                }
            }
          } catch (InvalidRangeException irex) {
            throw new IOException(irex.getMessage(), irex);
          }
        }
        time1 = System.currentTimeMillis();
        ps.println("Exhaustive cross check complete in " + (time1 - time0) + " ms");
        gvrs.summarize(ps, false);
      }
    }
  }
}
use of org.gridfour.gvrs.GvrsFile in project gridfour by gwlucastrig.
the class GvrsReadPerformance method testTileBlockScan.
/**
 * Times a scan of the input file performed one tile-sized block at a time
 * and reports the elapsed time, the mean sample value, and the number of
 * samples included in that mean.
 * <p>
 * The first element of the file is read. When its data type is INTEGER or
 * SHORT the blocks are read as int arrays and every sample is counted;
 * otherwise the blocks are read as float arrays and NaN samples are
 * excluded from both the sum and the count.
 *
 * @param ps the print stream passed to the report method
 * @throws IOException if the file cannot be opened or a block cannot be read
 */
void testTileBlockScan(PrintStream ps) throws IOException {
  try (GvrsFile gvrs = new GvrsFile(inputFile, "r")) {
    gvrs.setTileCacheSize(GvrsCacheSize.Large);
    GvrsElement firstElement = gvrs.getElements().get(0);
    GvrsElementType elementType = firstElement.getDataType();

    long sampleCount = 0;
    double total = 0;
    long startNanos = System.nanoTime();
    boolean integral = elementType == GvrsElementType.INTEGER
      || elementType == GvrsElementType.SHORT;

    if (integral) {
      for (int tileRow = 0; tileRow < nRowsOfTiles; tileRow++) {
        int firstRow = tileRow * nRowsInTile;
        for (int tileCol = 0; tileCol < nColsOfTiles; tileCol++) {
          int firstCol = tileCol * nColsInTile;
          int[] values = firstElement.readBlockInt(firstRow, firstCol, nRowsInTile, nColsInTile);
          for (int k = 0; k < values.length; k++) {
            total += values[k];
            sampleCount++;
          }
        }
      }
    } else {
      for (int tileRow = 0; tileRow < nRowsOfTiles; tileRow++) {
        int firstRow = tileRow * nRowsInTile;
        for (int tileCol = 0; tileCol < nColsOfTiles; tileCol++) {
          int firstCol = tileCol * nColsInTile;
          float[] values = firstElement.readBlock(firstRow, firstCol, nRowsInTile, nColsInTile);
          for (int k = 0; k < values.length; k++) {
            float value = values[k];
            if (Float.isNaN(value)) {
              // NaN samples contribute to neither the sum nor the count
              continue;
            }
            total += value;
            sampleCount++;
          }
        }
      }
    }

    // the mean stays at zero when no samples were accumulated
    double mean = sampleCount > 0 ? total / (double) sampleCount : 0;
    long stopNanos = System.nanoTime();
    double elapsedSeconds = (stopNanos - startNanos) / 1.0e+9;
    report(ps, "Block Test", elapsedSeconds, mean, sampleCount);
  }
}
use of org.gridfour.gvrs.GvrsFile in project gridfour by gwlucastrig.
the class EntropyTabulator method process.
/**
 * Tabulates the entropy of the first element of a GVRS file and prints a
 * report to the supplied print stream.
 * <p>
 * Each sample is treated as a 32-bit symbol (the raw bit pattern for FLOAT
 * elements, the integer value otherwise). Occurrence counts are kept in a
 * temporary 65536-by-65536 GVRS file created in the same directory as the
 * input; the upper 16 bits of the symbol select the row and the lower 16
 * bits select the column. The temporary file is deleted before returning.
 * <p>
 * When showProgress is true, progress messages are also written to the
 * print stream while the source is surveyed and the counts are tabulated.
 *
 * @param ps a valid print stream, System.out is a valid candidate
 * @param inputFile a reference to a GVRS file
 * @param showProgress indicates if progress reports are to be printed during
 * processing
 * @return the entropy in bits per symbol on successful completion;
 * Double.NaN if an IOException was encountered
 */
public double process(PrintStream ps, File inputFile, boolean showProgress) {
  double entropy = Double.NaN;

  ps.format("%nEntropy tabulation for GVRS files%n");
  SimpleDateFormat stampFormat = new SimpleDateFormat("dd MMM yyyy HH:mm z", Locale.getDefault());
  ps.format("Date of Execution: %s%n", stampFormat.format(new Date()));
  ps.format("Input file: %s%n", inputFile.getPath());

  File countsFile = new File(inputFile.getParentFile(), TEMP_COUNT_FILE_NAME);

  // Define the specs for the entropy stats file: one uncompressed integer
  // counter for each possible 32-bit symbol.
  GvrsFileSpecification countsSpec = new GvrsFileSpecification(65536, 65536, 256, 256);
  countsSpec.setDataCompressionEnabled(false);
  countsSpec.addElementSpecification(new GvrsElementSpecificationInt("counts", 0));

  try (GvrsFile source = new GvrsFile(inputFile, "r");
    GvrsFile counts = new GvrsFile(countsFile, countsSpec)) {
    GvrsFileSpecification sourceSpec = source.getSpecification();
    int gridRows = sourceSpec.getRowsInGrid();
    int gridCols = sourceSpec.getColumnsInGrid();
    int tileRowCount = sourceSpec.getRowsOfTilesInGrid();
    int tileColCount = sourceSpec.getColumnsOfTilesInGrid();
    int tileHeight = sourceSpec.getRowsInTile();
    int tileWidth = sourceSpec.getColumnsInTile();

    GvrsElement sourceElement = source.getElements().get(0);
    boolean sourceIsFloat = sourceElement.getDataType() == GvrsElementType.FLOAT;
    GvrsElement countsElement = counts.getElement("counts");

    long sampleTotal = 0;
    long symbolTotal = 0;

    ps.println("Source File " + inputFile.getName());
    ps.format(" Rows: %8d%n", gridRows);
    ps.format(" Columns: %8d%n", gridCols);
    source.setTileCacheSize(GvrsCacheSize.Small);
    counts.setTileCacheSize(2000);

    long startMillis = System.currentTimeMillis();
    if (showProgress) {
      ps.format("Initializing temporary entropy tabulation file %s%n", countsFile.getPath());
      ps.flush();
      ps.format("Initialization done in %d ms%n", System.currentTimeMillis() - startMillis);
      ps.println("Beginning tabulation");
    }

    // Survey the source one tile at a time, counting each symbol.
    startMillis = System.currentTimeMillis();
    for (int tileRow = 0; tileRow < tileRowCount; tileRow++) {
      if (showProgress && tileRow > 0) {
        long now = System.currentTimeMillis();
        double elapsed = now - startMillis;
        // tile rows per millisecond
        double pace = (tileRow + 1) / elapsed;
        int tileRowsLeft = tileRowCount - tileRow;
        long millisLeft = (long) (tileRowsLeft / pace);
        Date eta = new Date(now + millisLeft);
        ps.format("Surveyed %d rows, %4.1f%% of total, est completion at %s%n", tileRow * tileHeight, 100.0 * (double) tileRow / (tileRowCount - 1.0), eta);
        ps.flush();
      }

      // clip the tile to the grid bounds
      int rowBegin = tileRow * tileHeight;
      int rowEnd = Math.min(rowBegin + tileHeight, gridRows);
      for (int tileCol = 0; tileCol < tileColCount; tileCol++) {
        int colBegin = tileCol * tileWidth;
        int colEnd = Math.min(colBegin + tileWidth, gridCols);
        for (int r = rowBegin; r < rowEnd; r++) {
          for (int c = colBegin; c < colEnd; c++) {
            int bits = sourceIsFloat
              ? Float.floatToRawIntBits(sourceElement.readValue(r, c))
              : sourceElement.readValueInt(r, c);
            // treat the 32 bits as an unsigned index into the counts grid:
            // high 16 bits give the row, low 16 bits give the column
            long symbol = ((long) bits) & 0x00ffffffffL;
            int countsRow = (int) (symbol >>> 16);
            int countsCol = (int) (symbol & 0xffffL);
            int previous = countsElement.readValueInt(countsRow, countsCol);
            countsElement.writeValueInt(countsRow, countsCol, previous + 1);
            sampleTotal++;
            if (previous == 0) {
              // first occurrence of this symbol
              symbolTotal++;
            }
          }
        }
      }
    }

    counts.flush();
    long stopMillis = System.currentTimeMillis();
    double surveySeconds = (stopMillis - startMillis) / 1000.0;
    if (showProgress) {
      ps.format("Finished surveying source file in %4.1f seconds%n", surveySeconds);
      ps.format("Performing tabulation of count data%n");
      ps.flush();
    }

    // Walk the full counts grid and accumulate -p*ln(p) for every symbol
    // that occurred at least once.
    startMillis = System.currentTimeMillis();
    double sampleTotalAsDouble = (double) sampleTotal;
    int largestCount = 0;
    long singletons = 0;
    long repeats = 0;
    KahanSummation accumulator = new KahanSummation();
    for (int r = 0; r < 65536; r++) {
      if (showProgress && (r & 1023) == 0 && r > 0) {
        stopMillis = System.currentTimeMillis();
        double elapsed = stopMillis - startMillis;
        // rows per millisecond
        double pace = (r + 1) / elapsed;
        int rowsLeft = 65536 - r;
        long millisLeft = (long) (rowsLeft / pace);
        Date eta = new Date(stopMillis + millisLeft);
        ps.format("Tabulated %d rows, %4.1f%% of total, est completion at %s%n", r, 100.0 * (double) r / 65536.0, eta);
        ps.flush();
      }
      for (int c = 0; c < 65536; c++) {
        int tally = countsElement.readValueInt(r, c);
        if (tally <= 0) {
          continue;
        }
        double p = (double) tally / sampleTotalAsDouble;
        accumulator.add(-p * Math.log(p));
        if (tally > largestCount) {
          largestCount = tally;
        }
        if (tally == 1) {
          singletons++;
        } else {
          repeats++;
        }
      }
    }

    // the sum was formed with natural logarithms; convert to base 2
    entropy = accumulator.getSum() / Math.log(2.0);
    stopMillis = System.currentTimeMillis();
    double tabulationSeconds = (stopMillis - startMillis) / 1000.0;
    ps.format("Finished processing file in %4.1f seconds%n", tabulationSeconds);
    ps.format("Size of Counts File %12d%n", countsFile.length());
    ps.format("Samples: %12d%n", sampleTotal);
    ps.format("Unique Symbols: %12d%n", singletons);
    ps.format("Repeated Symbols: %12d%n", repeats);
    ps.format("Total symbols: %12d%n", symbolTotal);
    ps.format("Max count: %12d%n", largestCount);
    ps.format("Entropy: %9.5f%n ", entropy);
  } catch (IOException ioex) {
    ps.println("IOException accessing " + inputFile.getPath() + ", " + ioex.getMessage());
    ioex.printStackTrace(ps);
  }

  // the counts file is temporary; remove it whether or not processing succeeded
  countsFile.delete();
  return entropy;
}
Aggregations