Search in sources :

Example 6 with GvrsElement

use of org.gridfour.gvrs.GvrsElement in project gridfour by gwlucastrig.

the class ReadGvrs method main.

/**
 * Demonstrates read access to a GVRS file. The file named by the first
 * argument is opened, its dimensions, summary and metadata are printed to
 * standard output, and then several timed passes are made over the first
 * element in the file.
 * <p>
 * When a second argument (of any value) is supplied, each pass reads only
 * one sample per tile rather than every cell in the grid.
 *
 * @param args the path of the input file, optionally followed by a second
 * argument that switches the read passes to one sample per tile
 * @throws IOException if an I/O error is reported while accessing the file
 */
public static void main(String[] args) throws IOException {
    PrintStream ps = System.out;
    long time0, time1;
    if (args.length == 0) {
        System.out.println("No input file specified");
        System.exit(0);
    }
    File file = new File(args[0]);
    System.out.println("Reading file " + file.getPath());
    boolean oneTestPerTile = args.length > 1;
    GvrsFileSpecification spec;
    int nRows;
    int nCols;
    // Open the file.  The time required to open the file depends, in part,
    // on whether a supplemental index file (.gvrx) is available.  To test the
    // difference, simply delete the .gvrx file.   Deleting the index file
    // will also allow you to test whether the .gvrs file can be opened
    // successfully when an index file is not available.
    time0 = System.nanoTime();
    // try-with-resources ensures the file is closed even when one of the
    // reporting calls below throws.
    try (GvrsFile gvrs = new GvrsFile(file, "r")) {
        time1 = System.nanoTime();
        double timeForOpeningFile = (time1 - time0) / 1.0e+6;
        // GvrsFile implements a method that allows an application to obtain
        // a safe copy of the specification that was used to create the
        // original GVRS file.  The specification element is the primary
        // method for obtaining descriptive metadata about the organization
        // of the file.   The example that follows demonstrates the use of
        // the specification to get some descriptive data.
        // Of course, if an application just wants to print that
        // metadata, the summarize function is the most efficient way of
        // doing so.
        spec = gvrs.getSpecification();
        nRows = spec.getRowsInGrid();
        nCols = spec.getColumnsInGrid();
        int nRowsOfTiles = spec.getRowsOfTilesInGrid();
        int nColsOfTiles = spec.getColumnsOfTilesInGrid();
        int nTiles = nRowsOfTiles * nColsOfTiles;
        ps.format("File dimensions%n");
        ps.format("  Rows:      %8d%n", nRows);
        ps.format("  Columns:   %8d%n", nCols);
        ps.format("  Tiles:     %8d%n", nTiles);
        ps.format("Time to read header and index %10.1f ms%n", timeForOpeningFile);
        gvrs.summarize(ps, true);
        // Variable length records can contain either binary or text data.
        // The VLR's are read during initial access, though their payload
        // (which may be quite large) is not read until requested by the
        // application code.
        ps.println("\n\nGVRS Metadata");
        ps.println("------------------------------------------------");
        List<GvrsMetadata> metadataList = gvrs.readMetadata();
        for (GvrsMetadata metadata : metadataList) {
            String description = metadata.getDescription();
            ps.format("  %-24.24s  %6d:  %s%n", metadata.getName(), metadata.getRecordID(), description == null ? "" : description);
        }
    }
    // we collect a sum of the samples.  we don't really care about
    // this value, but we collect it to ensure that Java doesn't optimize
    // away the actions inside the loop by telling it that we want a
    // computed value.
    int nTest = 4;
    double sumSample = 0;
    long nSample = 0;
    int rowStep = 1;
    int colStep = 1;
    if (oneTestPerTile) {
        // stepping by the tile dimensions touches one cell in each tile
        rowStep = spec.getRowsInTile();
        colStep = spec.getColumnsInTile();
    }
    for (int iTest = 0; iTest < nTest; iTest++) {
        // the timed interval deliberately includes the cost of opening the file
        time0 = System.nanoTime();
        try (GvrsFile gvrs = new GvrsFile(file, "r")) {
            List<GvrsElement> elementList = gvrs.getElements();
            GvrsElement zElement = elementList.get(0);
            gvrs.setTileCacheSize(GvrsCacheSize.Large);
            for (int iRow = 0; iRow < nRows; iRow += rowStep) {
                for (int iCol = 0; iCol < nCols; iCol += colStep) {
                    double sample = zElement.readValue(iRow, iCol);
                    sumSample += sample;
                    nSample++;
                }
            }
            time1 = System.nanoTime();
            double timeForReadingFile = (time1 - time0) / 1.0e+6;
            System.out.format("Time to read all tiles        %10.1f ms%n", timeForReadingFile);
            if (iTest == nTest - 1) {
                // on the last test, summarize
                gvrs.summarize(ps, false);
            }
        }
    }
    ps.println("Avg Samples " + sumSample / (double) nSample);
}
Also used : PrintStream(java.io.PrintStream) GvrsElement(org.gridfour.gvrs.GvrsElement) GvrsMetadata(org.gridfour.gvrs.GvrsMetadata) GvrsFileSpecification(org.gridfour.gvrs.GvrsFileSpecification) GvrsFile(org.gridfour.gvrs.GvrsFile) File(java.io.File) GvrsFile(org.gridfour.gvrs.GvrsFile)

Example 7 with GvrsElement

use of org.gridfour.gvrs.GvrsElement in project gridfour by gwlucastrig.

the class PackageData method process.

/**
 * Packages the elevation/bathymetry grid of a NetCDF-format source file
 * into a new GVRS file and, when the options enable verification, re-reads
 * the product and compares it sample-by-sample with the source.
 * <p>
 * A mismatch found during verification terminates the JVM through
 * {@code System.exit(-1)}.
 *
 * @param ps the print stream that receives progress and summary reports
 * @param options the options supplying the input and output files, tile
 * size, z scale and offset, and the compression, checksum, address-space
 * and verification settings
 * @param args the command-line arguments, scanned here for the
 * {@code -lsop} option
 * @throws IOException if an I/O error occurs, or if the NetCDF library
 * reports an invalid range while a row is being read
 * @throws IllegalArgumentException if the input lacks the expected
 * latitude, longitude or elevation variables, or if the elevation variable
 * has fewer than two dimensions
 */
void process(PrintStream ps, TestOptions options, String[] args) throws IOException {
    // The packaging of data in a Gvrs file can be thought of in terms of
    // the steps shown below.
    // 
    // 0.  Obtain descriptive parameters about source data.  In this
    // case, the application is packing data from a NetCDF source
    // and most of the descriptive parameters follow the pattern
    // established in the earlier ExtractData.java demonstration
    // 
    // 1.  Define the fixed metadata about the file (its dimensions,
    // data type, tile organization, etc.) using a GvrsFileSpecification
    // object.
    // 
    // 2.  Open a new GvrsFile object using the settings created in step 1.
    // Adjust any run-time parameters (such as the tile-cache size)
    // according to the needs of the application.
    // 
    // 3.  Extract the data from its source and store in the Gvrs file.
    // 
    ps.format("%nGvrs Packaging Application for NetCDF-format Global DEM files%n");
    Locale locale = Locale.getDefault();
    Date date = new Date();
    SimpleDateFormat sdFormat = new SimpleDateFormat("dd MMM yyyy HH:mm z", locale);
    ps.format("Date of Execution: %s%n", sdFormat.format(date));
    String inputPath = options.getInputFile().getPath();
    File outputFile = options.getOutputFile();
    if (outputFile == null) {
        ps.format("Missing specification for output file%n");
        ps.format("Packaging application terminated%n");
        return;
    }
    ps.format("Input file:  %s%n", inputPath);
    ps.format("Output file: %s%n", outputFile.getPath());
    boolean[] matched = new boolean[args.length];
    boolean useLsop = options.scanBooleanOption(args, "-lsop", matched, false);
    // Open the NetCDF file -----------------------------------
    ps.println("Opening NetCDF input file");
    // try-with-resources guarantees that the NetCDF file is closed on every
    // exit path, including the early return and the exceptions below.
    try (NetcdfFile ncfile = NetcdfFile.open(inputPath)) {
        // Identify which Variable instances carry information about the
        // geographic (latitude/longitude) coordinate system and also which
        // carry information for elevation and bathymetry.
        // the Variable that carries row-latitude information
        Variable lat;
        // the Variable that carries column-longitude information
        Variable lon;
        // the variable that carries elevation and bathymetry
        Variable z;
        lat = ncfile.findVariable("lat");
        lon = ncfile.findVariable("lon");
        z = ncfile.findVariable("elevation");
        // Use the input file name to format a product label
        File inputFile = new File(inputPath);
        String productLabel = inputFile.getName();
        if (productLabel.toLowerCase().endsWith(".nc")) {
            productLabel = productLabel.substring(0, productLabel.length() - 3);
        }
        // The same default tile size (90 rows by 120 columns) is requested
        // regardless of which variable-naming convention the input uses.
        int[] tileSize = options.getTileSize(90, 120);
        if (lat == null) {
            // ETOPO1 specification
            lat = ncfile.findVariable("y");
            lon = ncfile.findVariable("x");
            z = ncfile.findVariable("z");
        }
        if (lat == null || lon == null || z == null) {
            throw new IllegalArgumentException("Input does not contain valid lat,lon, and elevation Variables");
        }
        // using the variables from above, extract coordinate system
        // information for the product and print it to the output.
        ExtractionCoordinates extractionCoords = new ExtractionCoordinates(lat, lon);
        extractionCoords.summarizeCoordinates(ps);
        // Get the dimensions of the raster (grid) elevation/bathymetry data.
        // The code below indexes shape[0] (rows) and shape[1] (columns),
        // so the variable must have at least two dimensions.
        int rank = z.getRank();
        if (rank < 2) {
            throw new IllegalArgumentException("Elevation variable must have at least 2 dimensions, but has rank " + rank);
        }
        int[] shape = z.getShape();
        int nRows = shape[0];
        int nCols = shape[1];
        ps.format("Rows:      %8d%n", nRows);
        ps.format("Columns:   %8d%n", nCols);
        int nRowsInTile = tileSize[0];
        int nColsInTile = tileSize[1];
        // Initialize the specification used to initialize the Gvrs file -------
        GvrsFileSpecification spec = new GvrsFileSpecification(nRows, nCols, nRowsInTile, nColsInTile);
        spec.setLabel(productLabel);
        // Initialize the data type.  If a zScale option was specified,
        // use integer-coded floats.  Otherwise, pick the data type
        // based on whether the NetCDF file gives integral or floating point
        // data.
        boolean isZScaleSpecified = options.isZScaleSpecified();
        float zScale = (float) options.getZScale();
        float zOffset = (float) options.getZOffset();
        // data type from NetCDF file
        DataType sourceDataType = z.getDataType();
        GvrsElementSpecification elementSpec = null;
        GvrsElementType gvrsDataType;
        if (isZScaleSpecified) {
            // the options define our data type
            int encodedLimitDepth = (int) ((LIMIT_DEPTH - zOffset) * zScale);
            int encodedLimitElev = (int) ((LIMIT_ELEVATION - zOffset) * zScale);
            elementSpec = new GvrsElementSpecificationIntCodedFloat("z", zScale, zOffset, encodedLimitDepth, encodedLimitElev, Integer.MIN_VALUE, true);
            spec.addElementSpecification(elementSpec);
            gvrsDataType = GvrsElementType.INT_CODED_FLOAT;
        } else if (sourceDataType.isIntegral()) {
            elementSpec = new GvrsElementSpecificationShort("z", LIMIT_DEPTH, LIMIT_ELEVATION, FILL_VALUE);
            spec.addElementSpecification(elementSpec);
            gvrsDataType = GvrsElementType.SHORT;
        } else {
            elementSpec = new GvrsElementSpecificationFloat("z", LIMIT_DEPTH, LIMIT_ELEVATION, Float.NaN);
            spec.addElementSpecification(elementSpec);
            gvrsDataType = GvrsElementType.FLOAT;
        }
        elementSpec.setDescription("Elevation (positive values) or depth (negative), in meters");
        elementSpec.setUnitOfMeasure("m");
        // Example with special character
        elementSpec.setLabel("die H\u00f6henlage");
        ps.println("Source date type " + sourceDataType + ", stored as " + gvrsDataType);
        ps.println("");
        // Determine whether data compression is used -------------------
        boolean compressionEnabled = options.isCompressionEnabled();
        spec.setDataCompressionEnabled(compressionEnabled);
        boolean checksumsEnabled = options.isChecksumComputationEnabled();
        spec.setChecksumEnabled(checksumsEnabled);
        boolean bigAddressSpaceEnabled = options.isBigAddressSpaceEnabled();
        spec.setExtendedFileSizeEnabled(bigAddressSpaceEnabled);
        double[] geoCoords = extractionCoords.getGeographicCoordinateBounds();
        spec.setGeographicCoordinates(geoCoords[0], geoCoords[1], geoCoords[2], geoCoords[3]);
        // Check to verify that the geographic coordinates and grid coordinate
        // are correctly implemented. This test is not truly part of the packaging
        // process (since it should always work), but is included here as a
        // diagnostic.
        extractionCoords.checkSpecificationTransform(ps, spec);
        // When the -lsop option was given, register the LSOP codec with
        // the specification.
        if (useLsop) {
            LsCodecUtility.addLsopToSpecification(spec, false);
        }
        // Create the output file and store the content from the input file.
        if (outputFile.exists()) {
            ps.println("Output file exists. Removing old file");
            boolean status = outputFile.delete();
            if (!status) {
                ps.println("Removal attempt failed");
                return;
            }
        }
        ps.println("Begin processing");
        double zMin = Double.POSITIVE_INFINITY;
        double zMax = Double.NEGATIVE_INFINITY;
        double zSum = 0;
        long nSum = 0;
        try (GvrsFile gvrs = new GvrsFile(outputFile, spec)) {
            gvrs.writeMetadata(GvrsMnc.Copyright, "This data is in the public domain and may be used free of charge");
            gvrs.writeMetadata(GvrsMnc.TermsOfUse, "This data should not be used for navigation");
            GvrsElement zElement = gvrs.getElement("z");
            gvrs.setTileCacheSize(GvrsCacheSize.Large);
            storeGeoreferencingInformation(gvrs);
            // Initialize data-statistics collection ---------------------------
            // we happen to know the range of values for the global DEM a-priori.
            // it ranges from about -11000 to 8650.  This allows us to tabulate counts
            // of which values we find in the data source.  We can use this information
            // to estimate the entropy of the source data and make a realistic
            // assessment of how many bytes would be needed to store them.
            InputDataStatCollector stats = new InputDataStatCollector(-11000, 8650, zScale);
            int[] readOrigin = new int[rank];
            int[] readShape = new int[rank];
            // -----------------------------------------------------------------
            // Package the data
            long time0 = System.currentTimeMillis();
            for (int iRow = 0; iRow < nRows; iRow++) {
                if (iRow % 1000 == 999) {
                    // once every 1000 rows, report progress and a projected
                    // completion time based on the average rate so far
                    long time1 = System.currentTimeMillis();
                    double deltaT = time1 - time0;
                    // rows per millis
                    double rate = (iRow + 1) / deltaT;
                    int nRemaining = nRows - iRow;
                    long remainingT = (long) (nRemaining / rate);
                    Date d = new Date(time1 + remainingT);
                    ps.format("Completed %d rows, %4.1f%% of total, est completion at %s%n", iRow + 1, 100.0 * (double) iRow / (nRows - 1.0), d);
                    ps.flush();
                }
                // each read covers one full row: origin (iRow, 0), shape (1, nCols)
                int row0 = iRow;
                int col0 = 0;
                readOrigin[0] = row0;
                readOrigin[1] = col0;
                readShape[0] = 1;
                readShape[1] = nCols;
                // An InvalidRangeException from the read is not expected to
                // happen in this application unless the input file is corrupt.
                try {
                    Array array = z.read(readOrigin, readShape);
                    // Take the row just read from the source
                    // and store it in the Gvrs file.
                    switch(gvrsDataType) {
                        case INTEGER:
                        case SHORT:
                            for (int iCol = 0; iCol < nCols; iCol++) {
                                int sample = array.getInt(iCol);
                                zElement.writeValueInt(iRow, iCol, sample);
                                stats.addSample(sample);
                                if (sample < zMin) {
                                    zMin = sample;
                                }
                                if (sample > zMax) {
                                    zMax = sample;
                                }
                                zSum += sample;
                                nSum++;
                            }
                            break;
                        case INT_CODED_FLOAT:
                        case FLOAT:
                        default:
                            for (int iCol = 0; iCol < nCols; iCol++) {
                                float sample = array.getFloat(iCol);
                                zElement.writeValue(iRow, iCol, sample);
                                stats.addSample(sample);
                                if (sample < zMin) {
                                    zMin = sample;
                                }
                                if (sample > zMax) {
                                    zMax = sample;
                                }
                                zSum += sample;
                                nSum++;
                            }
                    }
                } catch (InvalidRangeException irex) {
                    throw new IOException(irex.getMessage(), irex);
                }
            }
            gvrs.flush();
            long time1 = System.currentTimeMillis();
            double timeToProcess = (time1 - time0) / 1000.0;
            ps.format("Finished processing file in %4.1f seconds%n", timeToProcess);
            ps.format("Entropy for input data %4.1f bits/sample%n", stats.getEntropy());
            long outputSize = outputFile.length();
            long nCells = (long) nRows * (long) nCols;
            double bitsPerSymbol = 8.0 * (double) outputSize / (double) nCells;
            ps.format("Storage used (including overhead) %6.4f bits/sample%n", bitsPerSymbol);
            ps.format("%nSummary of file content and packaging actions------------%n");
            gvrs.summarize(ps, true);
            ps.format("Range of z values:%n");
            ps.format("  Min z: %8.3f%n", zMin);
            ps.format("  Max z: %8.3f%n", zMax);
            ps.format("  Avg z: %8.3f%n", zSum / (nSum > 0 ? nSum : 1));
        }
        // If verification is enabled, re-open the product read-only and
        // compare its values to those of the source data.
        if (options.isVerificationEnabled()) {
            int[] readOrigin = new int[rank];
            int[] readShape = new int[rank];
            ps.println("\nTesting product for data consistency with source");
            ps.println("Opening gvrs file for reading");
            long time0 = System.currentTimeMillis();
            try (GvrsFile gvrs = new GvrsFile(outputFile, "r")) {
                long time1 = System.currentTimeMillis();
                ps.println("Opening complete in " + (time1 - time0) + " ms");
                GvrsFileSpecification testSpec = gvrs.getSpecification();
                String testLabel = testSpec.getLabel();
                ps.println("Label:     " + testLabel);
                GvrsMetadata m = gvrs.readMetadata("Copyright", 0);
                if (m != null) {
                    ps.println("Copyright: " + m.getString());
                }
                GvrsElement zElement = gvrs.getElement("z");
                ps.println("Element:   " + zElement.getName() + ", " + zElement.getDescription());
                gvrs.setTileCacheSize(GvrsCacheSize.Large);
                for (int iRow = 0; iRow < nRows; iRow++) {
                    if (iRow % 10000 == 9999) {
                        time1 = System.currentTimeMillis();
                        double deltaT = time1 - time0;
                        // rows per millis
                        double rate = (iRow + 1) / deltaT;
                        int nRemaining = nRows - iRow;
                        long remainingT = (long) (nRemaining / rate);
                        Date d = new Date(time1 + remainingT);
                        ps.format("Completed %d rows, %4.1f%% of total, est completion at %s%n", iRow + 1, 100.0 * (double) iRow / (nRows - 1.0), d);
                        ps.flush();
                    }
                    int row0 = iRow;
                    int col0 = 0;
                    readOrigin[0] = row0;
                    readOrigin[1] = col0;
                    readShape[0] = 1;
                    readShape[1] = nCols;
                    try {
                        Array array = z.read(readOrigin, readShape);
                        switch(gvrsDataType) {
                            case INTEGER:
                                for (int iCol = 0; iCol < nCols; iCol++) {
                                    int sample = array.getInt(iCol);
                                    int test = zElement.readValueInt(iRow, iCol);
                                    if (sample != test) {
                                        ps.println("Failure at " + iRow + ", " + iCol);
                                        // NOTE(review): this repeated read has no effect on the
                                        // outcome; presumably kept as a debugger hook -- confirm
                                        test = zElement.readValueInt(iRow, iCol);
                                        System.exit(-1);
                                    }
                                }
                                break;
                            case INT_CODED_FLOAT:
                                for (int iCol = 0; iCol < nCols; iCol++) {
                                    double sample = array.getDouble(iCol);
                                    // quantize the source value with the same scale and
                                    // offset, then allow a difference of just over one
                                    // quantization step (1/zScale)
                                    int iSample = (int) ((sample - zOffset) * zScale + 0.5);
                                    float fSample = iSample / zScale + zOffset;
                                    float test = zElement.readValue(iRow, iCol);
                                    double delta = Math.abs(fSample - test);
                                    if (delta > 1.01 / zScale) {
                                        ps.println("Failure at " + iRow + ", " + iCol);
                                        System.exit(-1);
                                    }
                                }
                                break;
                            case FLOAT:
                            default:
                                // SHORT products also arrive here and are compared
                                // through their floating-point representation
                                for (int iCol = 0; iCol < nCols; iCol++) {
                                    float sample = array.getFloat(iCol);
                                    float test = zElement.readValue(iRow, iCol);
                                    if (sample != test) {
                                        ps.println("Failure at " + iRow + ", " + iCol);
                                        // NOTE(review): this repeated read has no effect on the
                                        // outcome; presumably kept as a debugger hook -- confirm
                                        test = zElement.readValueInt(iRow, iCol);
                                        System.exit(-1);
                                    }
                                }
                        }
                    } catch (InvalidRangeException irex) {
                        throw new IOException(irex.getMessage(), irex);
                    }
                }
                time1 = System.currentTimeMillis();
                ps.println("Exhaustive cross check complete in " + (time1 - time0) + " ms");
                gvrs.summarize(ps, false);
            }
        }
    }
}
Also used : Locale(java.util.Locale) GvrsElementSpecificationFloat(org.gridfour.gvrs.GvrsElementSpecificationFloat) Variable(ucar.nc2.Variable) GvrsElement(org.gridfour.gvrs.GvrsElement) GvrsElementSpecificationShort(org.gridfour.gvrs.GvrsElementSpecificationShort) DataType(ucar.ma2.DataType) GvrsElementSpecification(org.gridfour.gvrs.GvrsElementSpecification) InvalidRangeException(ucar.ma2.InvalidRangeException) IOException(java.io.IOException) Date(java.util.Date) GvrsElementSpecificationIntCodedFloat(org.gridfour.gvrs.GvrsElementSpecificationIntCodedFloat) NetcdfFile(ucar.nc2.NetcdfFile) Array(ucar.ma2.Array) GvrsElementType(org.gridfour.gvrs.GvrsElementType) GvrsMetadata(org.gridfour.gvrs.GvrsMetadata) GvrsFileSpecification(org.gridfour.gvrs.GvrsFileSpecification) SimpleDateFormat(java.text.SimpleDateFormat) GvrsFile(org.gridfour.gvrs.GvrsFile) File(java.io.File) NetcdfFile(ucar.nc2.NetcdfFile) GvrsFile(org.gridfour.gvrs.GvrsFile)

Example 8 with GvrsElement

use of org.gridfour.gvrs.GvrsElement in project gridfour by gwlucastrig.

the class GvrsReadPerformance method testTileBlockScan.

/**
 * Times a scan of the first element of the input file performed one
 * tile-sized block at a time, and passes the elapsed time, the average
 * sample value and the sample count to the report method under the
 * label "Block Test".
 * <p>
 * Integral elements (INTEGER or SHORT) are read as int blocks and every
 * value is counted; all other elements are read as float blocks and NaN
 * values are left out of both the sum and the count.
 *
 * @param ps the print stream handed on to the report method
 * @throws IOException if an I/O error is reported while reading the file
 */
void testTileBlockScan(PrintStream ps) throws IOException {
    try (GvrsFile gvrs = new GvrsFile(inputFile, "r")) {
        gvrs.setTileCacheSize(GvrsCacheSize.Large);
        GvrsElement element = gvrs.getElements().get(0);
        GvrsElementType elementType = element.getDataType();
        boolean integral = elementType == GvrsElementType.INTEGER
            || elementType == GvrsElementType.SHORT;
        long sampleCount = 0;
        long startNanos = System.nanoTime();
        double total = 0;
        if (integral) {
            for (int tileRow = 0; tileRow < nRowsOfTiles; tileRow++) {
                for (int tileCol = 0; tileCol < nColsOfTiles; tileCol++) {
                    int firstRow = tileRow * nRowsInTile;
                    int firstCol = tileCol * nColsInTile;
                    int[] values = element.readBlockInt(firstRow, firstCol, nRowsInTile, nColsInTile);
                    for (int k = 0; k < values.length; k++) {
                        total += values[k];
                        sampleCount++;
                    }
                }
            }
        } else {
            for (int tileRow = 0; tileRow < nRowsOfTiles; tileRow++) {
                for (int tileCol = 0; tileCol < nColsOfTiles; tileCol++) {
                    int firstRow = tileRow * nRowsInTile;
                    int firstCol = tileCol * nColsInTile;
                    float[] values = element.readBlock(firstRow, firstCol, nRowsInTile, nColsInTile);
                    for (int k = 0; k < values.length; k++) {
                        float value = values[k];
                        if (Float.isNaN(value)) {
                            // NaN entries are excluded from the statistics
                            continue;
                        }
                        total += value;
                        sampleCount++;
                    }
                }
            }
        }
        // the mean stays at zero when no samples were accumulated
        double mean = sampleCount > 0 ? total / (double) sampleCount : 0;
        long stopNanos = System.nanoTime();
        double elapsedSeconds = (stopNanos - startNanos) / 1.0e+9;
        report(ps, "Block Test", elapsedSeconds, mean, sampleCount);
    }
}
Also used : GvrsElementType(org.gridfour.gvrs.GvrsElementType) GvrsElement(org.gridfour.gvrs.GvrsElement) GvrsFile(org.gridfour.gvrs.GvrsFile)

Example 9 with GvrsElement

use of org.gridfour.gvrs.GvrsElement in project gridfour by gwlucastrig.

the class EntropyTabulator method process.

/**
 * Process the specified GVRS file and write a report to the specified print
 * stream.
 * <p>
 * Occurrence counts for each 32-bit sample value are accumulated in a
 * temporary GVRS file created in the same directory as the input file.
 * The temporary file is removed before this method returns, whether
 * processing succeeds or fails.
 * <p>
 * If configured to do so, this method will write progress reports to the
 * specified print stream.
 *
 * @param ps a valid print stream, {@code System.out} is a valid candidate
 * @param inputFile a reference to a GVRS file
 * @param showProgress indicates if progress reports are to be printed during
 * processing
 * @return on successful completion, a valid floating-point value giving the
 * entropy in bits per sample; otherwise, {@code Double.NaN}.
 */
public double process(PrintStream ps, File inputFile, boolean showProgress) {
    double entropy = Double.NaN;
    ps.format("%nEntropy tabulation for GVRS files%n");
    Locale locale = Locale.getDefault();
    Date date = new Date();
    SimpleDateFormat sdFormat = new SimpleDateFormat("dd MMM yyyy HH:mm z", locale);
    ps.format("Date of Execution: %s%n", sdFormat.format(date));
    String inputPath = inputFile.getPath();
    ps.format("Input file:  %s%n", inputPath);
    File parent = inputFile.getParentFile();
    File countsFile = new File(parent, TEMP_COUNT_FILE_NAME);
    // Define the specs for the entropy stats file.  A 65536-by-65536 grid
    // provides one counter for every possible 32-bit value: the high 16
    // bits of a value select the row and the low 16 bits the column.
    GvrsFileSpecification countsSpec = new GvrsFileSpecification(65536, 65536, 256, 256);
    countsSpec.setDataCompressionEnabled(false);
    GvrsElementSpecificationInt countsElementSpec = new GvrsElementSpecificationInt("counts", 0);
    countsSpec.addElementSpecification(countsElementSpec);
    try (GvrsFile source = new GvrsFile(inputFile, "r");
        GvrsFile counts = new GvrsFile(countsFile, countsSpec)) {
        GvrsFileSpecification sourceSpec = source.getSpecification();
        int nRowsInSource = sourceSpec.getRowsInGrid();
        int nColsInSource = sourceSpec.getColumnsInGrid();
        int nRowsOfTilesInSource = sourceSpec.getRowsOfTilesInGrid();
        int nColsOfTilesInSource = sourceSpec.getColumnsOfTilesInGrid();
        int nRowsInTile = sourceSpec.getRowsInTile();
        int nColsInTile = sourceSpec.getColumnsInTile();
        GvrsElement sourceElement = source.getElements().get(0);
        GvrsElementType sourceDataType = sourceElement.getDataType();
        GvrsElement countsElement = counts.getElement("counts");
        long nSamples = 0;
        long nSymbols = 0;
        ps.println("Source File " + inputFile.getName());
        ps.format("   Rows:      %8d%n", nRowsInSource);
        ps.format("   Columns:   %8d%n", nColsInSource);
        source.setTileCacheSize(GvrsCacheSize.Small);
        counts.setTileCacheSize(2000);
        long time0 = System.currentTimeMillis();
        if (showProgress) {
            ps.format("Initializing temporary entropy tabulation file %s%n", countsFile.getPath());
            ps.flush();
        }
        // NOTE(review): no work is performed between the two progress
        // messages, so the reported initialization time is effectively zero.
        if (showProgress) {
            ps.format("Initialization done in %d ms%n", System.currentTimeMillis() - time0);
            ps.println("Beginning tabulation");
        }
        time0 = System.currentTimeMillis();
        // Survey the source one tile at a time so that each source tile
        // is fully processed before moving on to the next.
        for (int iTileRow = 0; iTileRow < nRowsOfTilesInSource; iTileRow++) {
            if (showProgress && iTileRow > 0) {
                long time1 = System.currentTimeMillis();
                double deltaT = time1 - time0;
                // rows per millis
                double rate = (iTileRow + 1) / deltaT;
                int nRemaining = nRowsOfTilesInSource - iTileRow;
                long remainingT = (long) (nRemaining / rate);
                Date d = new Date(time1 + remainingT);
                ps.format("Surveyed %d rows, %4.1f%% of total, est completion at %s%n", iTileRow * nRowsInTile, 100.0 * (double) iTileRow / (nRowsOfTilesInSource - 1.0), d);
                ps.flush();
            }
            // clip the last row and column of tiles to the grid limits
            int row0 = iTileRow * nRowsInTile;
            int row1 = row0 + nRowsInTile;
            if (row1 > nRowsInSource) {
                row1 = nRowsInSource;
            }
            for (int iTileCol = 0; iTileCol < nColsOfTilesInSource; iTileCol++) {
                int col0 = iTileCol * nColsInTile;
                int col1 = col0 + nColsInTile;
                if (col1 > nColsInSource) {
                    col1 = nColsInSource;
                }
                for (int iRow = row0; iRow < row1; iRow++) {
                    for (int iCol = col0; iCol < col1; iCol++) {
                        int bits;
                        if (sourceDataType == GvrsElementType.FLOAT) {
                            float sample = sourceElement.readValue(iRow, iCol);
                            bits = Float.floatToRawIntBits(sample);
                        } else {
                            bits = sourceElement.readValueInt(iRow, iCol);
                        }
                        // treat the 32 bits as an unsigned index and split
                        // it into a row and column of the counts grid
                        long longIndex = ((long) bits) & 0x00ffffffffL;
                        long longRow = longIndex / 65536L;
                        long longCol = longIndex - longRow * 65536L;
                        int count = countsElement.readValueInt((int) longRow, (int) longCol);
                        countsElement.writeValueInt((int) longRow, (int) longCol, count + 1);
                        nSamples++;
                        if (count == 0) {
                            // first occurrence of this value
                            nSymbols++;
                        }
                    }
                }
            }
        }
        counts.flush();
        long time1 = System.currentTimeMillis();
        double timeToProcess = (time1 - time0) / 1000.0;
        if (showProgress) {
            ps.format("Finished surveying source file in %4.1f seconds%n", timeToProcess);
            ps.format("Performing tabulation of count data%n");
            ps.flush();
        }
        time0 = System.currentTimeMillis();
        double nSamplesDouble = (double) nSamples;
        int maxCount = 0;
        long nUnique = 0;
        long nRepeated = 0;
        KahanSummation ks = new KahanSummation();
        for (int iRow = 0; iRow < 65536; iRow++) {
            if (showProgress && (iRow & 1023) == 0 && iRow > 0) {
                time1 = System.currentTimeMillis();
                double deltaT = time1 - time0;
                // rows per millis
                double rate = (iRow + 1) / deltaT;
                int nRemaining = 65536 - iRow;
                long remainingT = (long) (nRemaining / rate);
                Date d = new Date(time1 + remainingT);
                ps.format("Tabulated %d rows, %4.1f%% of total, est completion at %s%n", iRow, 100.0 * (double) iRow / 65536.0, d);
                ps.flush();
            }
            for (int iCol = 0; iCol < 65536; iCol++) {
                int count = countsElement.readValueInt(iRow, iCol);
                if (count > 0) {
                    // accumulate the term -p*ln(p) for this symbol
                    double p = (double) count / nSamplesDouble;
                    double s = -p * Math.log(p);
                    ks.add(s);
                    if (count > maxCount) {
                        maxCount = count;
                    }
                    if (count == 1) {
                        nUnique++;
                    } else {
                        nRepeated++;
                    }
                }
            }
        }
        // get sum of entropy calculations, and then apply
        // adjustment for base 2.
        entropy = ks.getSum() / Math.log(2.0);
        time1 = System.currentTimeMillis();
        double timeToTabulate = (time1 - time0) / 1000.0;
        ps.format("Finished processing file in %4.1f seconds%n", timeToTabulate);
        ps.format("Size of Counts File %12d%n", countsFile.length());
        ps.format("Samples:            %12d%n", nSamples);
        ps.format("Unique Symbols:     %12d%n", nUnique);
        ps.format("Repeated Symbols:   %12d%n", nRepeated);
        ps.format("Total symbols:      %12d%n", nSymbols);
        ps.format("Max count:          %12d%n", maxCount);
        ps.format("Entropy:            %9.5f%n ", entropy);
    } catch (IOException ioex) {
        ps.println("IOException accessing " + inputFile.getPath() + ", " + ioex.getMessage());
        ioex.printStackTrace(ps);
    } finally {
        // Both GVRS files have been closed by the time this clause runs.
        // Removing the temporary file here ensures it is not left behind
        // when an unchecked exception interrupts processing.
        if (!countsFile.delete() && countsFile.exists()) {
            ps.format("Unable to delete temporary file %s%n", countsFile.getPath());
        }
    }
    return entropy;
}
Also used : Locale(java.util.Locale) GvrsElementSpecificationInt(org.gridfour.gvrs.GvrsElementSpecificationInt) GvrsElement(org.gridfour.gvrs.GvrsElement) IOException(java.io.IOException) Date(java.util.Date) GvrsElementType(org.gridfour.gvrs.GvrsElementType) GvrsFileSpecification(org.gridfour.gvrs.GvrsFileSpecification) KahanSummation(org.gridfour.util.KahanSummation) SimpleDateFormat(java.text.SimpleDateFormat) File(java.io.File) GvrsFile(org.gridfour.gvrs.GvrsFile) GvrsFile(org.gridfour.gvrs.GvrsFile)

Aggregations

GvrsElement (org.gridfour.gvrs.GvrsElement)9 GvrsFile (org.gridfour.gvrs.GvrsFile)9 GvrsElementType (org.gridfour.gvrs.GvrsElementType)6 File (java.io.File)4 GvrsFileSpecification (org.gridfour.gvrs.GvrsFileSpecification)4 SimpleDateFormat (java.text.SimpleDateFormat)3 Date (java.util.Date)3 IOException (java.io.IOException)2 Locale (java.util.Locale)2 GvrsElementSpecification (org.gridfour.gvrs.GvrsElementSpecification)2 GvrsElementSpecificationInt (org.gridfour.gvrs.GvrsElementSpecificationInt)2 GvrsMetadata (org.gridfour.gvrs.GvrsMetadata)2 BufferedImage (java.awt.image.BufferedImage)1 PrintStream (java.io.PrintStream)1 SimpleTimeZone (java.util.SimpleTimeZone)1 GvrsElementSpecificationFloat (org.gridfour.gvrs.GvrsElementSpecificationFloat)1 GvrsElementSpecificationIntCodedFloat (org.gridfour.gvrs.GvrsElementSpecificationIntCodedFloat)1 GvrsElementSpecificationShort (org.gridfour.gvrs.GvrsElementSpecificationShort)1 KahanSummation (org.gridfour.util.KahanSummation)1 Array (ucar.ma2.Array)1