Search in sources :

Example 56 with ZipFile

use of java.util.zip.ZipFile in project deeplearning4j by deeplearning4j.

the class WordVectorSerializer method readParagraphVectors.

/**
 * Restores a ParagraphVectors model previously saved with writeParagraphVectors().
 *
 * The archive is first read as a Word2Vec model via readWord2Vec(), which is then
 * converted into a ParagraphVectors instance. If the archive contains a
 * "labels.txt" entry, every line of it is decoded with decodeB64() and the matching
 * vocabulary word (if any) is marked as a label.
 *
 * @param file archive holding the saved model
 * @return the restored ParagraphVectors model
 * @throws IOException if the archive or the temporary labels file cannot be read or written
 */
public static ParagraphVectors readParagraphVectors(File file) throws IOException {
    Word2Vec w2v = readWord2Vec(file);
    // and "convert" it to ParaVec model + optionally trying to restore labels information
    ParagraphVectors vectors = new ParagraphVectors.Builder(w2v.getConfiguration()).vocabCache(w2v.getVocab()).lookupTable(w2v.getLookupTable()).resetModel(false).build();
    // try-with-resources: the archive handle must not stay open after we return
    try (ZipFile zipFile = new ZipFile(file)) {
        // now we try to restore labels information
        ZipEntry labels = zipFile.getEntry("labels.txt");
        if (labels != null) {
            // the temp file is only needed when there actually is a labels entry
            File tmpFileL = File.createTempFile("paravec", "l");
            // fallback in case the eager delete() below fails
            tmpFileL.deleteOnExit();
            try {
                try (InputStream stream = zipFile.getInputStream(labels)) {
                    Files.copy(stream, Paths.get(tmpFileL.getAbsolutePath()), StandardCopyOption.REPLACE_EXISTING);
                }
                try (BufferedReader reader = new BufferedReader(new FileReader(tmpFileL))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        VocabWord word = vectors.getVocab().tokenFor(decodeB64(line.trim()));
                        // labels that are not part of the vocabulary are skipped
                        if (word != null) {
                            word.markAsLabel(true);
                        }
                    }
                }
            } finally {
                // do not let temp files pile up until JVM exit
                tmpFileL.delete();
            }
        }
    }
    vectors.extractLabels();
    return vectors;
}
Also used : ZipFile(java.util.zip.ZipFile) GZIPInputStream(java.util.zip.GZIPInputStream) StaticWord2Vec(org.deeplearning4j.models.word2vec.StaticWord2Vec) Word2Vec(org.deeplearning4j.models.word2vec.Word2Vec) ZipEntry(java.util.zip.ZipEntry) VocabWord(org.deeplearning4j.models.word2vec.VocabWord) ZipFile(java.util.zip.ZipFile) ParagraphVectors(org.deeplearning4j.models.paragraphvectors.ParagraphVectors)

Example 57 with ZipFile

use of java.util.zip.ZipFile in project deeplearning4j by deeplearning4j.

the class WordVectorSerializer method loadStaticModel.

/**
 * This method restores previously saved w2v model. File can be in one of the following formats:
 * 1) Binary model, either compressed or not. Like well-known Google Model
 * 2) Popular CSV word2vec text format
 * 3) DL4j compressed format
 *
 * The formats are tried in this order: DL4j zip archive (entry "syn0.txt"), then CSV text,
 * then binary. While loading, periodic GC is switched off (if it was on) and the occasional
 * GC frequency is set to 50000; both settings are restored once, after the last attempt.
 *
 * In return you get StaticWord2Vec model, which might be used as lookup table only in multi-gpu environment.
 *
 * @param file File should point to previously saved w2v model
 * @return model backed by the loaded storage and vocabulary
 * @throws RuntimeException wrapping a FileNotFoundException if the file does not exist or is a
 *         directory, or with the message "Unable to guess input file format" if no reader
 *         could parse the file
 */
// TODO: this method needs better name :)
public static WordVectors loadStaticModel(File file) {
    if (!file.exists() || file.isDirectory())
        throw new RuntimeException(new FileNotFoundException("File [" + file.getAbsolutePath() + "] was not found"));
    int originalFreq = Nd4j.getMemoryManager().getOccasionalGcFrequency();
    boolean originalPeriodic = Nd4j.getMemoryManager().isPeriodicGcActive();
    if (originalPeriodic)
        Nd4j.getMemoryManager().togglePeriodicGc(false);
    Nd4j.getMemoryManager().setOccasionalGcFrequency(50000);
    CompressedRamStorage<Integer> storage = new CompressedRamStorage.Builder<Integer>().useInplaceCompression(false).setCompressor(new NoOp()).emulateIsAbsent(false).build();
    VocabCache<VocabWord> vocabCache = new AbstractCache.Builder<VocabWord>().build();
    try {
        // if zip - that's dl4j format
        try {
            log.debug("Trying DL4j format...");
            loadStaticModelFromDl4jArchive(file, storage, vocabCache);
        } catch (Exception e) {
            // not a DL4j archive (or unreadable as one): start over with the next format
            try {
                // try to load file as text csv
                vocabCache = new AbstractCache.Builder<VocabWord>().build();
                storage.clear();
                log.debug("Trying CSVReader...");
                try (Reader reader = new CSVReader(file)) {
                    fillStaticModelStorage(reader, storage, vocabCache);
                }
            } catch (Exception ex) {
                // otherwise it's probably google model. which might be compressed or not
                log.debug("Trying BinaryReader...");
                vocabCache = new AbstractCache.Builder<VocabWord>().build();
                storage.clear();
                try (Reader reader = new BinaryReader(file)) {
                    fillStaticModelStorage(reader, storage, vocabCache);
                } catch (Exception ez) {
                    // keep the last failure as cause so the caller can see why parsing failed
                    throw new RuntimeException("Unable to guess input file format", ez);
                }
            }
        }
    } finally {
        // restore the GC settings exactly once, whatever attempt succeeded or failed
        if (originalPeriodic)
            Nd4j.getMemoryManager().togglePeriodicGc(true);
        Nd4j.getMemoryManager().setOccasionalGcFrequency(originalFreq);
    }
    return new StaticWord2Vec.Builder(storage, vocabCache).build();
}

/**
 * Extracts the "syn0.txt" entry of a DL4j archive into a temporary file and loads it as CSV.
 * The archive, the entry stream and the temporary file are all released before returning.
 */
private static void loadStaticModelFromDl4jArchive(File file, CompressedRamStorage<Integer> storage, VocabCache<VocabWord> vocabCache) throws Exception {
    try (ZipFile zipFile = new ZipFile(file)) {
        ZipEntry syn0 = zipFile.getEntry("syn0.txt");
        if (syn0 == null)
            throw new IOException("Archive [" + file.getAbsolutePath() + "] has no syn0.txt entry");
        File tmpFileSyn0 = File.createTempFile("word2vec", "syn");
        try {
            try (InputStream stream = zipFile.getInputStream(syn0)) {
                Files.copy(stream, Paths.get(tmpFileSyn0.getAbsolutePath()), StandardCopyOption.REPLACE_EXISTING);
            }
            storage.clear();
            try (Reader reader = new CSVReader(tmpFileSyn0)) {
                fillStaticModelStorage(reader, storage, vocabCache);
            }
        } finally {
            tmpFileSyn0.delete();
        }
    }
}

/**
 * Drains the reader, storing each vector under its word index and registering the word in the vocabulary.
 */
private static void fillStaticModelStorage(Reader reader, CompressedRamStorage<Integer> storage, VocabCache<VocabWord> vocabCache) throws Exception {
    while (reader.hasNext()) {
        Pair<VocabWord, float[]> pair = reader.next();
        VocabWord word = pair.getFirst();
        storage.store(word.getIndex(), pair.getSecond());
        vocabCache.addToken(word);
        vocabCache.addWordToIndex(word.getIndex(), word.getLabel());
        Nd4j.getMemoryManager().invokeGcOccasionally();
    }
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) NoOp(org.nd4j.compression.impl.NoOp) ZipEntry(java.util.zip.ZipEntry) VocabWord(org.deeplearning4j.models.word2vec.VocabWord) AbstractCache(org.deeplearning4j.models.word2vec.wordstore.inmemory.AbstractCache) DL4JInvalidInputException(org.deeplearning4j.exception.DL4JInvalidInputException) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StaticWord2Vec(org.deeplearning4j.models.word2vec.StaticWord2Vec) ZipFile(java.util.zip.ZipFile) ZipFile(java.util.zip.ZipFile) Pair(org.deeplearning4j.berkeley.Pair)

Example 58 with ZipFile

use of java.util.zip.ZipFile in project cogtool by cogtool.

the class ZipUtil method unzip.

/**
   * Unzips a zipfile to a destination directory.
   *
   * Directory entries are created as directories; file entries are copied in full
   * (until end of stream, so entries with an unknown size are handled too) and then
   * stamped with the entry's modification time when the entry has one.
   *
   * Entries whose name would resolve outside of the destination directory
   * (for example "../x") are rejected with an IOException before anything is
   * written for them.
   *
   * @param zip the file to unzip
   * @param fileDir the destination directory for zipfile contents; must not be null
   * @throws FileNotFoundException if an output file cannot be created
   * @throws IOException if reading or writing fails, or an entry escapes fileDir
   */
public static void unzip(ZipFile zip, File fileDir) throws FileNotFoundException, IOException {
    // Canonical form of the destination, used to detect entries that escape it
    String rootPath = fileDir.getCanonicalPath();
    String rootPrefix = rootPath.endsWith(File.separator) ? rootPath : rootPath + File.separator;
    // Copy buffer, 100k at a time; shared by all entries
    byte[] buff = new byte[100 * 1024];
    // Read out all entries from ZipFile via input streams
    for (Enumeration<? extends ZipEntry> en = zip.entries(); en.hasMoreElements(); ) {
        ZipEntry ze = en.nextElement();
        String name = ze.getName();
        // Create File in fileDir for unpacked entry
        File zeFile = new File(fileDir, name);
        String targetPath = zeFile.getCanonicalPath();
        if (!targetPath.equals(rootPath) && !targetPath.startsWith(rootPrefix)) {
            throw new IOException("Zip entry is outside of the destination directory: " + name);
        }
        if (ze.isDirectory()) {
            // If this entry is a directory, make it
            zeFile.mkdirs();
            continue;
        }
        // if this entry is a file, make its parent directories, then it
        File parent = zeFile.getParentFile();
        if (parent != null) {
            parent.mkdirs();
        }
        InputStream zin = zip.getInputStream(ze);
        try {
            OutputStream out = new BufferedOutputStream(new FileOutputStream(zeFile));
            try {
                int count;
                while ((count = zin.read(buff)) != -1) {
                    out.write(buff, 0, count);
                }
            } finally {
                out.close();
            }
        } finally {
            // closed independently of the output stream so neither can leak
            zin.close();
        }
        // Set modification time only after the data is written and the stream is closed;
        // writing would otherwise overwrite it. A negative time means "not specified"
        // and would make setLastModified throw.
        long time = ze.getTime();
        if (time >= 0) {
            zeFile.setLastModified(time);
        }
    }
}
Also used : FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) FileOutputStream(java.io.FileOutputStream) OutputStream(java.io.OutputStream) ZipOutputStream(java.util.zip.ZipOutputStream) FileOutputStream(java.io.FileOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) File(java.io.File) ZipFile(java.util.zip.ZipFile) BufferedOutputStream(java.io.BufferedOutputStream)

Example 59 with ZipFile

use of java.util.zip.ZipFile in project buck by facebook.

the class ZipWriteTest method main.

/**
 * Copies every entry of the zip named by args[0] into a zip output stream opened on
 * "/dev/null", then triggers two GC runs separated by sleeps.
 *
 * NOTE(review): the GC calls and sleeps presumably exist so memory use can be observed
 * from outside the process - confirm with the test's author.
 *
 * @param args args[0] is the path of the zip file to read
 */
public static void main(String[] args) throws IOException, InterruptedException {
    // Resources are closed in reverse order: the input zip first, then the output stream
    try (CustomZipOutputStream zipOut = ZipOutputStreams.newOutputStream(Paths.get("/dev/null"), ZipOutputStreams.HandleDuplicates.APPEND_TO_ZIP);
        ZipFile zipIn = new ZipFile(new File(args[0]))) {
        Enumeration<? extends ZipEntry> entries = zipIn.entries();
        while (entries.hasMoreElements()) {
            ZipEntry source = entries.nextElement();
            ZipEntry copy = new ZipEntry(source);
            boolean deflated = source.getMethod() == ZipEntry.DEFLATED;
            if (deflated) {
                // reset the compressed size copied from the source entry
                copy.setCompressedSize(-1);
            }
            zipOut.putNextEntry(copy);
            InputStream data = zipIn.getInputStream(source);
            ByteStreams.copy(data, zipOut);
            zipOut.closeEntry();
        }
    }
    long firstPause = TimeUnit.SECONDS.toMillis(1);
    long secondPause = TimeUnit.SECONDS.toMillis(5);
    System.gc();
    Thread.sleep(firstPause);
    System.gc();
    Thread.sleep(secondPause);
}
Also used : ZipFile(java.util.zip.ZipFile) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) ZipFile(java.util.zip.ZipFile) File(java.io.File)

Example 60 with ZipFile

use of java.util.zip.ZipFile in project buck by facebook.

the class GenAidlIntegrationTest method rootDirectoryDoesntChangeBuild.

/**
 * Builds "//:AService" from the "cached_build" scenario in two separate workspaces
 * (rooted at tmp and tmp2) and checks that rule keys, zip entries and the raw output
 * bytes are the same for both.
 */
@Test
public void rootDirectoryDoesntChangeBuild() throws IOException {
    AssumeAndroidPlatform.assumeSdkIsAvailable();

    ProjectWorkspace firstWorkspace = TestDataHelper.createProjectWorkspaceForScenario(this, "cached_build", tmp);
    firstWorkspace.setUp();
    Path firstOutput = firstWorkspace.buildAndReturnOutput("//:AService");

    ProjectWorkspace secondWorkspace = TestDataHelper.createProjectWorkspaceForScenario(this, "cached_build", tmp2);
    secondWorkspace.setUp();
    Path secondOutput = secondWorkspace.buildAndReturnOutput("//:AService");

    assertEquals(firstWorkspace.getBuildLog().getRuleKey("//:AService"), secondWorkspace.getBuildLog().getRuleKey("//:AService"));

    try (ZipFile firstZip = new ZipFile(firstOutput.toFile());
        ZipFile secondZip = new ZipFile(secondOutput.toFile())) {
        Enumeration<? extends ZipEntry> firstEntries = firstZip.entries();
        Enumeration<? extends ZipEntry> secondEntries = secondZip.entries();
        for (; firstEntries.hasMoreElements(); ) {
            // the second archive must have at least as many entries as the first
            assertTrue(secondEntries.hasMoreElements());
            ZipEntry firstEntry = firstEntries.nextElement();
            ZipEntry secondEntry = secondEntries.nextElement();
            // Data is compared before metadata: a crc mismatch in the debug string would
            // otherwise fail first and hide the actual content difference.
            assertEquals(zipEntryData(firstZip, firstEntry), zipEntryData(secondZip, secondEntry));
            assertEquals(zipEntryDebugString(firstEntry), zipEntryDebugString(secondEntry));
        }
        // ... and no more entries than the first
        assertFalse(secondEntries.hasMoreElements());
    }

    String firstContents = new String(Files.readAllBytes(firstOutput));
    String secondContents = new String(Files.readAllBytes(secondOutput));
    assertEquals(firstContents, secondContents);
}
Also used : Path(java.nio.file.Path) ProjectWorkspace(com.facebook.buck.testutil.integration.ProjectWorkspace) ZipFile(java.util.zip.ZipFile) ZipEntry(java.util.zip.ZipEntry) Test(org.junit.Test)

Aggregations

ZipFile (java.util.zip.ZipFile)637 ZipEntry (java.util.zip.ZipEntry)454 File (java.io.File)287 IOException (java.io.IOException)214 InputStream (java.io.InputStream)147 FileOutputStream (java.io.FileOutputStream)108 ZipOutputStream (java.util.zip.ZipOutputStream)92 Test (org.junit.Test)89 FileInputStream (java.io.FileInputStream)68 Enumeration (java.util.Enumeration)47 ArrayList (java.util.ArrayList)46 BufferedInputStream (java.io.BufferedInputStream)44 BufferedOutputStream (java.io.BufferedOutputStream)39 ZipInputStream (java.util.zip.ZipInputStream)35 ZipException (java.util.zip.ZipException)34 OutputStream (java.io.OutputStream)31 ClassReader (org.objectweb.asm.ClassReader)29 FileNotFoundException (java.io.FileNotFoundException)26 JarFile (java.util.jar.JarFile)26 ByteArrayOutputStream (java.io.ByteArrayOutputStream)24