Search in sources :

Example 11 with ZipEntry

use of java.util.zip.ZipEntry in project che by eclipse.

the class ZipUtilsTest method setUp.

/**
 * Creates a fresh temporary zip archive before each test method and stores it in {@code zipFile}.
 *
 * <p>The archive contains a single entry named {@code "test"} holding 2048 random bytes. The
 * file is registered for deletion when the JVM exits.
 *
 * @throws IOException if the temporary file cannot be created or written
 */
@BeforeMethod
public void setUp() throws IOException {
    zipFile = File.createTempFile("test", "zip");
    zipFile.deleteOnExit();
    byte[] testData = new byte[2048];
    new Random().nextBytes(testData);
    // try-with-resources closes the stream (and thereby finishes the archive),
    // so no explicit close() call is needed inside the block.
    try (ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(zipFile))) {
        ZipEntry entry = new ZipEntry("test");
        entry.setSize(testData.length);
        zos.putNextEntry(entry);
        zos.write(testData);
        zos.closeEntry();
    }
}
Also used : Random(java.util.Random) ZipOutputStream(java.util.zip.ZipOutputStream) FileOutputStream(java.io.FileOutputStream) ZipEntry(java.util.zip.ZipEntry) BeforeMethod(org.testng.annotations.BeforeMethod)

Example 12 with ZipEntry

use of java.util.zip.ZipEntry in project druid by druid-io.

the class CompressionUtils method unzip.

/**
   * Unzip from the input stream to the output directory, using the entry's file name as the file name in the output directory.
   * The behavior of directories in the input stream's zip is undefined.
   * If possible, it is recommended to use unzip(ByteStream, File) instead
   *
   * Entry names are taken from the archive and are therefore untrusted: every resolved output path is
   * checked to be located inside outDir before anything is written, so that names such as "../../x"
   * cannot escape the target directory.
   *
   * @param in     The input stream of the zip data. This stream is closed
   * @param outDir The directory to copy the unzipped data to
   *
   * @return The FileUtils.FileCopyResult containing information on all the files which were written
   *
   * @throws IOException if reading or writing fails, or if an entry would be written outside of outDir
   */
public static FileUtils.FileCopyResult unzip(InputStream in, File outDir) throws IOException {
    try (final ZipInputStream zipIn = new ZipInputStream(in)) {
        // Canonicalized inside the try block so that `in` is still closed if resolution fails.
        final File canonicalOutDir = outDir.getCanonicalFile();
        final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
        ZipEntry entry;
        while ((entry = zipIn.getNextEntry()) != null) {
            final File file = new File(outDir, entry.getName());
            // Guard against path traversal ("Zip Slip") before touching the file system.
            if (!file.getCanonicalFile().toPath().startsWith(canonicalOutDir.toPath())) {
                throw new IOException(
                    "Zip entry [" + entry.getName() + "] resolves to [" + file + "] which is outside of outDir [" + outDir + "]"
                );
            }
            Files.asByteSink(file).writeFrom(zipIn);
            result.addFile(file);
            zipIn.closeEntry();
        }
        return result;
    }
}
Also used : ZipInputStream(java.util.zip.ZipInputStream) ZipEntry(java.util.zip.ZipEntry) File(java.io.File) ZipFile(java.util.zip.ZipFile)

Example 13 with ZipEntry

use of java.util.zip.ZipEntry in project druid by druid-io.

the class CompressionUtils method unzip.

/**
   * Unzip the pulled file to an output directory. This is only expected to work on zips with lone files, and is not intended for zips with directory structures.
   *
   * Entry names come from the archive and are therefore untrusted: each resolved output path is checked
   * to be located inside outDir before it is copied, so that names such as "../../x" cannot escape
   * the target directory.
   *
   * @param pulledFile The file to unzip
   * @param outDir     The directory to store the contents of the file.
   *
   * @return a FileCopyResult of the files which were written to disk
   *
   * @throws IOException if reading or writing fails, or if an entry would be written outside of outDir
   */
public static FileUtils.FileCopyResult unzip(final File pulledFile, final File outDir) throws IOException {
    if (!(outDir.exists() && outDir.isDirectory())) {
        throw new ISE("outDir[%s] must exist and be a directory", outDir);
    }
    log.info("Unzipping file[%s] to [%s]", pulledFile, outDir);
    final File canonicalOutDir = outDir.getCanonicalFile();
    final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
    try (final ZipFile zipFile = new ZipFile(pulledFile)) {
        final Enumeration<? extends ZipEntry> enumeration = zipFile.entries();
        while (enumeration.hasMoreElements()) {
            final ZipEntry entry = enumeration.nextElement();
            final File outFile = new File(outDir, entry.getName());
            // Guard against path traversal ("Zip Slip") before anything is copied.
            if (!outFile.getCanonicalFile().toPath().startsWith(canonicalOutDir.toPath())) {
                throw new IOException(
                    "Zip entry [" + entry.getName() + "] resolves to [" + outFile + "] which is outside of outDir [" + outDir + "]"
                );
            }
            result.addFiles(FileUtils.retryCopy(new ByteSource() {

                // A fresh stream is opened on every call; the copy is invoked with a retry count.
                @Override
                public InputStream openStream() throws IOException {
                    return new BufferedInputStream(zipFile.getInputStream(entry));
                }
            }, outFile, FileUtils.IS_EXCEPTION, DEFAULT_RETRY_COUNT).getFiles());
        }
    }
    return result;
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) ZipInputStream(java.util.zip.ZipInputStream) FilterInputStream(java.io.FilterInputStream) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) IOException(java.io.IOException) ZipFile(java.util.zip.ZipFile) BufferedInputStream(java.io.BufferedInputStream) ByteSource(com.google.common.io.ByteSource) File(java.io.File) ZipFile(java.util.zip.ZipFile)

Example 14 with ZipEntry

use of java.util.zip.ZipEntry in project deeplearning4j by deeplearning4j.

the class WordVectorSerializer method writeWord2VecModel.

/**
     * This method saves Word2Vec model into compressed zip file and sends it to output stream
     * PLEASE NOTE: This method saves FULL model, including syn0 AND syn1
     *
     * The archive entries are written in this order: syn0.txt, syn1.txt, syn1Neg.txt, codes.txt,
     * huffman.txt, frequencies.txt, config.json. Each text entry is first staged in a temporary file
     * and then streamed into the zip. The given output stream is wrapped in a CloseShieldOutputStream
     * before the zip stream is built on top of it.
     *
     * @param vectors the model to serialize
     * @param stream  the stream receiving the zip data
     * @throws IOException if a temporary file or the zip stream cannot be written
     */
public static void writeWord2VecModel(Word2Vec vectors, OutputStream stream) throws IOException {
    File tempFileSyn0 = File.createTempFile("word2vec", "0");
    tempFileSyn0.deleteOnExit();
    File tempFileSyn1 = File.createTempFile("word2vec", "1");
    tempFileSyn1.deleteOnExit();
    File tempFileSyn1Neg = File.createTempFile("word2vec", "n");
    tempFileSyn1Neg.deleteOnExit();
    File tempFileCodes = File.createTempFile("word2vec", "h");
    tempFileCodes.deleteOnExit();
    File tempFileHuffman = File.createTempFile("word2vec", "h");
    tempFileHuffman.deleteOnExit();
    File tempFileFreqs = File.createTempFile("word2vec", "f");
    tempFileFreqs.deleteOnExit();

    // try-with-resources guarantees the zip stream is finished and closed even when a step fails.
    try (ZipOutputStream zipfile = new ZipOutputStream(new BufferedOutputStream(new CloseShieldOutputStream(stream)))) {
        // writing out syn0
        zipfile.putNextEntry(new ZipEntry("syn0.txt"));
        writeWordVectors(vectors.lookupTable(), tempFileSyn0);
        copyTempFileIntoZip(tempFileSyn0, zipfile);

        // writing out syn1 (the entry stays empty when the table has no syn1)
        INDArray syn1 = ((InMemoryLookupTable<VocabWord>) vectors.getLookupTable()).getSyn1();
        writeMatrixRows(syn1, tempFileSyn1);
        zipfile.putNextEntry(new ZipEntry("syn1.txt"));
        copyTempFileIntoZip(tempFileSyn1, zipfile);

        // writing out syn1Neg (the entry stays empty when the table has no syn1Neg)
        INDArray syn1Neg = ((InMemoryLookupTable<VocabWord>) vectors.getLookupTable()).getSyn1Neg();
        writeMatrixRows(syn1Neg, tempFileSyn1Neg);
        zipfile.putNextEntry(new ZipEntry("syn1Neg.txt"));
        copyTempFileIntoZip(tempFileSyn1Neg, zipfile);

        // writing out huffman codes: "<base64 label> <code> <code> ..."
        zipfile.putNextEntry(new ZipEntry("codes.txt"));
        try (PrintWriter writer = new PrintWriter(new FileWriter(tempFileCodes))) {
            for (int i = 0; i < vectors.getVocab().numWords(); i++) {
                VocabWord word = vectors.getVocab().elementAtIndex(i);
                StringBuilder builder = new StringBuilder(encodeB64(word.getLabel())).append(" ");
                for (int code : word.getCodes()) {
                    builder.append(code).append(" ");
                }
                writer.println(builder.toString().trim());
            }
        }
        copyTempFileIntoZip(tempFileCodes, zipfile);

        // writing out huffman points: "<base64 label> <point> <point> ..."
        zipfile.putNextEntry(new ZipEntry("huffman.txt"));
        try (PrintWriter writer = new PrintWriter(new FileWriter(tempFileHuffman))) {
            for (int i = 0; i < vectors.getVocab().numWords(); i++) {
                VocabWord word = vectors.getVocab().elementAtIndex(i);
                StringBuilder builder = new StringBuilder(encodeB64(word.getLabel())).append(" ");
                for (int point : word.getPoints()) {
                    builder.append(point).append(" ");
                }
                writer.println(builder.toString().trim());
            }
        }
        copyTempFileIntoZip(tempFileHuffman, zipfile);

        // writing out word frequencies: "<base64 label> <element frequency> <docAppearedIn count>"
        zipfile.putNextEntry(new ZipEntry("frequencies.txt"));
        try (PrintWriter writer = new PrintWriter(new FileWriter(tempFileFreqs))) {
            for (int i = 0; i < vectors.getVocab().numWords(); i++) {
                VocabWord word = vectors.getVocab().elementAtIndex(i);
                StringBuilder builder = new StringBuilder(encodeB64(word.getLabel())).append(" ").append(word.getElementFrequency()).append(" ").append(vectors.getVocab().docAppearedIn(word.getLabel()));
                writer.println(builder.toString().trim());
            }
        }
        copyTempFileIntoZip(tempFileFreqs, zipfile);

        // writing out the configuration as JSON
        zipfile.putNextEntry(new ZipEntry("config.json"));
        writeEntry(new ByteArrayInputStream(vectors.getConfiguration().toJson().getBytes()), zipfile);
    } finally {
        // Best-effort cleanup on success AND failure; deleteOnExit() above remains the fallback.
        File[] tempFiles = {tempFileCodes, tempFileFreqs, tempFileHuffman, tempFileSyn0, tempFileSyn1, tempFileSyn1Neg};
        for (File tempFile : tempFiles) {
            try {
                tempFile.delete();
            } catch (SecurityException ignored) {
                // deletion is optional here; never mask an exception from the write path
            }
        }
    }
}

/** Streams the content of {@code source} into the currently open entry of {@code zipfile}, always closing the file stream. */
private static void copyTempFileIntoZip(File source, ZipOutputStream zipfile) throws IOException {
    try (BufferedInputStream fis = new BufferedInputStream(new FileInputStream(source))) {
        writeEntry(fis, zipfile);
    }
}

/** Writes each row of {@code matrix} to {@code target} as one line of space-separated doubles; does nothing when {@code matrix} is null. */
private static void writeMatrixRows(INDArray matrix, File target) throws IOException {
    if (matrix == null) {
        return;
    }
    try (PrintWriter writer = new PrintWriter(new FileWriter(target))) {
        for (int x = 0; x < matrix.rows(); x++) {
            INDArray row = matrix.getRow(x);
            StringBuilder builder = new StringBuilder();
            for (int i = 0; i < row.length(); i++) {
                builder.append(row.getDouble(i)).append(" ");
            }
            writer.println(builder.toString().trim());
        }
    }
}
Also used : ZipEntry(java.util.zip.ZipEntry) VocabWord(org.deeplearning4j.models.word2vec.VocabWord) DL4JInvalidInputException(org.deeplearning4j.exception.DL4JInvalidInputException) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) InMemoryLookupTable(org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ZipOutputStream(java.util.zip.ZipOutputStream) ZipFile(java.util.zip.ZipFile)

Example 15 with ZipEntry

use of java.util.zip.ZipEntry in project deeplearning4j by deeplearning4j.

the class WordVectorSerializer method readWord2Vec.

/**
     * This method restores Word2Vec model previously saved with writeWord2VecModel
     *
     * PLEASE NOTE: This method loads FULL model, so don't use it if you're only going to use weights.
     *
     * The entries syn0.txt, syn1.txt, codes.txt and huffman.txt are extracted into temporary files and
     * config.json is parsed into a VectorsConfiguration; these are handed to readWord2VecFromText.
     * The entries frequencies.txt and syn1Neg.txt are optional and applied to the model only when present.
     * While loading, periodic GC is switched off (if it was on) and the occasional GC frequency is set to
     * 50000; both settings are restored before the method returns or throws.
     *
     * @param file zip archive to read the model from
     * @return the restored Word2Vec model
     * @throws IOException if the archive or one of the temporary files cannot be read or written
     */
@Deprecated
public static Word2Vec readWord2Vec(File file) throws IOException {
    File tmpFileSyn0 = File.createTempFile("word2vec", "0");
    File tmpFileSyn1 = File.createTempFile("word2vec", "1");
    File tmpFileC = File.createTempFile("word2vec", "c");
    File tmpFileH = File.createTempFile("word2vec", "h");
    tmpFileSyn0.deleteOnExit();
    tmpFileSyn1.deleteOnExit();
    tmpFileH.deleteOnExit();
    tmpFileC.deleteOnExit();
    int originalFreq = Nd4j.getMemoryManager().getOccasionalGcFrequency();
    boolean originalPeriodic = Nd4j.getMemoryManager().isPeriodicGcActive();
    if (originalPeriodic)
        Nd4j.getMemoryManager().togglePeriodicGc(false);
    Nd4j.getMemoryManager().setOccasionalGcFrequency(50000);
    // Closing the ZipFile also closes every entry stream obtained from it,
    // so the streams passed to Files.copy below do not need to be closed individually.
    try (ZipFile zipFile = new ZipFile(file)) {
        ZipEntry syn0 = zipFile.getEntry("syn0.txt");
        InputStream stream = zipFile.getInputStream(syn0);
        Files.copy(stream, Paths.get(tmpFileSyn0.getAbsolutePath()), StandardCopyOption.REPLACE_EXISTING);
        ZipEntry syn1 = zipFile.getEntry("syn1.txt");
        stream = zipFile.getInputStream(syn1);
        Files.copy(stream, Paths.get(tmpFileSyn1.getAbsolutePath()), StandardCopyOption.REPLACE_EXISTING);
        ZipEntry codes = zipFile.getEntry("codes.txt");
        stream = zipFile.getInputStream(codes);
        Files.copy(stream, Paths.get(tmpFileC.getAbsolutePath()), StandardCopyOption.REPLACE_EXISTING);
        ZipEntry huffman = zipFile.getEntry("huffman.txt");
        stream = zipFile.getInputStream(huffman);
        Files.copy(stream, Paths.get(tmpFileH.getAbsolutePath()), StandardCopyOption.REPLACE_EXISTING);
        ZipEntry config = zipFile.getEntry("config.json");
        stream = zipFile.getInputStream(config);
        StringBuilder builder = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
            String line;
            while ((line = reader.readLine()) != null) {
                builder.append(line);
            }
        }
        VectorsConfiguration configuration = VectorsConfiguration.fromJson(builder.toString().trim());
        // we read first 4 files as w2v model
        Word2Vec w2v = readWord2VecFromText(tmpFileSyn0, tmpFileSyn1, tmpFileC, tmpFileH, configuration);
        // we read frequencies from frequencies.txt, however it's possible that we might not have this file
        ZipEntry frequencies = zipFile.getEntry("frequencies.txt");
        if (frequencies != null) {
            stream = zipFile.getInputStream(frequencies);
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    // line layout: "<base64 label> <element frequency> <sequences count>"
                    String[] split = line.split(" ");
                    VocabWord word = w2v.getVocab().tokenFor(decodeB64(split[0]));
                    word.setElementFrequency((long) Double.parseDouble(split[1]));
                    word.setSequencesCount((long) Double.parseDouble(split[2]));
                }
            }
        }
        ZipEntry zsyn1Neg = zipFile.getEntry("syn1Neg.txt");
        if (zsyn1Neg != null) {
            stream = zipFile.getInputStream(zsyn1Neg);
            try (InputStreamReader isr = new InputStreamReader(stream);
                BufferedReader reader = new BufferedReader(isr)) {
                String line = null;
                List<INDArray> rows = new ArrayList<>();
                while ((line = reader.readLine()) != null) {
                    // every line holds one matrix row as space-separated doubles
                    String[] split = line.split(" ");
                    double[] array = new double[split.length];
                    for (int i = 0; i < split.length; i++) {
                        array[i] = Double.parseDouble(split[i]);
                    }
                    rows.add(Nd4j.create(array));
                }
                // it's possible to have full model without syn1Neg
                if (rows.size() > 0) {
                    INDArray syn1Neg = Nd4j.vstack(rows);
                    ((InMemoryLookupTable) w2v.getLookupTable()).setSyn1Neg(syn1Neg);
                }
            }
        }
        return w2v;
    } finally {
        // restore the GC settings that were changed above, on success and on failure
        if (originalPeriodic)
            Nd4j.getMemoryManager().togglePeriodicGc(true);
        Nd4j.getMemoryManager().setOccasionalGcFrequency(originalFreq);
    }
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) ZipEntry(java.util.zip.ZipEntry) ArrayList(java.util.ArrayList) VocabWord(org.deeplearning4j.models.word2vec.VocabWord) InMemoryLookupTable(org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable) ZipFile(java.util.zip.ZipFile) INDArray(org.nd4j.linalg.api.ndarray.INDArray) StaticWord2Vec(org.deeplearning4j.models.word2vec.StaticWord2Vec) Word2Vec(org.deeplearning4j.models.word2vec.Word2Vec) ZipFile(java.util.zip.ZipFile)

Aggregations

ZipEntry (java.util.zip.ZipEntry)1367 ZipFile (java.util.zip.ZipFile)479 File (java.io.File)469 IOException (java.io.IOException)361 ZipOutputStream (java.util.zip.ZipOutputStream)321 ZipInputStream (java.util.zip.ZipInputStream)300 InputStream (java.io.InputStream)282 FileOutputStream (java.io.FileOutputStream)278 FileInputStream (java.io.FileInputStream)270 Test (org.junit.Test)124 BufferedInputStream (java.io.BufferedInputStream)122 JarFile (java.util.jar.JarFile)122 BufferedOutputStream (java.io.BufferedOutputStream)99 ByteArrayOutputStream (java.io.ByteArrayOutputStream)97 ArrayList (java.util.ArrayList)84 ByteArrayInputStream (java.io.ByteArrayInputStream)78 OutputStream (java.io.OutputStream)67 JarOutputStream (java.util.jar.JarOutputStream)59 Path (java.nio.file.Path)56 Enumeration (java.util.Enumeration)56