use of java.util.zip.ZipEntry in project che by eclipse.
the class ZipUtilsTest method setUp.
/**
 * Creates the zip fixture used by each test method.
 *
 * <p>A temporary file is created and stored in {@code zipFile}, registered for deletion on JVM
 * exit, and filled with a single zip entry named {@code "test"} holding 2048 random bytes.
 *
 * <p>NOTE(review): the temp-file suffix is {@code "zip"} without a leading dot, so the generated
 * file name ends in "zip" but has no real extension. Left unchanged in case tests rely on it.
 *
 * @throws IOException if the temporary file cannot be created or written
 */
@BeforeMethod
public void setUp() throws IOException {
  zipFile = File.createTempFile("test", "zip");
  zipFile.deleteOnExit();

  final byte[] testData = new byte[2048];
  new Random().nextBytes(testData);

  // try-with-resources closes the stream; no explicit close() is needed inside the block.
  try (ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(zipFile))) {
    final ZipEntry entry = new ZipEntry("test");
    entry.setSize(testData.length);
    zos.putNextEntry(entry);
    zos.write(testData);
    zos.closeEntry();
  }
}
use of java.util.zip.ZipEntry in project druid by druid-io.
the class CompressionUtils method unzip.
/**
 * Unzip from the input stream to the output directory, using the entry's file name as the file name in the output directory.
 * The behavior of directories in the input stream's zip is undefined.
 * If possible, it is recommended to use unzip(ByteStream, File) instead
 *
 * <p>Every entry name is resolved against {@code outDir} and rejected if the canonical result
 * lies outside of {@code outDir} (for example a name containing {@code ../}), so a crafted
 * archive cannot overwrite files elsewhere on disk.
 *
 * @param in     The input stream of the zip data. This stream is closed
 * @param outDir The directory to copy the unzipped data to
 *
 * @return The FileUtils.FileCopyResult containing information on all the files which were written
 *
 * @throws IOException if reading or writing fails, or if an entry would be written outside of {@code outDir}
 */
public static FileUtils.FileCopyResult unzip(InputStream in, File outDir) throws IOException {
  try (final ZipInputStream zipIn = new ZipInputStream(in)) {
    final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
    // Resolved once; used as the containment root for every entry.
    final File canonicalOutDir = outDir.getCanonicalFile();
    ZipEntry entry;
    while ((entry = zipIn.getNextEntry()) != null) {
      final File file = new File(outDir, entry.getName());
      // Path.startsWith compares whole path components, so "/out-evil" does not match "/out".
      if (!file.getCanonicalFile().toPath().startsWith(canonicalOutDir.toPath())) {
        throw new IOException(
            String.format(
                "Zip entry[%s] resolves to [%s], which is outside of outDir[%s]",
                entry.getName(),
                file.getCanonicalPath(),
                canonicalOutDir
            )
        );
      }
      Files.asByteSink(file).writeFrom(zipIn);
      result.addFile(file);
      zipIn.closeEntry();
    }
    return result;
  }
}
use of java.util.zip.ZipEntry in project druid by druid-io.
the class CompressionUtils method unzip.
/**
 * Unzip the pulled file to an output directory. This is only expected to work on zips with lone files, and is not intended for zips with directory structures.
 *
 * <p>Every entry name is resolved against {@code outDir} and rejected if the canonical result
 * lies outside of {@code outDir} (for example a name containing {@code ../}), so a crafted
 * archive cannot overwrite files elsewhere on disk.
 *
 * @param pulledFile The file to unzip
 * @param outDir     The directory to store the contents of the file.
 *
 * @return a FileCopyResult of the files which were written to disk
 *
 * @throws IOException if the archive cannot be read or a path cannot be canonicalized
 */
public static FileUtils.FileCopyResult unzip(final File pulledFile, final File outDir) throws IOException {
  if (!(outDir.exists() && outDir.isDirectory())) {
    throw new ISE("outDir[%s] must exist and be a directory", outDir);
  }
  log.info("Unzipping file[%s] to [%s]", pulledFile, outDir);
  final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
  // Resolved once; used as the containment root for every entry.
  final File canonicalOutDir = outDir.getCanonicalFile();
  try (final ZipFile zipFile = new ZipFile(pulledFile)) {
    final Enumeration<? extends ZipEntry> enumeration = zipFile.entries();
    while (enumeration.hasMoreElements()) {
      final ZipEntry entry = enumeration.nextElement();
      final File outFile = new File(outDir, entry.getName());
      // Path.startsWith compares whole path components, so "/out-evil" does not match "/out".
      if (!outFile.getCanonicalFile().toPath().startsWith(canonicalOutDir.toPath())) {
        throw new ISE(
            "Zip entry[%s] in file[%s] resolves to [%s], which is outside of outDir[%s]",
            entry.getName(),
            pulledFile,
            outFile.getCanonicalPath(),
            canonicalOutDir
        );
      }
      result.addFiles(
          FileUtils.retryCopy(
              new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                  // A fresh stream per call, so each retry re-reads the entry from the start.
                  return new BufferedInputStream(zipFile.getInputStream(entry));
                }
              },
              outFile,
              FileUtils.IS_EXCEPTION,
              DEFAULT_RETRY_COUNT
          ).getFiles()
      );
    }
  }
  return result;
}
use of java.util.zip.ZipEntry in project deeplearning4j by deeplearning4j.
the class WordVectorSerializer method writeWord2VecModel.
/**
 * This method saves Word2Vec model into compressed zip file and sends it to output stream
 * PLEASE NOTE: This method saves FULL model, including syn0 AND syn1
 *
 * <p>The archive entries are written in this order: {@code syn0.txt}, {@code syn1.txt},
 * {@code syn1Neg.txt}, {@code codes.txt}, {@code huffman.txt}, {@code frequencies.txt},
 * {@code config.json}. Each text entry is first staged in a temporary file and then copied
 * into the archive. The temporary files are deleted before the method returns, whether it
 * succeeds or throws; {@code deleteOnExit()} stays registered as a fallback.
 *
 * <p>The zip stream is always closed, even on failure. The target stream is wrapped in a
 * {@code CloseShieldOutputStream} (presumably so the caller's stream is left open - confirm).
 *
 * @param vectors the model to serialize
 * @param stream  the stream the zip archive is written to
 * @throws IOException if a temporary file or the archive cannot be written
 */
public static void writeWord2VecModel(Word2Vec vectors, OutputStream stream) throws IOException {
    final File tempFileSyn0 = File.createTempFile("word2vec", "0");
    tempFileSyn0.deleteOnExit();
    final File tempFileSyn1 = File.createTempFile("word2vec", "1");
    tempFileSyn1.deleteOnExit();
    final File tempFileSyn1Neg = File.createTempFile("word2vec", "n");
    tempFileSyn1Neg.deleteOnExit();
    final File tempFileCodes = File.createTempFile("word2vec", "h");
    tempFileCodes.deleteOnExit();
    final File tempFileHuffman = File.createTempFile("word2vec", "h");
    tempFileHuffman.deleteOnExit();
    final File tempFileFreqs = File.createTempFile("word2vec", "f");
    tempFileFreqs.deleteOnExit();

    try (ZipOutputStream zipfile =
                    new ZipOutputStream(new BufferedOutputStream(new CloseShieldOutputStream(stream)))) {
        // writing out syn0
        zipfile.putNextEntry(new ZipEntry("syn0.txt"));
        writeWordVectors(vectors.lookupTable(), tempFileSyn0);
        zipStagedFile(tempFileSyn0, zipfile);

        // writing out syn1 (the entry is written empty when the table has no syn1)
        INDArray syn1 = ((InMemoryLookupTable<VocabWord>) vectors.getLookupTable()).getSyn1();
        dumpMatrixRows(syn1, tempFileSyn1);
        zipfile.putNextEntry(new ZipEntry("syn1.txt"));
        zipStagedFile(tempFileSyn1, zipfile);

        // writing out syn1Neg (the entry is written empty when the table has no syn1Neg)
        INDArray syn1Neg = ((InMemoryLookupTable<VocabWord>) vectors.getLookupTable()).getSyn1Neg();
        dumpMatrixRows(syn1Neg, tempFileSyn1Neg);
        zipfile.putNextEntry(new ZipEntry("syn1Neg.txt"));
        zipStagedFile(tempFileSyn1Neg, zipfile);

        // writing out huffman codes: "<base64 label> <code> <code> ..."
        zipfile.putNextEntry(new ZipEntry("codes.txt"));
        try (PrintWriter writer = new PrintWriter(new FileWriter(tempFileCodes))) {
            for (int i = 0; i < vectors.getVocab().numWords(); i++) {
                VocabWord word = vectors.getVocab().elementAtIndex(i);
                StringBuilder builder = new StringBuilder(encodeB64(word.getLabel())).append(" ");
                for (int code : word.getCodes()) {
                    builder.append(code).append(" ");
                }
                writer.println(builder.toString().trim());
            }
        }
        zipStagedFile(tempFileCodes, zipfile);

        // writing out huffman tree points: "<base64 label> <point> <point> ..."
        zipfile.putNextEntry(new ZipEntry("huffman.txt"));
        try (PrintWriter writer = new PrintWriter(new FileWriter(tempFileHuffman))) {
            for (int i = 0; i < vectors.getVocab().numWords(); i++) {
                VocabWord word = vectors.getVocab().elementAtIndex(i);
                StringBuilder builder = new StringBuilder(encodeB64(word.getLabel())).append(" ");
                for (int point : word.getPoints()) {
                    builder.append(point).append(" ");
                }
                writer.println(builder.toString().trim());
            }
        }
        zipStagedFile(tempFileHuffman, zipfile);

        // writing out word frequencies: "<base64 label> <element frequency> <docs appeared in>"
        zipfile.putNextEntry(new ZipEntry("frequencies.txt"));
        try (PrintWriter writer = new PrintWriter(new FileWriter(tempFileFreqs))) {
            for (int i = 0; i < vectors.getVocab().numWords(); i++) {
                VocabWord word = vectors.getVocab().elementAtIndex(i);
                StringBuilder builder = new StringBuilder(encodeB64(word.getLabel())).append(" ")
                                .append(word.getElementFrequency()).append(" ")
                                .append(vectors.getVocab().docAppearedIn(word.getLabel()));
                writer.println(builder.toString().trim());
            }
        }
        zipStagedFile(tempFileFreqs, zipfile);

        // writing out the configuration
        // NOTE(review): getBytes() uses the platform default charset; kept as-is so the bytes
        // written stay unchanged for existing readers.
        zipfile.putNextEntry(new ZipEntry("config.json"));
        writeEntry(new ByteArrayInputStream(vectors.getConfiguration().toJson().getBytes()), zipfile);
        // Leaving this block closes the zip stream, which finishes and flushes the archive.
    } finally {
        // Best-effort eager cleanup; deleteOnExit() remains as the fallback.
        for (File tempFile : new File[] {tempFileCodes, tempFileFreqs, tempFileHuffman, tempFileSyn0,
                        tempFileSyn1, tempFileSyn1Neg}) {
            try {
                tempFile.delete();
            } catch (SecurityException ignored) {
                // Deletion is only an optimisation here; the file is still removed on JVM exit.
            }
        }
    }
}

/**
 * Copies the content of a staged temporary file into the currently open zip entry.
 */
private static void zipStagedFile(File source, ZipOutputStream zipfile) throws IOException {
    try (BufferedInputStream fis = new BufferedInputStream(new FileInputStream(source))) {
        writeEntry(fis, zipfile);
    }
}

/**
 * Writes a matrix to a file, one row per line with space-separated values; does nothing for null.
 */
private static void dumpMatrixRows(INDArray matrix, File target) throws IOException {
    if (matrix == null) {
        return;
    }
    try (PrintWriter writer = new PrintWriter(new FileWriter(target))) {
        for (int x = 0; x < matrix.rows(); x++) {
            INDArray row = matrix.getRow(x);
            StringBuilder builder = new StringBuilder();
            for (int i = 0; i < row.length(); i++) {
                builder.append(row.getDouble(i)).append(" ");
            }
            writer.println(builder.toString().trim());
        }
    }
}
use of java.util.zip.ZipEntry in project deeplearning4j by deeplearning4j.
the class WordVectorSerializer method readWord2Vec.
/**
 * This method restores Word2Vec model previously saved with writeWord2VecModel
 *
 * PLEASE NOTE: This method loads FULL model, so don't use it if you're only going to use weights.
 *
 * <p>The entries {@code syn0.txt}, {@code syn1.txt}, {@code codes.txt} and {@code huffman.txt}
 * are extracted to temporary files (deleted on JVM exit) and passed to
 * {@code readWord2VecFromText} together with the configuration parsed from {@code config.json}.
 * The entries {@code frequencies.txt} and {@code syn1Neg.txt} are optional and applied to the
 * model only when present. While loading, periodic GC is switched off and the occasional GC
 * frequency is set to 50000; both settings are restored before returning.
 *
 * @param file the zip archive to read the model from
 * @return the restored Word2Vec model
 * @throws IOException if the archive cannot be read or a required entry is missing
 */
@Deprecated
public static Word2Vec readWord2Vec(File file) throws IOException {
    File tmpFileSyn0 = File.createTempFile("word2vec", "0");
    File tmpFileSyn1 = File.createTempFile("word2vec", "1");
    File tmpFileC = File.createTempFile("word2vec", "c");
    File tmpFileH = File.createTempFile("word2vec", "h");
    tmpFileSyn0.deleteOnExit();
    tmpFileSyn1.deleteOnExit();
    tmpFileH.deleteOnExit();
    tmpFileC.deleteOnExit();

    int originalFreq = Nd4j.getMemoryManager().getOccasionalGcFrequency();
    boolean originalPeriodic = Nd4j.getMemoryManager().isPeriodicGcActive();
    if (originalPeriodic) {
        Nd4j.getMemoryManager().togglePeriodicGc(false);
    }
    Nd4j.getMemoryManager().setOccasionalGcFrequency(50000);

    // The ZipFile is closed before the finally block below restores the GC settings.
    try (ZipFile zipFile = new ZipFile(file)) {
        extractRequiredEntry(zipFile, "syn0.txt", tmpFileSyn0);
        extractRequiredEntry(zipFile, "syn1.txt", tmpFileSyn1);
        extractRequiredEntry(zipFile, "codes.txt", tmpFileC);
        extractRequiredEntry(zipFile, "huffman.txt", tmpFileH);

        StringBuilder builder = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(openRequiredEntry(zipFile, "config.json")))) {
            String line;
            while ((line = reader.readLine()) != null) {
                builder.append(line);
            }
        }
        VectorsConfiguration configuration = VectorsConfiguration.fromJson(builder.toString().trim());

        // we read first 4 files as w2v model
        Word2Vec w2v = readWord2VecFromText(tmpFileSyn0, tmpFileSyn1, tmpFileC, tmpFileH, configuration);

        // we read frequencies from frequencies.txt, however it's possible that we might not have this file
        ZipEntry frequencies = zipFile.getEntry("frequencies.txt");
        if (frequencies != null) {
            try (BufferedReader reader = new BufferedReader(
                            new InputStreamReader(zipFile.getInputStream(frequencies)))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    // line format: "<base64 label> <element frequency> <sequences count>"
                    String[] split = line.split(" ");
                    VocabWord word = w2v.getVocab().tokenFor(decodeB64(split[0]));
                    word.setElementFrequency((long) Double.parseDouble(split[1]));
                    word.setSequencesCount((long) Double.parseDouble(split[2]));
                }
            }
        }

        ZipEntry zsyn1Neg = zipFile.getEntry("syn1Neg.txt");
        if (zsyn1Neg != null) {
            try (BufferedReader reader = new BufferedReader(
                            new InputStreamReader(zipFile.getInputStream(zsyn1Neg)))) {
                String line;
                List<INDArray> rows = new ArrayList<>();
                while ((line = reader.readLine()) != null) {
                    String[] split = line.split(" ");
                    double[] array = new double[split.length];
                    for (int i = 0; i < split.length; i++) {
                        array[i] = Double.parseDouble(split[i]);
                    }
                    rows.add(Nd4j.create(array));
                }

                // it's possible to have full model without syn1Neg
                if (!rows.isEmpty()) {
                    INDArray syn1Neg = Nd4j.vstack(rows);
                    ((InMemoryLookupTable<VocabWord>) w2v.getLookupTable()).setSyn1Neg(syn1Neg);
                }
            }
        }

        return w2v;
    } finally {
        if (originalPeriodic) {
            Nd4j.getMemoryManager().togglePeriodicGc(true);
        }
        Nd4j.getMemoryManager().setOccasionalGcFrequency(originalFreq);
    }
}

/**
 * Opens the named entry of the archive, failing with a descriptive IOException when it is absent.
 */
private static InputStream openRequiredEntry(ZipFile zipFile, String entryName) throws IOException {
    ZipEntry entry = zipFile.getEntry(entryName);
    if (entry == null) {
        throw new IOException("Required entry [" + entryName + "] is missing from archive [" + zipFile.getName() + "]");
    }
    return zipFile.getInputStream(entry);
}

/**
 * Copies the named required entry of the archive into the target file, replacing its content.
 */
private static void extractRequiredEntry(ZipFile zipFile, String entryName, File target) throws IOException {
    try (InputStream in = openRequiredEntry(zipFile, entryName)) {
        Files.copy(in, target.toPath(), StandardCopyOption.REPLACE_EXISTING);
    }
}
Aggregations