Use of java.util.zip.ZipFile in project deeplearning4j by deeplearning4j.
The class WordVectorSerializer, method readParagraphVectors.
/**
 * This method restores ParagraphVectors model previously saved with writeParagraphVectors()
 *
 * The word2vec part of the archive is loaded through readWord2Vec(file) and wrapped into a
 * ParagraphVectors instance. If the archive contains a "labels.txt" entry, every line of it is
 * trimmed, passed through decodeB64() and, when the resulting token exists in the vocabulary,
 * that token is marked as a label.
 *
 * @param file archive to read the model from
 * @return restored ParagraphVectors model
 * @throws IOException if the temporary file cannot be created, or the archive cannot be opened or read
 */
public static ParagraphVectors readParagraphVectors(File file) throws IOException {
    File tmpFileL = File.createTempFile("paravec", "l");
    tmpFileL.deleteOnExit();

    Word2Vec w2v = readWord2Vec(file);

    // and "convert" it to ParaVec model + optionally trying to restore labels information
    ParagraphVectors vectors = new ParagraphVectors.Builder(w2v.getConfiguration())
            .vocabCache(w2v.getVocab())
            .lookupTable(w2v.getLookupTable())
            .resetModel(false)
            .build();

    // The archive handle is released as soon as the labels have been processed;
    // previously neither the ZipFile nor the entry stream was ever closed.
    try (ZipFile zipFile = new ZipFile(file)) {
        // now we try to restore labels information
        ZipEntry labels = zipFile.getEntry("labels.txt");
        if (labels != null) {
            try (InputStream stream = zipFile.getInputStream(labels)) {
                Files.copy(stream, Paths.get(tmpFileL.getAbsolutePath()), StandardCopyOption.REPLACE_EXISTING);
            }
            try (BufferedReader reader = new BufferedReader(new FileReader(tmpFileL))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    VocabWord word = vectors.getVocab().tokenFor(decodeB64(line.trim()));
                    if (word != null) {
                        word.markAsLabel(true);
                    }
                }
            }
        }
    }

    vectors.extractLabels();
    return vectors;
}
Use of java.util.zip.ZipFile in project deeplearning4j by deeplearning4j.
The class WordVectorSerializer, method loadStaticModel.
/**
 * This method restores previously saved w2v model. File can be in one of the following formats:
 * 1) Binary model, either compressed or not. Like well-known Google Model
 * 2) Popular CSV word2vec text format
 * 3) DL4j compressed format
 *
 * The formats are probed in this order: DL4j zip archive, text CSV, binary. The first reader
 * that consumes the whole file without throwing wins. While loading, periodic GC is switched
 * off and the occasional GC frequency is set to 50000; both settings are restored to their
 * original values before the method returns or throws.
 *
 * In return you get StaticWord2Vec model, which might be used as lookup table only in multi-gpu environment.
 *
 * @param file File should point to previously saved w2v model
 * @return StaticWord2Vec built from the vectors and vocabulary read from the file
 * @throws RuntimeException wrapping a FileNotFoundException if the file is missing or is a
 *         directory, or with message "Unable to guess input file format" if no reader succeeds
 */
// TODO: this method needs better name :)
public static WordVectors loadStaticModel(File file) {
    if (!file.exists() || file.isDirectory())
        throw new RuntimeException(new FileNotFoundException("File [" + file.getAbsolutePath() + "] was not found"));

    int originalFreq = Nd4j.getMemoryManager().getOccasionalGcFrequency();
    boolean originalPeriodic = Nd4j.getMemoryManager().isPeriodicGcActive();

    if (originalPeriodic)
        Nd4j.getMemoryManager().togglePeriodicGc(false);
    Nd4j.getMemoryManager().setOccasionalGcFrequency(50000);

    CompressedRamStorage<Integer> storage;
    VocabCache<VocabWord> vocabCache;
    try {
        storage = new CompressedRamStorage.Builder<Integer>()
                .useInplaceCompression(false)
                .setCompressor(new NoOp())
                .emulateIsAbsent(false)
                .build();
        vocabCache = readStaticModelData(file, storage);
    } finally {
        // single restore point for the GC settings, reached on success and on every failure path
        if (originalPeriodic)
            Nd4j.getMemoryManager().togglePeriodicGc(true);
        Nd4j.getMemoryManager().setOccasionalGcFrequency(originalFreq);
    }

    return new StaticWord2Vec.Builder(storage, vocabCache).build();
}

/**
 * Probes the supported formats in order (DL4j zip, CSV, binary) and fills the storage from the first one that reads cleanly.
 */
private static VocabCache<VocabWord> readStaticModelData(File file, CompressedRamStorage<Integer> storage) {
    // if zip - that's dl4j format
    try {
        log.debug("Trying DL4j format...");
        return readStaticModelFromDl4jArchive(file, storage);
    } catch (Exception ignored) {
        // not a DL4j archive (or an unreadable one): fall through to the next format
    }

    // try to load file as text csv
    try {
        log.debug("Trying CSVReader...");
        try (Reader reader = new CSVReader(file)) {
            return readAllVectors(reader, storage);
        }
    } catch (Exception ignored) {
        // we throw away this exception, and trying to load data as binary model
    }

    // otherwise it's probably google model. which might be compressed or not
    log.debug("Trying BinaryReader...");
    try (Reader reader = new BinaryReader(file)) {
        return readAllVectors(reader, storage);
    } catch (Exception ez) {
        // keep the last failure as the cause so the reason is not lost
        throw new RuntimeException("Unable to guess input file format", ez);
    }
}

/**
 * Extracts the "syn0.txt" entry of a DL4j zip archive into a temporary file and reads it with CSVReader.
 */
private static VocabCache<VocabWord> readStaticModelFromDl4jArchive(File file, CompressedRamStorage<Integer> storage) throws Exception {
    try (ZipFile zipFile = new ZipFile(file)) {
        ZipEntry syn0 = zipFile.getEntry("syn0.txt");
        if (syn0 == null)
            throw new IOException("Archive [" + file.getAbsolutePath() + "] has no syn0.txt entry");

        // the temp file is created only once we know there is something to extract
        File tmpFileSyn0 = File.createTempFile("word2vec", "syn");
        try {
            try (InputStream stream = zipFile.getInputStream(syn0)) {
                Files.copy(stream, Paths.get(tmpFileSyn0.getAbsolutePath()), StandardCopyOption.REPLACE_EXISTING);
            }
            try (Reader reader = new CSVReader(tmpFileSyn0)) {
                return readAllVectors(reader, storage);
            }
        } finally {
            // the extracted copy is no longer needed once the reader is closed
            if (!tmpFileSyn0.delete())
                tmpFileSyn0.deleteOnExit();
        }
    }
}

/**
 * Clears the storage, then stores every (word, vector) pair from the reader into it and into a fresh vocabulary cache.
 */
private static VocabCache<VocabWord> readAllVectors(Reader reader, CompressedRamStorage<Integer> storage) throws Exception {
    VocabCache<VocabWord> vocabCache = new AbstractCache.Builder<VocabWord>().build();
    storage.clear();

    while (reader.hasNext()) {
        Pair<VocabWord, float[]> pair = reader.next();
        VocabWord word = pair.getFirst();

        storage.store(word.getIndex(), pair.getSecond());
        vocabCache.addToken(word);
        vocabCache.addWordToIndex(word.getIndex(), word.getLabel());

        Nd4j.getMemoryManager().invokeGcOccasionally();
    }
    return vocabCache;
}
Use of java.util.zip.ZipFile in project cogtool by cogtool.
The class ZipUtil, method unzip.
/**
 * Unzips a zipfile to a destination directory.
 *
 * Directory entries are created with mkdirs(); file entries are copied until the end of the
 * entry's stream, after creating any missing parent directories. Once a file has been written
 * and closed, its modification time is set to the entry's time when the entry carries one.
 * Entries whose resolved location falls outside the destination directory (for example names
 * containing "..") are rejected instead of being written.
 *
 * @param zip the file to unzip
 * @param fileDir the destination directory for zipfile contents
 * @throws FileNotFoundException if an output file cannot be opened for writing
 * @throws IOException if reading or writing fails, or an entry would be extracted outside fileDir
 */
public static void unzip(ZipFile zip, File fileDir) throws FileNotFoundException, IOException {
    // Canonical form of the destination, used to verify that no entry escapes it
    String rootPath = fileDir.getCanonicalPath();
    String rootPrefix = rootPath.endsWith(File.separator) ? rootPath : rootPath + File.separator;

    // Copy data 100k at a time; the buffer is reused for every entry
    byte[] buff = new byte[100 * 1024];

    for (Enumeration<? extends ZipEntry> en = zip.entries(); en.hasMoreElements(); ) {
        ZipEntry ze = en.nextElement();
        String name = ze.getName();

        // Create File in fileDir for unpacked entry
        File zeFile = new File(fileDir, name);

        // Guard against path traversal ("zip slip") before touching the file system
        String targetPath = zeFile.getCanonicalPath();
        if (!targetPath.equals(rootPath) && !targetPath.startsWith(rootPrefix)) {
            throw new IOException("Zip entry is outside of the destination directory: " + name);
        }

        if (ze.isDirectory()) {
            // If this entry is a directory, make it
            zeFile.mkdirs();
            continue;
        }

        // If this entry is a file, make its parent directories, then it
        File parent = zeFile.getParentFile();
        if (parent != null) {
            parent.mkdirs();
        }

        // Read until EOF rather than trusting the declared size, which may be unknown (-1)
        try (InputStream zin = zip.getInputStream(ze);
             OutputStream out = new BufferedOutputStream(new FileOutputStream(zeFile))) {
            int read;
            while ((read = zin.read(buff)) != -1) {
                out.write(buff, 0, read);
            }
        }

        // Set modification time only after the file is closed, otherwise the write
        // overrides it; a negative time means "not specified" and is not a valid argument
        long time = ze.getTime();
        if (time >= 0) {
            zeFile.setLastModified(time);
        }
    }
}
Use of java.util.zip.ZipFile in project buck by facebook.
The class ZipWriteTest, method main.
/**
 * Re-writes every entry of the zip file named by the first argument into a zip output stream
 * that targets /dev/null, then requests garbage collection twice with pauses in between.
 *
 * NOTE(review): the gc/sleep sequence looks intended for observing memory from outside the
 * process - confirm with the test's owner.
 *
 * @param args args[0] is the path of the zip file to read
 * @throws IOException if reading the input or writing the output fails
 * @throws InterruptedException if a pause is interrupted
 */
public static void main(String[] args) throws IOException, InterruptedException {
    // Resources are closed in reverse order: the input archive first, then the output stream
    try (CustomZipOutputStream sink =
                 ZipOutputStreams.newOutputStream(
                         Paths.get("/dev/null"), ZipOutputStreams.HandleDuplicates.APPEND_TO_ZIP);
         ZipFile source = new ZipFile(new File(args[0]))) {
        Enumeration<? extends ZipEntry> remaining = source.entries();
        while (remaining.hasMoreElements()) {
            ZipEntry original = remaining.nextElement();
            ZipEntry copy = new ZipEntry(original);
            // For deflated entries the compressed size is reset to "unknown" on the copy
            if (original.getMethod() == ZipEntry.DEFLATED) {
                copy.setCompressedSize(-1);
            }
            sink.putNextEntry(copy);
            ByteStreams.copy(source.getInputStream(original), sink);
            sink.closeEntry();
        }
    }

    long shortPause = TimeUnit.SECONDS.toMillis(1);
    long longPause = TimeUnit.SECONDS.toMillis(5);

    System.gc();
    Thread.sleep(shortPause);
    System.gc();
    Thread.sleep(longPause);
}
Use of java.util.zip.ZipFile in project buck by facebook.
The class GenAidlIntegrationTest, method rootDirectoryDoesntChangeBuild.
/**
 * Builds //:AService from the "cached_build" scenario in two separate workspaces (tmp and tmp2)
 * and asserts that the rule keys, the zip entries (data and debug strings) and the raw output
 * bytes are the same for both.
 */
@Test
public void rootDirectoryDoesntChangeBuild() throws IOException {
    AssumeAndroidPlatform.assumeSdkIsAvailable();

    ProjectWorkspace firstWorkspace =
            TestDataHelper.createProjectWorkspaceForScenario(this, "cached_build", tmp);
    firstWorkspace.setUp();
    Path firstOutput = firstWorkspace.buildAndReturnOutput("//:AService");

    ProjectWorkspace secondWorkspace =
            TestDataHelper.createProjectWorkspaceForScenario(this, "cached_build", tmp2);
    secondWorkspace.setUp();
    Path secondOutput = secondWorkspace.buildAndReturnOutput("//:AService");

    assertEquals(
            firstWorkspace.getBuildLog().getRuleKey("//:AService"),
            secondWorkspace.getBuildLog().getRuleKey("//:AService"));

    try (ZipFile firstZip = new ZipFile(firstOutput.toFile());
         ZipFile secondZip = new ZipFile(secondOutput.toFile())) {
        Enumeration<? extends ZipEntry> firstEntries = firstZip.entries();
        Enumeration<? extends ZipEntry> secondEntries = secondZip.entries();

        // Walk both archives in lockstep; the second must have an entry for each one in the first
        while (firstEntries.hasMoreElements()) {
            assertTrue(secondEntries.hasMoreElements());
            ZipEntry firstEntry = firstEntries.nextElement();
            ZipEntry secondEntry = secondEntries.nextElement();

            // Compare data first, otherwise crc difference will cause a failure and you don't get to
            // see the actual difference.
            assertEquals(zipEntryData(firstZip, firstEntry), zipEntryData(secondZip, secondEntry));
            assertEquals(zipEntryDebugString(firstEntry), zipEntryDebugString(secondEntry));
        }

        // ...and the second archive must not contain anything extra
        assertFalse(secondEntries.hasMoreElements());
    }

    String firstContents = new String(Files.readAllBytes(firstOutput));
    String secondContents = new String(Files.readAllBytes(secondOutput));
    assertEquals(firstContents, secondContents);
}
Aggregations