Example use of com.zimbra.cs.service.formatter.ArchiveFormatter.ArchiveInputStream in the project zm-mailbox by Zimbra.
From the class SpamExtract, method extractMessages.
/**
 * Fetches an archive over HTTP GET and extracts the messages it contains into {@code outdir}.
 *
 * <p>The response body is read as a gzip-compressed tar stream. Every entry whose name ends
 * with {@code .meta} is decoded into an {@code ItemData}; the entry that immediately follows
 * it is treated as the message content. In raw mode the content is written unchanged to a
 * file named {@code mOutputPrefix + "-" + mExtractIndex} inside {@code outdir}; otherwise it
 * is parsed as a MIME message and handed to {@code writeAttachedMessages}.
 *
 * <p>A per-message {@code IOException} (raw mode) or {@code MessagingException} (non-raw mode)
 * is logged and that message is skipped; its id is not added to the result.
 *
 * @param hc     HTTP client used to execute the request
 * @param gm     GET method to execute; its path is set to {@code path}
 * @param path   request path of the archive to fetch
 * @param outdir directory that receives the extracted output
 * @param raw    {@code true} to write each message as-is, {@code false} to extract its
 *               attached messages
 * @return the ids (as strings) of the items that were processed successfully
 * @throws IOException if the response status is not 200, if the archive cannot be read, or
 *                     if a {@code .meta} entry is not followed by a message entry
 */
private static List<String> extractMessages(HttpClient hc, GetMethod gm, String path, File outdir, boolean raw) throws HttpException, IOException {
    List<String> extractedIds = new ArrayList<String>();
    gm.setPath(path);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Fetching " + path);
    }
    HttpClientUtil.executeMethod(hc, gm);
    if (gm.getStatusCode() != HttpStatus.SC_OK) {
        throw new IOException("HTTP GET failed: " + gm.getPath() + ": " + gm.getStatusCode() + ": " + gm.getStatusText());
    }
    ArchiveInputStream tgzStream = null;
    try {
        tgzStream = new TarArchiveInputStream(new GZIPInputStream(gm.getResponseBodyAsStream()), Charsets.UTF_8.name());
        ArchiveInputEntry entry = null;
        while ((entry = tgzStream.getNextEntry()) != null) {
            LOG.debug("got entry name %s", entry.getName());
            if (!entry.getName().endsWith(".meta")) {
                continue;
            }
            String metaName = entry.getName();
            ItemData itemData = new ItemData(readArchiveEntry(tgzStream, entry));
            UnderlyingData ud = itemData.ud;
            // .meta is expected to be followed by the message (.eml) entry.
            entry = tgzStream.getNextEntry();
            if (entry == null) {
                // Fail with a descriptive checked exception instead of a NullPointerException,
                // and before an output file is created or the extract index is consumed.
                throw new IOException("Truncated archive: no message entry follows " + metaName);
            }
            if (raw) {
                // Write the message as-is.
                File file = new File(outdir, mOutputPrefix + "-" + mExtractIndex++);
                OutputStream os = null;
                try {
                    os = new BufferedOutputStream(new FileOutputStream(file));
                    byte[] data = readArchiveEntry(tgzStream, entry);
                    ByteUtil.copy(new ByteArrayInputStream(data), true, os, false);
                    if (verbose) {
                        LOG.info("Wrote: " + file);
                    }
                    extractedIds.add(ud.id + "");
                } catch (java.io.IOException e) {
                    // Report the file that was actually being written; mExtractIndex has
                    // already been advanced past it.
                    LOG.error("Cannot write to " + file, e);
                } finally {
                    if (os != null) {
                        os.close();
                    }
                }
            } else {
                // Write the attached message to the output directory.
                BufferStream buffer = new BufferStream(entry.getSize(), MAX_BUFFER_SIZE);
                buffer.setSequenced(false);
                MimeMessage mm = null;
                InputStream fis = null;
                try {
                    byte[] data = readArchiveEntry(tgzStream, entry);
                    ByteUtil.copy(new ByteArrayInputStream(data), true, buffer, false);
                    if (buffer.isSpooled()) {
                        // Content was spooled to a file; parse the message from that file.
                        fis = new ZSharedFileInputStream(buffer.getFile());
                        mm = new ZMimeMessage(mJMSession, fis);
                    } else {
                        mm = new ZMimeMessage(mJMSession, buffer.getInputStream());
                    }
                    writeAttachedMessages(mm, outdir, entry.getName());
                    extractedIds.add(ud.id + "");
                } catch (MessagingException me) {
                    LOG.warn("exception occurred fetching message", me);
                } finally {
                    ByteUtil.closeStream(fis);
                }
            }
        }
    } finally {
        Closeables.closeQuietly(tgzStream);
    }
    return extractedIds;
}
Example use of com.zimbra.cs.service.formatter.ArchiveFormatter.ArchiveInputStream in the project zm-mailbox by Zimbra.
From the class SpamExtract, method extractMessages.
/**
 * Fetches an archive over HTTP GET and extracts the messages it contains into {@code outdir}.
 *
 * <p>The response body is read as a gzip-compressed tar stream. Every entry whose name ends
 * with {@code .meta} is decoded into an {@code ItemData}; the entry that immediately follows
 * it is treated as the message content. In raw mode the content is written unchanged to a
 * file named {@code mOutputPrefix + "-" + mExtractIndex} inside {@code outdir}; otherwise it
 * is parsed as a MIME message and handed to {@code writeAttachedMessages}.
 *
 * <p>A per-message {@code IOException} (raw mode) or {@code MessagingException} (non-raw mode)
 * is logged and that message is skipped; its id is not added to the result.
 *
 * @param hc     builder used to create the HTTP client that executes the request
 * @param gm     GET request to execute; its URI is set from {@code path}
 * @param path   URI of the archive to fetch
 * @param outdir directory that receives the extracted output
 * @param raw    {@code true} to write each message as-is, {@code false} to extract its
 *               attached messages
 * @return the ids (as strings) of the items that were processed successfully
 * @throws IOException if {@code path} is not a valid URI, if the response status is not 200,
 *                     if the archive cannot be read, or if a {@code .meta} entry is not
 *                     followed by a message entry
 */
private static List<String> extractMessages(HttpClientBuilder hc, HttpGet gm, String path, File outdir, boolean raw) throws HttpException, IOException {
    List<String> extractedIds = new ArrayList<String>();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Fetching " + path);
    }
    try {
        gm.setURI(new URI(path));
    } catch (URISyntaxException e) {
        // Do not execute the request against whatever URI gm held before: that would fetch
        // (and report as extracted) something other than what the caller asked for.
        throw new IOException("Invalid URI path: " + path, e);
    }
    HttpClient client = hc.build();
    HttpResponse httpResp = HttpClientUtil.executeMethod(client, gm);
    if (httpResp.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
        throw new IOException("HTTP GET failed: " + gm.getRequestLine().getUri() + ": " + httpResp.getStatusLine().getStatusCode() + ": " + httpResp.getStatusLine().getReasonPhrase());
    }
    try (ArchiveInputStream tgzStream = new TarArchiveInputStream(new GZIPInputStream(httpResp.getEntity().getContent()), Charsets.UTF_8.name())) {
        ArchiveInputEntry entry = null;
        while ((entry = tgzStream.getNextEntry()) != null) {
            LOG.debug("got entry name %s", entry.getName());
            if (!entry.getName().endsWith(".meta")) {
                continue;
            }
            String metaName = entry.getName();
            ItemData itemData = new ItemData(readArchiveEntry(tgzStream, entry));
            UnderlyingData ud = itemData.ud;
            // .meta is expected to be followed by the message (.eml) entry.
            entry = tgzStream.getNextEntry();
            if (entry == null) {
                // Fail with a descriptive checked exception instead of a NullPointerException,
                // and before an output file is created or the extract index is consumed.
                throw new IOException("Truncated archive: no message entry follows " + metaName);
            }
            if (raw) {
                // Write the message as-is.
                File file = new File(outdir, mOutputPrefix + "-" + mExtractIndex++);
                OutputStream os = null;
                try {
                    os = new BufferedOutputStream(new FileOutputStream(file));
                    byte[] data = readArchiveEntry(tgzStream, entry);
                    ByteUtil.copy(new ByteArrayInputStream(data), true, os, false);
                    if (verbose) {
                        LOG.info("Wrote: " + file);
                    }
                    extractedIds.add(ud.id + "");
                } catch (java.io.IOException e) {
                    // Report the file that was actually being written; mExtractIndex has
                    // already been advanced past it.
                    LOG.error("Cannot write to " + file, e);
                } finally {
                    if (os != null) {
                        os.close();
                    }
                }
            } else {
                // Write the attached message to the output directory.
                BufferStream buffer = new BufferStream(entry.getSize(), MAX_BUFFER_SIZE);
                buffer.setSequenced(false);
                MimeMessage mm = null;
                InputStream fis = null;
                try {
                    byte[] data = readArchiveEntry(tgzStream, entry);
                    ByteUtil.copy(new ByteArrayInputStream(data), true, buffer, false);
                    if (buffer.isSpooled()) {
                        // Content was spooled to a file; parse the message from that file.
                        fis = new ZSharedFileInputStream(buffer.getFile());
                        mm = new ZMimeMessage(mJMSession, fis);
                    } else {
                        mm = new ZMimeMessage(mJMSession, buffer.getInputStream());
                    }
                    writeAttachedMessages(mm, outdir, entry.getName());
                    extractedIds.add(ud.id + "");
                } catch (MessagingException me) {
                    LOG.warn("exception occurred fetching message", me);
                } finally {
                    ByteUtil.closeStream(fis);
                }
            }
        }
    }
    return extractedIds;
}
Example use of com.zimbra.cs.service.formatter.ArchiveFormatter.ArchiveInputStream in the project zm-mailbox by Zimbra.
From the class ContactTest, method testTruncatedContactsTgzImport.
/**
 * Verifies that reading the entries of the truncated archive {@code Truncated.tgz} with
 * {@code ArchiveFormatter.readArchiveEntry} raises an {@code IOException}.
 *
 * @throws IOException if the archive cannot be opened or iterated
 */
@Test
public void testTruncatedContactsTgzImport() throws IOException {
    File file = new File(MailboxTestUtil.getZimbraServerDir("") + "src/java-test/Truncated.tgz");
    System.out.println(file.getAbsolutePath());
    boolean errorCaught = false;
    // Both streams are declared as resources so the file handle is released even when the
    // gzip or tar wrapper fails to initialise, or when the loop exits early.
    try (InputStream is = new FileInputStream(file);
            ArchiveInputStream ais = new TarArchiveInputStream(new GZIPInputStream(is), "UTF-8")) {
        ArchiveInputEntry aie;
        while ((aie = ais.getNextEntry()) != null) {
            try {
                ArchiveFormatter.readArchiveEntry(ais, aie);
            } catch (IOException e) {
                // Expected outcome: the entry cannot be read in full.
                e.printStackTrace();
                errorCaught = true;
                break;
            }
        }
    }
    Assert.assertTrue("expected an IOException while reading the truncated archive", errorCaught);
}
Aggregations