Use of i2p.susi.util.ReadBuffer in project i2p.i2p by i2p.
From the class HeaderLine, method decode.
/**
 * Decodes all the header lines, up through \r\n\r\n, and writes them to
 * {@code bout}, including the terminating \r\n\r\n when one was seen.
 *
 * <p>While copying, this method:
 * <ul>
 * <li>recognizes encoded words of the form {@code =?charset?Q|B?payload?=},
 *     decodes the payload with the matching {@link Encoding} and writes the
 *     text out (bytes are copied through for utf-8/utf8, otherwise the decoded
 *     bytes are converted from {@code charset} and re-encoded with
 *     {@code DataHelper.getUTF8});</li>
 * <li>unfolds continuation lines: a \r\n followed by spaces/tabs becomes a
 *     single space, or nothing when the preceding output was an encoded word;</li>
 * <li>copies every other byte literally.</li>
 * </ul>
 * Malformed encoded words are handled best-effort: depending on how many
 * {@code '?'} separators were found, the buffered bytes are written verbatim,
 * dropped, or decoded as far as they go.
 *
 * <p>NOTE(review): for Q-encoding, {@code '_'} is mapped to a space after the
 * payload has been decoded, so an encoded {@code =5F} presumably also ends up
 * as a space -- confirm against the QuotedPrintable decoder.
 *
 * @param in   source of the raw header bytes; read until \r\n\r\n or EOF
 * @param bout destination buffer; {@code writeComplete(true)} is called on it
 *             before returning normally
 * @throws IOException if reading {@code in} or writing {@code bout} fails
 */
public void decode(InputStream in, Buffer bout) throws IOException {
    OutputStream out = bout.getOutputStream();
    boolean linebreak = false;
    boolean lastCharWasQuoted = false;
    // scratch buffer for one encoded word, allocated lazily and reused
    byte[] encodedWord = null;
    // we support one char of pushback,
    // to catch some simple malformed input
    int pushbackChar = 0;
    boolean hasPushback = false;
    while (true) {
        int c;
        if (hasPushback) {
            c = pushbackChar;
            hasPushback = false;
        } else {
            c = in.read();
            if (c < 0)
                break;
        }
        if (c == '=') {
            // Sadly, base64 can be a lot longer
            if (encodedWord == null)
                encodedWord = new byte[DECODE_MAX];
            int offset = 0;
            // buffer offsets of the four '?' separators; 0 means "not seen yet"
            int f1 = 0, f2 = 0, f3 = 0, f4 = 0;
            encodedWord[offset++] = (byte) c;
            // but for the most part it gets thrown out, as RFC 2047 allows
            for (; offset < DECODE_MAX; offset++) {
                c = in.read();
                if (c == '?') {
                    if (f1 == 0)
                        f1 = offset;
                    else if (f2 == 0)
                        f2 = offset;
                    else if (f3 == 0)
                        f3 = offset;
                    else if (f4 == 0)
                        f4 = offset;
                } else if (c == -1) {
                    break;
                } else if (c == '\r' || c == '\n') {
                    // end of line inside the word: let the main loop see it
                    pushbackChar = c;
                    hasPushback = true;
                    break;
                } else if (offset == 1) {
                    // no '?' after '='
                    out.write('=');
                    pushbackChar = c;
                    hasPushback = true;
                    break;
                }
                encodedWord[offset] = (byte) c;
                // store one past the 4th '?', presumably the '='
                if (f4 > 0 && offset >= f4 + 1) {
                    if (c == '=') {
                        offset++;
                    } else {
                        pushbackChar = c;
                        hasPushback = true;
                    }
                    break;
                }
            }
            // here, encodedWord[0 .. offset-1] holds the bytes of this word
            if (f4 == 0) {
                // at most 1 byte is pushed back
                if (f1 == 0) {
                    // This is normal
                    continue;
                } else if (f2 == 0) {
                    // =? but no more ?
                    // output what we buffered
                    Debug.debug(Debug.DEBUG, "2nd '?' not found");
                    for (int i = 0; i < offset; i++) {
                        out.write(encodedWord[i] & 0xff);
                    }
                    continue;
                } else if (f3 == 0) {
                    // discard what we buffered
                    Debug.debug(Debug.DEBUG, "3rd '?' not found");
                    continue;
                } else {
                    // probably just too long, but could be end of line without the "?=".
                    // synthesize a 4th '?' in an attempt to output something.
                    // It goes at 'offset', the first slot NOT written for this word;
                    // using offset + 1 would pull one unwritten (zero or stale, since
                    // the buffer is reused) byte into the payload.
                    Debug.debug(Debug.DEBUG, "4th '?' not found");
                    f4 = offset;
                    // keep going and output what we have
                }
            }
            /*
             * 4th question mark found (or synthesized), so decode the payload
             * that lies between the 3rd and 4th '?'
             */
            String enc = (encodedWord[f2 + 1] == 'Q' || encodedWord[f2 + 1] == 'q') ? "quoted-printable" : ((encodedWord[f2 + 1] == 'B' || encodedWord[f2 + 1] == 'b') ? "base64" : null);
            if (enc != null) {
                Encoding e = EncodingFactory.getEncoding(enc);
                if (e != null) {
                    try {
                        ReadBuffer tmpIn = new ReadBuffer(encodedWord, f3 + 1, f4 - f3 - 1);
                        // decoded won't be longer than encoded
                        MemoryBuffer tmp = new MemoryBuffer(f4 - f3 - 1);
                        try {
                            e.decode(tmpIn, tmp);
                        } catch (EOFException eof) {
                            // Keep going and output what we got, if any
                            if (Debug.getLevel() >= Debug.DEBUG) {
                                Debug.debug(Debug.DEBUG, "q-w " + enc, eof);
                                Debug.debug(Debug.DEBUG, net.i2p.util.HexDump.dump(encodedWord));
                            }
                        }
                        tmp.writeComplete(true);
                        // get charset (the text between the 1st and 2nd '?')
                        String charset = new String(encodedWord, f1 + 1, f2 - f1 - 1, "ISO-8859-1");
                        String clc = charset.toLowerCase(Locale.US);
                        if (clc.equals("utf-8") || clc.equals("utf8")) {
                            // no charset conversion needed, copy the bytes through
                            // FIXME could be more efficient?
                            InputStream tis = tmp.getInputStream();
                            if (enc.equals("quoted-printable")) {
                                int d;
                                while ((d = tis.read()) != -1) {
                                    // '_' stands for a space in Q-encoding
                                    out.write(d == '_' ? 32 : d);
                                }
                            } else {
                                DataHelper.copy(tis, out);
                            }
                        } else {
                            // FIXME could be more efficient?
                            // decode string
                            String decoded = new String(tmp.getContent(), tmp.getOffset(), tmp.getLength(), charset);
                            // encode string
                            byte[] utf8 = DataHelper.getUTF8(decoded);
                            if (enc.equals("quoted-printable")) {
                                for (int j = 0; j < utf8.length; j++) {
                                    byte d = utf8[j];
                                    out.write(d == '_' ? 32 : d);
                                }
                            } else {
                                out.write(utf8);
                            }
                        }
                        lastCharWasQuoted = true;
                        continue;
                    } catch (IOException e1) {
                        // includes an unsupported charset name
                        Debug.debug(Debug.ERROR, "q-w " + enc, e1);
                        if (Debug.getLevel() >= Debug.DEBUG) {
                            Debug.debug(Debug.DEBUG, net.i2p.util.HexDump.dump(encodedWord));
                        }
                    } catch (RuntimeException e1) {
                        Debug.debug(Debug.ERROR, "q-w " + enc, e1);
                        if (Debug.getLevel() >= Debug.DEBUG) {
                            Debug.debug(Debug.DEBUG, net.i2p.util.HexDump.dump(encodedWord));
                        }
                    }
                } else {
                    // can't happen
                    Debug.debug(Debug.DEBUG, "No decoder for " + enc);
                }
            } else {
                Debug.debug(Debug.DEBUG, "Invalid encoding '" + (char) encodedWord[f2 + 1] + '\'');
            }
            // Decoding failed: fall through with c holding the last char read
            // for the word (possibly pushed back, possibly -1 at EOF).
        } else if (c == '\r') {
            if ((c = in.read()) == '\n') {
                /*
                 * delay linebreak in case of long line
                 */
                linebreak = true;
            } else {
                // The lone '\r' is dropped; c now holds the following char
                // (or -1 at EOF) and is handled by the literal branch below.
                Debug.debug(Debug.DEBUG, "No \\n after \\r");
            }
        }
        // swallow whitespace here if lastCharWasQuoted
        if (linebreak) {
            linebreak = false;
            for (int i = 0; ; i++) {
                c = in.read();
                if (c == -1)
                    break;
                if (c != ' ' && c != '\t') {
                    if (i == 0) {
                        /*
                         * new line does not start with whitespace, so its not a new part of a
                         * long line
                         */
                        out.write('\r');
                        out.write('\n');
                        if (c == '\r') {
                            // empty line: end of headers
                            linebreak = true;
                            // \n
                            in.read();
                            break;
                        }
                    } else {
                        // treat all preceding whitespace as a single one
                        if (!lastCharWasQuoted)
                            out.write(' ');
                    }
                    pushbackChar = c;
                    hasPushback = true;
                    break;
                }
                /*
                 * skip whitespace
                 */
            }
            // if \r\n\r\n, we are done
            if (linebreak)
                break;
        } else {
            if (c < 0) {
                // EOF was reached while looking ahead (after a lone '\r' or inside
                // an undecodable encoded word). OutputStream.write(-1) would emit
                // a bogus 0xFF byte, so stop here instead.
                break;
            }
            /*
             * print out everything else literally, except a char that was pushed
             * back: the next iteration will handle it, and writing it here as
             * well would emit it twice
             */
            if (!hasPushback) {
                out.write(c);
            }
            lastCharWasQuoted = false;
        }
    }
    // while true
    if (linebreak) {
        // second half of the terminating \r\n\r\n
        out.write('\r');
        out.write('\n');
    }
    bout.writeComplete(true);
}
Aggregations