Use of java.nio.charset.CharsetDecoder in project perun by CESNET — class ELIXIRCILogonDNGenerator, method truncate.
/**
 * Implementation of the general truncating rule outlined in the RCauth Policy Document
 * ( https://rcauth.eu/policy ) in section 3.1.2. It takes an RDN as input and checks its
 * UTF-8 encoded byte size. In case it's larger than the size provided in the parameters,
 * the RDN is truncated so that the result, including the appended RDN_TRUNCATE_SIGN,
 * fits into that many UTF-8 bytes, without cutting a multi-byte UTF-8 character in half.
 *
 * @param rdn Input RDN to be truncated in case it's too large
 * @param size The size in bytes to which the RDN should be truncated. This value defaults
 * to RDN_MAX_SIZE (64 bytes) in case the size provided is less than or equal to 0
 * @return Truncated RDN
 */
protected String truncate(String rdn, int size) {
    if (size <= 0) {
        size = RDN_MAX_SIZE;
    }
    Charset defaultCharset = Charset.forName("UTF-8");
    // Encode once and reuse (the original version encoded the RDN twice).
    byte[] sba = rdn.getBytes(defaultCharset);
    // only truncate if the RDN exceeds the maximum allowed size
    if (sba.length > size) {
        // Reserve room for the truncate sign so the final string still fits in 'size' bytes.
        int truncatedSize = size - RDN_TRUNCATE_SIGN.getBytes(defaultCharset).length;
        if (truncatedSize < 0) {
            // 'size' is smaller than the truncate sign itself; keep nothing of the RDN
            // instead of letting ByteBuffer.wrap throw IndexOutOfBoundsException.
            truncatedSize = 0;
        }
        CharsetDecoder cd = defaultCharset.newDecoder();
        // Decode only the first truncatedSize bytes of the encoded RDN.
        ByteBuffer bb = ByteBuffer.wrap(sba, 0, truncatedSize);
        // For UTF-8 the character count never exceeds the byte count.
        CharBuffer cb = CharBuffer.allocate(truncatedSize);
        // Silently drop a trailing multi-byte character that the byte limit cut in half.
        cd.onMalformedInput(CodingErrorAction.IGNORE);
        cd.decode(bb, cb, true);
        cd.flush(cb);
        rdn = new String(cb.array(), 0, cb.position()) + RDN_TRUNCATE_SIGN;
    }
    return rdn;
}
Use of java.nio.charset.CharsetDecoder in project drill by apache — class Text, method decode.
/**
 * Decodes the given UTF-8 byte buffer into a String using a decoder obtained
 * from DECODER_FACTORY.
 *
 * @param utf8 buffer holding the UTF-8 encoded bytes to decode
 * @param replace if true, malformed input and unmappable characters are replaced
 *        with the replacement character instead of raising an exception
 * @return the decoded string
 * @throws CharacterCodingException if {@code replace} is false and the input is
 *         not valid UTF-8 (decoder default action is REPORT)
 */
private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException {
    CharsetDecoder decoder = DECODER_FACTORY.get();
    if (replace) {
        decoder.onMalformedInput(CodingErrorAction.REPLACE);
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
        try {
            return decoder.decode(utf8).toString();
        } finally {
            // Restore the shared decoder to its default REPORT behavior even if
            // decode() throws; the original skipped the reset on exception, leaking
            // REPLACE mode into subsequent calls.
            decoder.onMalformedInput(CodingErrorAction.REPORT);
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
    return decoder.decode(utf8).toString();
}
Use of java.nio.charset.CharsetDecoder in project lucene-solr by apache — class SynonymFilterFactory, method loadSynonyms.
/**
 * Load synonyms with the given {@link SynonymMap.Parser} class.
 *
 * @param loader resource loader used to resolve the parser class and synonym files
 * @param cname fully-qualified name of the SynonymMap.Parser implementation to use
 * @param dedup whether duplicate rules should be deduplicated by the parser
 * @param analyzer analyzer handed to the parser for tokenizing synonym entries
 * @return the built SynonymMap
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
    // Strict UTF-8: report (rather than silently replace) any bad bytes in the files.
    CharsetDecoder utf8Decoder = StandardCharsets.UTF_8.newDecoder();
    utf8Decoder.onMalformedInput(CodingErrorAction.REPORT);
    utf8Decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    Class<? extends SynonymMap.Parser> parserClass = loader.findClass(cname, SynonymMap.Parser.class);
    SynonymMap.Parser parser;
    try {
        parser = parserClass.getConstructor(boolean.class, boolean.class, Analyzer.class).newInstance(dedup, expand, analyzer);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    for (String file : splitFileNames(synonyms)) {
        // A decoder is stateful; reset it before reusing it for the next file.
        utf8Decoder.reset();
        try (final Reader reader = new InputStreamReader(loader.openResource(file), utf8Decoder)) {
            parser.parse(reader);
        }
    }
    return parser.build();
}
Use of java.nio.charset.CharsetDecoder in project lucene-solr by apache — class SynonymGraphFilterFactory, method loadSynonyms.
/**
 * Load synonyms with the given {@link SynonymMap.Parser} class.
 *
 * @param loader resource loader used to resolve the parser class and synonym files
 * @param cname fully-qualified name of the SynonymMap.Parser implementation to use
 * @param dedup whether duplicate rules should be deduplicated by the parser
 * @param analyzer analyzer handed to the parser for tokenizing synonym entries
 * @return the built SynonymMap
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
    // Strict UTF-8 decoding: report malformed/unmappable input instead of replacing it.
    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
    SynonymMap.Parser parser;
    Class<? extends SynonymMap.Parser> clazz = loader.findClass(cname, SynonymMap.Parser.class);
    try {
        parser = clazz.getConstructor(boolean.class, boolean.class, Analyzer.class).newInstance(dedup, expand, analyzer);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    List<String> files = splitFileNames(synonyms);
    for (String file : files) {
        // A decoder is stateful; reset it before reusing it for the next file.
        decoder.reset();
        // Close the reader even when parsing fails; the previous version leaked it.
        // This also matches SynonymFilterFactory.loadSynonyms.
        try (final Reader isr = new InputStreamReader(loader.openResource(file), decoder)) {
            parser.parse(isr);
        }
    }
    return parser.build();
}
Use of java.nio.charset.CharsetDecoder in project lucene-solr by apache — class SolrRequestParsers, method parseFormDataContent.
/**
 * Given a url-encoded form from POST content (as InputStream), map it into the given map.
 * The given InputStream should be buffered!
 * @param postContent to be parsed
 * @param maxLen maximum number of bytes to consume; exceeding it raises a SolrException (BAD_REQUEST)
 * @param charset to be used to decode resulting bytes after %-decoding
 * @param map place all parameters in this map
 * @param supportCharsetParam if true, the form itself may declare its input encoding via the
 *        INPUT_ENCODING_KEY parameter; key/value pairs seen before that declaration are
 *        buffered as raw bytes and decoded once the charset is known
 * @return the number of bytes read from the stream
 */
@SuppressWarnings({ "fallthrough", "resource" })
static long parseFormDataContent(final InputStream postContent, final long maxLen, Charset charset, final Map<String, String[]> map, boolean supportCharsetParam) throws IOException {
    // When an in-band charset declaration may still arrive, we cannot decode yet, so start
    // without a decoder; otherwise decode eagerly with the supplied charset.
    CharsetDecoder charsetDecoder = supportCharsetParam ? null : getCharsetDecoder(charset);
    // Buffered raw tokens awaiting a decoder, stored flat as [keyBytes, keyPos, valueBytes, valuePos, ...].
    final LinkedList<Object> buffer = supportCharsetParam ? new LinkedList<>() : null;
    // len: bytes consumed so far; keyPos/valuePos: stream offsets where the current key/value
    // started — passed to decodeChars (NOTE(review): presumably for error positions; confirm there).
    long len = 0L, keyPos = 0L, valuePos = 0L;
    final ByteArrayOutputStream keyStream = new ByteArrayOutputStream(), valueStream = new ByteArrayOutputStream();
    // Bytes accumulate into the key until the first '=' switches us to the value.
    ByteArrayOutputStream currentStream = keyStream;
    for (; ; ) {
        int b = postContent.read();
        switch(b) {
            // end of stream
            case -1:
            case // separator
            '&':
                // End of one key=value pair (or of the whole stream): emit what we collected.
                if (keyStream.size() > 0) {
                    final byte[] keyBytes = keyStream.toByteArray(), valueBytes = valueStream.toByteArray();
                    if (Arrays.equals(keyBytes, INPUT_ENCODING_BYTES)) {
                        // we found a charset declaration in the raw bytes
                        if (charsetDecoder != null) {
                            // Either a duplicate declaration, or one in a context that forbids it.
                            throw new SolrException(ErrorCode.BAD_REQUEST, supportCharsetParam ? ("Query string invalid: duplicate '" + INPUT_ENCODING_KEY + "' (input encoding) key.") : ("Key '" + INPUT_ENCODING_KEY + "' (input encoding) cannot " + "be used in POSTed application/x-www-form-urlencoded form data. " + "To set the input encoding of POSTed form data, use the " + "'Content-Type' header and provide a charset!"));
                        }
                        // decode the charset from raw bytes (charset names are always US-ASCII)
                        charset = Charset.forName(decodeChars(valueBytes, keyPos, getCharsetDecoder(CHARSET_US_ASCII)));
                        charsetDecoder = getCharsetDecoder(charset);
                        // finally decode all buffered tokens
                        decodeBuffer(buffer, map, charsetDecoder);
                    } else if (charsetDecoder == null) {
                        // we have no charset decoder until now, buffer the keys / values for later processing:
                        buffer.add(keyBytes);
                        buffer.add(Long.valueOf(keyPos));
                        buffer.add(valueBytes);
                        buffer.add(Long.valueOf(valuePos));
                    } else {
                        // we already have a charsetDecoder, so we can directly decode without buffering:
                        final String key = decodeChars(keyBytes, keyPos, charsetDecoder), value = decodeChars(valueBytes, valuePos, charsetDecoder);
                        MultiMapSolrParams.addParam(key.trim(), value, map);
                    }
                } else if (valueStream.size() > 0) {
                    // A value without a key (e.g. "=foo") is malformed form data.
                    throw new SolrException(ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded invalid: missing key");
                }
                // Reset per-pair state; the next pair's key starts at the byte after this one.
                keyStream.reset();
                valueStream.reset();
                keyPos = valuePos = len + 1;
                currentStream = keyStream;
                break;
            case // space replacement
            '+':
                currentStream.write(' ');
                break;
            case // escape
            '%':
                // Percent escape: consume two hex digits and write the decoded byte.
                final int upper = digit16(b = postContent.read());
                len++;
                final int lower = digit16(b = postContent.read());
                len++;
                currentStream.write(((upper << 4) + lower));
                break;
            case // kv separator
            '=':
                if (currentStream == keyStream) {
                    // First '=' of a pair switches from key to value.
                    valuePos = len + 1;
                    currentStream = valueStream;
                    break;
                }
                // fall-through: any later '=' is a literal byte of the value
            default:
                currentStream.write(b);
        }
        if (b == -1) {
            break;
        }
        len++;
        // Enforce the upload limit while streaming, not after buffering everything.
        if (len > maxLen) {
            throw new SolrException(ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded content exceeds upload limit of " + (maxLen / 1024L) + " KB");
        }
    }
    // if we have not seen a charset declaration, decode the buffer now using the default one (UTF-8 or given via Content-Type):
    if (buffer != null && !buffer.isEmpty()) {
        assert charsetDecoder == null;
        decodeBuffer(buffer, map, getCharsetDecoder(charset));
    }
    return len;
}
Aggregations