Use of java.nio.charset.CharsetDecoder in project lucene-solr by apache.
Class JapaneseTokenizerFactory, method inform.
/**
 * Loads the optional user dictionary through the given resource loader.
 *
 * <p>When no dictionary path is configured, {@code userDictionary} is set to
 * {@code null}. Otherwise the resource is read with a decoder for the configured
 * encoding (falling back to {@code IOUtils.UTF_8} when none is set) whose
 * malformed-input and unmappable-character actions are both {@code REPORT}.
 *
 * @param loader used to open the resource named by {@code userDictionaryPath}
 * @throws IOException if the resource cannot be opened or read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    // Nothing configured: make sure no dictionary is left in place.
    if (userDictionaryPath == null) {
        userDictionary = null;
        return;
    }

    try (InputStream dictionaryStream = loader.openResource(userDictionaryPath)) {
        final String configured = userDictionaryEncoding;
        final String charsetName = (configured == null) ? IOUtils.UTF_8 : configured;

        // Strict decoder: bad bytes are reported instead of being replaced.
        final CharsetDecoder strictDecoder = Charset.forName(charsetName).newDecoder();
        strictDecoder.onMalformedInput(CodingErrorAction.REPORT);
        strictDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);

        userDictionary = UserDictionary.open(new InputStreamReader(dictionaryStream, strictDecoder));
    }
}
Use of java.nio.charset.CharsetDecoder in project lucene-solr by apache.
Class ConnectionCostsBuilder, method build.
/**
 * Reads a connection-costs file and collects its entries into a
 * {@link ConnectionCostsWriter}.
 *
 * <p>The first line holds two whitespace-separated integers (forward size and
 * backward size). Every following line holds three whitespace-separated integers:
 * forward id, backward id and cost. The file is decoded as US-ASCII with a decoder
 * whose malformed-input and unmappable-character actions are {@code REPORT}.
 *
 * @param filename path of the connection-costs file to read
 * @return a writer populated with every cost entry found in the file
 * @throws IOException if the file cannot be opened or read, or if it is empty
 * @throws NumberFormatException if a field is not a valid integer
 */
public static ConnectionCostsWriter build(String filename) throws IOException {
    CharsetDecoder decoder = StandardCharsets.US_ASCII.newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);

    // try-with-resources guarantees the file handle is released on every path,
    // including parse failures part-way through the file.
    try (FileInputStream inputStream = new FileInputStream(filename);
         InputStreamReader streamReader = new InputStreamReader(inputStream, decoder);
         LineNumberReader lineReader = new LineNumberReader(streamReader)) {

        String line = lineReader.readLine();
        if (line == null) {
            // An empty file would otherwise surface as a bare NullPointerException.
            throw new IOException("Connection costs file is empty: " + filename);
        }

        String[] dimensions = line.split("\\s+");
        assert dimensions.length == 2;
        int forwardSize = Integer.parseInt(dimensions[0]);
        int backwardSize = Integer.parseInt(dimensions[1]);
        assert forwardSize > 0 && backwardSize > 0;

        ConnectionCostsWriter costs = new ConnectionCostsWriter(forwardSize, backwardSize);
        while ((line = lineReader.readLine()) != null) {
            String[] fields = line.split("\\s+");
            assert fields.length == 3;
            int forwardId = Integer.parseInt(fields[0]);
            int backwardId = Integer.parseInt(fields[1]);
            int cost = Integer.parseInt(fields[2]);
            costs.add(forwardId, backwardId, cost);
        }
        return costs;
    }
}
Use of java.nio.charset.CharsetDecoder in project tika by apache.
Class TextExtractor, method pushBytes.
// Decodes the bytes buffered in pendingByteBuffer into UTF-16
// code units. Decoded characters go to the out ContentHandler
// when we are in the body; while in the header (or when
// fieldState == 1) they are appended to pendingBuffer instead.
// The pending byte count is always cleared on normal return.
private void pushBytes() throws IOException, SAXException, TikaException {
    // Nothing buffered, or the current group is ignored and no
    // metadata is being collected: just drop the pending bytes.
    if (pendingByteCount <= 0 || (groupState.ignore && nextMetaData == null)) {
        pendingByteCount = 0;
        return;
    }

    final CharsetDecoder decoder = getDecoder();
    pendingByteBuffer.limit(pendingByteCount);
    assert pendingByteBuffer.position() == 0;
    assert outputBuffer.position() == 0;

    CoderResult status;

    // endOfInput is true because, by the time we are called,
    // a complete character sequence for this charset should
    // already have been buffered.
    do {
        status = decoder.decode(pendingByteBuffer, outputBuffer, true);
        emitDecodedChars();
    } while (status != CoderResult.UNDERFLOW);

    // Drain whatever the decoder is still holding internally.
    do {
        status = decoder.flush(outputBuffer);
        emitDecodedChars();
    } while (status != CoderResult.UNDERFLOW);

    // Leave decoder and byte buffer ready for the next run.
    decoder.reset();
    pendingByteBuffer.position(0);
    pendingByteCount = 0;
}

// Hands the characters currently sitting in outputBuffer
// (backed by outputArray) to their destination and empties
// the buffer; does nothing when the buffer is empty.
private void emitDecodedChars() throws IOException, SAXException, TikaException {
    final int count = outputBuffer.position();
    if (count <= 0) {
        return;
    }
    if (inHeader || fieldState == 1) {
        pendingBuffer.append(outputArray, 0, count);
    } else {
        lazyStartParagraph();
        out.characters(outputArray, 0, count);
    }
    outputBuffer.position(0);
}
Use of java.nio.charset.CharsetDecoder in project bnd by bndtools.
Class UTF8Properties, method decode.
/**
 * Converts raw bytes to text by trying each decoder in {@code decoders} in order
 * and returning the output of the first one that decodes without an error result.
 * When every decoder fails, the bytes are decoded with {@code new String(byte[])},
 * i.e. the platform default charset.
 *
 * @param buffer the bytes to decode
 * @return the decoded text
 * @throws IOException declared by the signature
 */
private String decode(byte[] buffer) throws IOException {
    final ByteBuffer input = ByteBuffer.wrap(buffer);
    final CharBuffer output = CharBuffer.allocate(buffer.length * 4);

    for (CharsetDecoder candidate : decoders) {
        if (candidate.decode(input, output, true).isError()) {
            // This charset did not fit: restore decoder and both buffers
            // so the next candidate starts from scratch.
            candidate.reset();
            input.rewind();
            output.clear();
            continue;
        }
        candidate.flush(output);
        candidate.reset();
        return output.flip().toString();
    }

    // default decoding
    return new String(buffer);
}
Use of java.nio.charset.CharsetDecoder in project Essentials by drtshock.
Class EssentialsConf, method load.
/**
 * Loads this configuration from {@code configFile}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Returns without reading while {@code pendingDiskWrites} is non-zero.</li>
 *   <li>Creates the parent directory if it does not exist.</li>
 *   <li>Deletes a non-empty file whose first byte is 0.</li>
 *   <li>If the file is missing, calls {@code convertLegacyFile()},
 *       {@code convertAltFile()} or {@code createFromTemplate()}, whichever applies
 *       first; returns if none applies.</li>
 *   <li>Reads the whole file, decodes it with {@code UTF8} and, if that reports an
 *       error, with the platform default charset, then passes the text to
 *       {@code super.loadFromString}.</li>
 * </ol>
 *
 * <p>An {@code IOException} is logged. On an {@code InvalidConfigurationException}
 * the file is renamed to {@code <name>.broken.<millis>} and the failure is logged.
 */
public synchronized void load() {
    if (pendingDiskWrites.get() != 0) {
        LOGGER.log(Level.INFO, "File {0} not read, because it''s not yet written to disk.", configFile);
        return;
    }
    if (!configFile.getParentFile().exists()) {
        if (!configFile.getParentFile().mkdirs()) {
            LOGGER.log(Level.SEVERE, tl("failedToCreateConfig", configFile.toString()));
        }
    }
    // This will delete files where the first byte is 0. In most cases they are broken.
    if (configFile.exists() && configFile.length() != 0) {
        boolean startsWithNul = false;
        try (InputStream input = new FileInputStream(configFile)) {
            startsWithNul = input.read() == 0;
        } catch (IOException ex) {
            // Also covers FileNotFoundException from opening the file.
            LOGGER.log(Level.SEVERE, null, ex);
        }
        // Delete only after the stream is closed; an open handle can block deletion.
        if (startsWithNul) {
            configFile.delete();
        }
    }
    if (!configFile.exists()) {
        if (legacyFileExists()) {
            convertLegacyFile();
        } else if (altFileExists()) {
            convertAltFile();
        } else if (templateName != null) {
            LOGGER.log(Level.INFO, tl("creatingConfigFromTemplate", configFile.toString()));
            createFromTemplate();
        } else {
            return;
        }
    }
    try (FileInputStream inputStream = new FileInputStream(configFile)) {
        long startSize = configFile.length();
        if (startSize > Integer.MAX_VALUE) {
            throw new InvalidConfigurationException("File too big");
        }
        ByteBuffer buffer = ByteBuffer.allocate((int) startSize);
        int length;
        while ((length = inputStream.read(bytebuffer)) != -1) {
            if (length > buffer.remaining()) {
                // The file is larger than first reported: grow to exactly fit this chunk.
                ByteBuffer resize = ByteBuffer.allocate(buffer.capacity() + length - buffer.remaining());
                int resizePosition = buffer.position();
                buffer.rewind();
                resize.put(buffer);
                resize.position(resizePosition);
                buffer = resize;
            }
            buffer.put(bytebuffer, 0, length);
        }
        // flip() rather than rewind(): the limit must be the number of bytes actually
        // read, otherwise unread trailing zero bytes are decoded as NUL characters
        // when fewer bytes were read than the buffer's capacity.
        buffer.flip();
        final CharBuffer data = CharBuffer.allocate(buffer.capacity());
        CharsetDecoder decoder = UTF8.newDecoder();
        CoderResult result = decoder.decode(buffer, data, true);
        if (result.isError()) {
            // Start over with the platform default charset.
            buffer.rewind();
            data.clear();
            LOGGER.log(Level.INFO, "File " + configFile.getAbsolutePath() + " is not utf-8 encoded, trying " + Charset.defaultCharset().displayName());
            decoder = Charset.defaultCharset().newDecoder();
            result = decoder.decode(buffer, data, true);
            if (result.isError()) {
                throw new InvalidConfigurationException("Invalid Characters in file " + configFile.getAbsolutePath());
            } else {
                decoder.flush(data);
            }
        } else {
            decoder.flush(data);
        }
        final int end = data.position();
        data.rewind();
        super.loadFromString(data.subSequence(0, end).toString());
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, ex.getMessage(), ex);
    } catch (InvalidConfigurationException ex) {
        File broken = new File(configFile.getAbsolutePath() + ".broken." + System.currentTimeMillis());
        configFile.renameTo(broken);
        // Exceptions thrown without a cause would otherwise be logged with a null
        // throwable, hiding the reason; fall back to the exception itself.
        final Throwable reason = ex.getCause() != null ? ex.getCause() : ex;
        LOGGER.log(Level.SEVERE, "The file " + configFile.toString() + " is broken, it has been renamed to " + broken.toString(), reason);
    }
}
Aggregations