use of java.nio.charset.CharsetEncoder in project zxing by zxing.
the class PDF417HighLevelEncoder method determineConsecutiveBinaryCount.
/**
 * Counts the characters, beginning at {@code startpos}, that are to be handled by binary
 * compaction. Counting stops at the end of the message or at the first position where a run
 * of 13 consecutive digits starts.
 *
 * @param msg the message
 * @param startpos the start position within the message
 * @param encoding the charset whose encoder is asked whether each character is encodable
 * @return the number of characters counted from {@code startpos}
 * @throws WriterException if a counted character cannot be encoded by {@code encoding}
 */
private static int determineConsecutiveBinaryCount(String msg, int startpos, Charset encoding) throws WriterException {
    final CharsetEncoder encoder = encoding.newEncoder();
    final int len = msg.length();
    // Declared outside the loop: the final result is derived from where scanning stopped.
    int idx = startpos;
    for (; idx < len; idx++) {
        // Measure the digit run that begins at idx, capped at 13.
        int digitRun = 0;
        for (int probe = idx; digitRun < 13 && probe < len && isDigit(msg.charAt(probe)); probe++) {
            digitRun++;
        }
        if (digitRun >= 13) {
            // A long numeric run starts here; everything before it is the binary segment.
            return idx - startpos;
        }
        final char current = msg.charAt(idx);
        if (!encoder.canEncode(current)) {
            throw new WriterException("Non-encodable character detected: " + current + " (Unicode: " + (int) current + ')');
        }
    }
    return idx - startpos;
}
use of java.nio.charset.CharsetEncoder in project cogcomp-nlp by CogComp.
the class StringTransformationCleanup method normalizeToEncoding.
/**
 * Tries to normalize a string to the specified encoding. The number of characters returned
 * should be the same, and tokens should remain contiguous in the output; non-recognized
 * characters will be substituted for *something*.
 *
 * <p>If the encoder for {@code encoding} can already encode the whole transformed text, the
 * transformation is returned untouched. Otherwise every {@code char} position of the text is
 * replaced, one for one, by the character reported by {@code normalizeCharacter}.
 *
 * @param stringTransformation the transformation whose current text is to be normalized
 * @param encoding the target charset
 * @return the same {@code stringTransformation} instance that was passed in
 */
public static StringTransformation normalizeToEncoding(StringTransformation stringTransformation, Charset encoding) {
    String startStr = stringTransformation.getTransformedText();
    CharsetEncoder encoder = encoding.newEncoder();
    if (encoder.canEncode(startStr)) {
        return stringTransformation;
    }
    final int length = startStr.length();
    // The source index and the edit index always advance together by exactly one char
    // (a primitive char is never null once boxed, and Character.charCount of a char
    // value is always 1), so a single counter serves both roles.
    for (int offset = 0; offset < length; offset++) {
        Pair<Boolean, Integer> replacement = normalizeCharacter(startStr, encoding, offset);
        // NOTE(review): the cast truncates any replacement above U+FFFF; presumably
        // normalizeCharacter only yields BMP values - confirm against its contract.
        char replacedChar = (char) replacement.getSecond().intValue();
        stringTransformation.transformString(offset, offset + 1, String.valueOf(replacedChar));
    }
    return stringTransformation;
}
use of java.nio.charset.CharsetEncoder in project intellij-community by JetBrains.
the class PyElementGeneratorImpl method createStringLiteralFromString.
/**
 * Builds a Python string literal expression for the given raw text, choosing the quote style
 * and escaping whatever the target charset cannot represent.
 *
 * <p>Double quotes are used unless the text contains a double quote. The charset is taken
 * from the destination file when it has a virtual file; otherwise it is UTF-8 or US-ASCII
 * depending on {@code preferUTF8}. Code points the charset cannot encode are written as
 * {@code \xNN}, {@code \\uNNNN} or {@code \UNNNNNNNN} escapes, and the literal receives a
 * {@code u} prefix when one of the latter two forms was emitted.
 *
 * @param destination file the literal is meant for, used only to pick the charset; may be null
 * @param unescaped the raw text of the string
 * @param preferUTF8 whether to assume UTF-8 (rather than US-ASCII) when no file is available
 * @return the literal created by {@code createStringLiteralAlreadyEscaped} from the escaped text
 */
@Override
public PyStringLiteralExpression createStringLiteralFromString(@Nullable PsiFile destination, @NotNull String unescaped, final boolean preferUTF8) {
    boolean useDouble = !unescaped.contains("\"");
    // NOTE(review): '.' does not match line terminators by default, so this only matches
    // text containing exactly one line terminator; other multi-line text falls back to
    // escaped \r / \n below. Confirm whether that is intended.
    boolean useMulti = unescaped.matches(".*(\r|\n).*");
    String quotes;
    if (useMulti) {
        quotes = useDouble ? "\"\"\"" : "'''";
    } else {
        quotes = useDouble ? "\"" : "'";
    }
    StringBuilder buf = new StringBuilder(unescaped.length() * 2);
    buf.append(quotes);
    VirtualFile vfile = destination == null ? null : destination.getVirtualFile();
    Charset charset;
    if (vfile == null) {
        charset = (preferUTF8 ? CharsetToolkit.UTF8_CHARSET : Charset.forName("US-ASCII"));
    } else {
        charset = vfile.getCharset();
    }
    CharsetEncoder encoder = charset.newEncoder();
    Formatter formatter = new Formatter(buf);
    boolean unicode = false;
    int i = 0;
    while (i < unescaped.length()) {
        int c = unescaped.codePointAt(i);
        // Step over the whole code point; advancing by a single char would revisit the
        // low surrogate of a supplementary character and emit it a second time.
        i += Character.charCount(c);
        if (c == '"' && useDouble) {
            buf.append("\\\"");
        } else if (c == '\'' && !useDouble) {
            buf.append("\\'");
        } else if ((c == '\r' || c == '\n') && !useMulti) {
            if (c == '\r') {
                buf.append("\\r");
            } else {
                buf.append("\\n");
            }
        } else if (!encoder.canEncode(new String(Character.toChars(c)))) {
            if (c <= 0xff) {
                formatter.format("\\x%02x", c);
            } else if (c <= 0xffff) {
                // Everything in the BMP, U+FFFF included, fits the 4-digit form.
                unicode = true;
                formatter.format("\\u%04x", c);
            } else {
                unicode = true;
                formatter.format("\\U%08x", c);
            }
        } else {
            buf.appendCodePoint(c);
        }
    }
    buf.append(quotes);
    if (unicode) {
        buf.insert(0, "u");
    }
    return createStringLiteralAlreadyEscaped(buf.toString());
}
use of java.nio.charset.CharsetEncoder in project android_frameworks_base by AOSPA.
the class StrictJarManifest method write.
/**
 * Serializes the attributes of a manifest to an {@code OutputStream}.
 *
 * <p>The main section is emitted first, led by its version attribute
 * ({@code Manifest-Version}, or {@code Signature-Version} when the former is absent); if
 * neither is present no main attributes are written. A line separator follows, and then
 * each named entry is written as a {@code Name} line plus its attributes, terminated by a
 * line separator.
 *
 * @param manifest
 *            the manifest to write out.
 * @param out
 *            the {@code OutputStream} to write to.
 * @throws IOException
 *             if an error occurs writing the {@code StrictJarManifest}.
 */
static void write(StrictJarManifest manifest, OutputStream out) throws IOException {
    final CharsetEncoder utf8Encoder = StandardCharsets.UTF_8.newEncoder();
    final ByteBuffer lineBuffer = ByteBuffer.allocate(LINE_LENGTH_LIMIT);

    // Pick the version attribute that leads the main section.
    Attributes.Name versionKey = Attributes.Name.MANIFEST_VERSION;
    String versionValue = manifest.mainAttributes.getValue(versionKey);
    if (versionValue == null) {
        versionKey = Attributes.Name.SIGNATURE_VERSION;
        versionValue = manifest.mainAttributes.getValue(versionKey);
    }

    if (versionValue != null) {
        writeEntry(out, versionKey, versionValue, utf8Encoder, lineBuffer);
        for (Object rawKey : manifest.mainAttributes.keySet()) {
            Attributes.Name attrName = (Attributes.Name) rawKey;
            if (attrName.equals(versionKey)) {
                // Already written as the first line of the section.
                continue;
            }
            writeEntry(out, attrName, manifest.mainAttributes.getValue(attrName), utf8Encoder, lineBuffer);
        }
    }
    out.write(LINE_SEPARATOR);

    for (String entryName : manifest.getEntries().keySet()) {
        writeEntry(out, Attributes.Name.NAME, entryName, utf8Encoder, lineBuffer);
        Attributes entryAttributes = manifest.entries.get(entryName);
        for (Object rawKey : entryAttributes.keySet()) {
            Attributes.Name attrName = (Attributes.Name) rawKey;
            writeEntry(out, attrName, entryAttributes.getValue(attrName), utf8Encoder, lineBuffer);
        }
        out.write(LINE_SEPARATOR);
    }
}
use of java.nio.charset.CharsetEncoder in project presto by prestodb.
the class PrestoConnection method setSessionProperty.
/**
 * Adds a session property (experimental).
 *
 * <p>Both arguments must be non-null, the name must be non-empty and free of {@code '='},
 * and both must be encodable as US-ASCII; otherwise the call is rejected before the
 * property is stored.
 *
 * @param name the session property name
 * @param value the session property value
 */
public void setSessionProperty(String name, String value) {
    requireNonNull(name, "name is null");
    requireNonNull(value, "value is null");
    checkArgument(!name.isEmpty(), "name is empty");
    checkArgument(!name.contains("="), "Session property name must not contain '=': %s", name);

    // One encoder serves both checks.
    CharsetEncoder asciiEncoder = US_ASCII.newEncoder();
    boolean nameIsAscii = asciiEncoder.canEncode(name);
    checkArgument(nameIsAscii, "Session property name is not US_ASCII: %s", name);
    boolean valueIsAscii = asciiEncoder.canEncode(value);
    checkArgument(valueIsAscii, "Session property value is not US_ASCII: %s", value);

    sessionProperties.put(name, value);
}
Aggregations