Search in sources :

Example 61 with CharsetEncoder

use of java.nio.charset.CharsetEncoder in project zxing by zxing.

the class PDF417HighLevelEncoder method determineConsecutiveBinaryCount.

/**
 * Counts how many characters, starting at {@code startpos}, should be handled with binary
 * compaction.
 *
 * <p>The scan walks the message one character at a time. It stops early when the current
 * position begins a run of at least 13 digits; otherwise it runs to the end of the message.
 *
 * @param msg      the message
 * @param startpos the start position within the message
 * @param encoding the charset whose encoder is used to check each character
 * @return the number of characters between {@code startpos} and the position where the scan stopped
 * @throws WriterException if a scanned character cannot be encoded by {@code encoding}
 */
private static int determineConsecutiveBinaryCount(String msg, int startpos, Charset encoding) throws WriterException {
    final CharsetEncoder charsetEncoder = encoding.newEncoder();
    final int length = msg.length();
    int pos = startpos;
    for (; pos < length; pos++) {
        // Measure the digit run beginning at pos, capped at 13 and at the end of the message.
        int digitRun = 0;
        while (digitRun < 13 && pos + digitRun < length && isDigit(msg.charAt(pos + digitRun))) {
            digitRun++;
        }
        if (digitRun >= 13) {
            // A long numeric run starts here, so the binary segment ends just before it.
            break;
        }
        final char current = msg.charAt(pos);
        if (!charsetEncoder.canEncode(current)) {
            throw new WriterException("Non-encodable character detected: " + current + " (Unicode: " + (int) current + ')');
        }
    }
    return pos - startpos;
}
Also used : CharsetEncoder(java.nio.charset.CharsetEncoder) WriterException(com.google.zxing.WriterException)

Example 62 with CharsetEncoder

use of java.nio.charset.CharsetEncoder in project cogcomp-nlp by CogComp.

the class StringTransformationCleanup method normalizeToEncoding.

/**
 * Tries to normalize the transformed text to the specified encoding. When the text is already
 * fully encodable nothing is changed; otherwise each position is replaced, one character at a
 * time, by the substitute reported by {@code normalizeCharacter}.
 *
 * @param stringTransformation the transformation whose current text is normalized; edited in place
 * @param encoding             the target charset
 * @return the same {@code stringTransformation} instance that was passed in
 */
public static StringTransformation normalizeToEncoding(StringTransformation stringTransformation, Charset encoding) {
    final String text = stringTransformation.getTransformedText();
    if (encoding.newEncoder().canEncode(text)) {
        // Everything is representable already; leave the transformation untouched.
        return stringTransformation;
    }
    final int textLength = text.length();
    int editIndex = 0;
    int cursor = 0;
    while (cursor < textLength) {
        Pair<Boolean, Integer> normalized = normalizeCharacter(text, encoding, cursor);
        // The substitute is narrowed to a single UTF-16 unit.
        final char substitute = (char) normalized.getSecond().intValue();
        stringTransformation.transformString(editIndex, editIndex + 1, String.valueOf(substitute));
        editIndex++;
        // NOTE(review): charCount of a value already narrowed to char is always 1, so the cursor
        // advances one UTF-16 unit per step — confirm this is intended for supplementary characters.
        cursor += Character.charCount(substitute);
    }
    return stringTransformation;
}
Also used : CharsetEncoder(java.nio.charset.CharsetEncoder)

Example 63 with CharsetEncoder

use of java.nio.charset.CharsetEncoder in project intellij-community by JetBrains.

the class PyElementGeneratorImpl method createStringLiteralFromString.

/**
 * Builds a Python string literal expression from raw (unescaped) text.
 *
 * <p>Double quotes are used unless the text itself contains a double quote, in which case single
 * quotes are used. Characters that the target charset cannot encode are written as
 * {@code \xNN}, {@code \\uNNNN} or {@code \UNNNNNNNN} escapes; when a {@code \\u} or {@code \U}
 * escape is emitted, the literal is given a {@code u} prefix.
 *
 * @param destination file whose charset decides which characters need escaping; may be {@code null}
 * @param unescaped   the raw text to put into the literal
 * @param preferUTF8  when no virtual file is available, selects UTF-8 instead of US-ASCII
 * @return the expression created from the escaped literal text
 */
@Override
public PyStringLiteralExpression createStringLiteralFromString(@Nullable PsiFile destination, @NotNull String unescaped, final boolean preferUTF8) {
    boolean useDouble = !unescaped.contains("\"");
    // NOTE(review): '.' does not match line terminators, so this full-string match only succeeds
    // when the text holds a single CR or LF; texts with several line breaks fall through to the
    // escaped single-line form — confirm whether that is intended.
    boolean useMulti = unescaped.matches(".*(\r|\n).*");
    String quotes;
    if (useMulti) {
        quotes = useDouble ? "\"\"\"" : "'''";
    } else {
        quotes = useDouble ? "\"" : "'";
    }
    StringBuilder buf = new StringBuilder(unescaped.length() * 2);
    buf.append(quotes);
    VirtualFile vfile = destination == null ? null : destination.getVirtualFile();
    Charset charset;
    if (vfile == null) {
        charset = (preferUTF8 ? CharsetToolkit.UTF8_CHARSET : Charset.forName("US-ASCII"));
    } else {
        charset = vfile.getCharset();
    }
    CharsetEncoder encoder = charset.newEncoder();
    Formatter formatter = new Formatter(buf);
    boolean unicode = false;
    final int length = unescaped.length();
    for (int i = 0; i < length; ) {
        int c = unescaped.codePointAt(i);
        // Advance by the full width of the code point so that the low surrogate of a
        // supplementary character is not visited a second time as a lone surrogate.
        i += Character.charCount(c);
        if (c == '"' && useDouble) {
            buf.append("\\\"");
        } else if (c == '\'' && !useDouble) {
            buf.append("\\'");
        } else if ((c == '\r' || c == '\n') && !useMulti) {
            buf.append(c == '\r' ? "\\r" : "\\n");
        } else if (!encoder.canEncode(new String(Character.toChars(c)))) {
            if (c <= 0xff) {
                formatter.format("\\x%02x", c);
            } else if (c <= 0xffff) {
                // Everything in the Basic Multilingual Plane, U+FFFF included, fits in four hex digits.
                unicode = true;
                formatter.format("\\u%04x", c);
            } else {
                unicode = true;
                formatter.format("\\U%08x", c);
            }
        } else {
            buf.appendCodePoint(c);
        }
    }
    buf.append(quotes);
    if (unicode) {
        buf.insert(0, "u");
    }
    return createStringLiteralAlreadyEscaped(buf.toString());
}
Also used : VirtualFile(com.intellij.openapi.vfs.VirtualFile) LightVirtualFile(com.intellij.testFramework.LightVirtualFile) Formatter(java.util.Formatter) Charset(java.nio.charset.Charset) CharsetEncoder(java.nio.charset.CharsetEncoder)

Example 64 with CharsetEncoder

use of java.nio.charset.CharsetEncoder in project android_frameworks_base by AOSPA.

the class StrictJarManifest method write.

/**
 * Serializes the attributes of a manifest to an output stream.
 *
 * <p>The main section is written only when a {@code Manifest-Version} or, failing that, a
 * {@code Signature-Version} attribute is present; that attribute goes first, followed by the
 * remaining main attributes. A line separator follows, then one section per named entry, each
 * terminated by a line separator.
 *
 * @param manifest
 *            the manifest to write out.
 * @param out
 *            the {@code OutputStream} to write to.
 * @throws IOException
 *             if an error occurs writing the {@code StrictJarManifest}.
 */
static void write(StrictJarManifest manifest, OutputStream out) throws IOException {
    final CharsetEncoder utf8Encoder = StandardCharsets.UTF_8.newEncoder();
    final ByteBuffer scratch = ByteBuffer.allocate(LINE_LENGTH_LIMIT);

    // Pick the attribute that leads the main section: manifest version, else signature version.
    Attributes.Name leadName = Attributes.Name.MANIFEST_VERSION;
    String leadValue = manifest.mainAttributes.getValue(leadName);
    if (leadValue == null) {
        leadName = Attributes.Name.SIGNATURE_VERSION;
        leadValue = manifest.mainAttributes.getValue(leadName);
    }

    if (leadValue != null) {
        writeEntry(out, leadName, leadValue, utf8Encoder, scratch);
        for (Object rawKey : manifest.mainAttributes.keySet()) {
            Attributes.Name attrName = (Attributes.Name) rawKey;
            if (attrName.equals(leadName)) {
                // Already emitted as the first line of the section.
                continue;
            }
            writeEntry(out, attrName, manifest.mainAttributes.getValue(attrName), utf8Encoder, scratch);
        }
    }
    out.write(LINE_SEPARATOR);

    for (String entryName : manifest.getEntries().keySet()) {
        writeEntry(out, Attributes.Name.NAME, entryName, utf8Encoder, scratch);
        Attributes entryAttributes = manifest.entries.get(entryName);
        for (Object rawKey : entryAttributes.keySet()) {
            Attributes.Name attrName = (Attributes.Name) rawKey;
            writeEntry(out, attrName, entryAttributes.getValue(attrName), utf8Encoder, scratch);
        }
        out.write(LINE_SEPARATOR);
    }
}
Also used : Attributes(java.util.jar.Attributes) CharsetEncoder(java.nio.charset.CharsetEncoder) ByteBuffer(java.nio.ByteBuffer)

Example 65 with CharsetEncoder

use of java.nio.charset.CharsetEncoder in project presto by prestodb.

the class PrestoConnection method setSessionProperty.

/**
 * Adds a session property (experimental).
 *
 * <p>Both arguments must be non-null, the name must be non-empty and free of {@code '='}, and
 * both name and value must be encodable as US-ASCII. A validated pair is stored in
 * {@code sessionProperties}.
 *
 * @param name  the property name
 * @param value the property value
 */
public void setSessionProperty(String name, String value) {
    requireNonNull(name, "name is null");
    requireNonNull(value, "value is null");
    checkArgument(!name.isEmpty(), "name is empty");
    checkArgument(!name.contains("="), "Session property name must not contain '=': %s", name);

    // One encoder serves both ASCII checks.
    final CharsetEncoder asciiEncoder = US_ASCII.newEncoder();
    checkArgument(asciiEncoder.canEncode(name), "Session property name is not US_ASCII: %s", name);
    checkArgument(asciiEncoder.canEncode(value), "Session property value is not US_ASCII: %s", value);

    sessionProperties.put(name, value);
}
Also used : CharsetEncoder(java.nio.charset.CharsetEncoder)

Aggregations

CharsetEncoder (java.nio.charset.CharsetEncoder)84 ByteBuffer (java.nio.ByteBuffer)43 Charset (java.nio.charset.Charset)27 CharacterCodingException (java.nio.charset.CharacterCodingException)16 CharBuffer (java.nio.CharBuffer)15 CoderResult (java.nio.charset.CoderResult)13 ByteArrayInputStream (java.io.ByteArrayInputStream)6 UnsupportedCharsetException (java.nio.charset.UnsupportedCharsetException)6 IStatus (org.eclipse.core.runtime.IStatus)6 IOException (java.io.IOException)5 InputStream (java.io.InputStream)5 IllegalCharsetNameException (java.nio.charset.IllegalCharsetNameException)5 UnmappableCharacterException (java.nio.charset.UnmappableCharacterException)5 Attributes (java.util.jar.Attributes)5 CoreException (org.eclipse.core.runtime.CoreException)5 Status (org.eclipse.core.runtime.Status)5 OutputStreamWriter (java.io.OutputStreamWriter)4 SequenceInputStream (java.io.SequenceInputStream)4 BufferedWriter (java.io.BufferedWriter)3 OutputStream (java.io.OutputStream)3