下面列出了 java.text.Normalizer#normalize() 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Normalizes free text by replacing a fixed set of symbols, trimming trailing
 * whitespace, applying Unicode NFKD decomposition and finally removing every
 * match of {@code diacriticalMarksPattern}.
 *
 * @param text the text to normalize; must not be {@code null}
 * @return the normalized text
 */
static String normalizeText(String text) {
    String cleaned = text
            // whitespace, double quotes, percent signs and '®' all become a single space
            .replaceAll("[\\s\"%®]", " ")
            // vertical bar becomes 'I', e.g. for "Deiodinase type || (DIO2)"
            .replaceAll("\\|", "I")
            // multiplication sign '×' becomes the letter 'x'
            .replaceAll("×", "x")
            // sharp s 'ß' becomes the Greek letter beta
            .replaceAll("ß", "β")
            // middle dot '·' becomes a full stop
            .replaceAll("·", ".")
            // plus-minus sign '±' becomes a plain plus
            .replaceAll("±", "+")
            // strip whitespace at the end of the text
            .replaceAll("\\s+$", "");

    // NFKD splits accented and look-alike characters into base character plus marks, see
    // http://stackoverflow.com/questions/20674577/how-to-compare-unicode-characters-that-look-alike
    String decomposed = Normalizer.normalize(cleaned, Normalizer.Form.NFKD);
    return diacriticalMarksPattern.matcher(decomposed).replaceAll("");
}
/**
 * Filters control characters out of the input by replacing them with spaces.
 *
 * <p>The characters with codes 1, 2, 9 (tab), 10 (line feed) and 12 (form feed)
 * are always replaced. Code 13 (carriage return) is replaced only when
 * {@code filerNewline} is set. When the {@code normalise.input.strings}
 * configuration flag is enabled the result is additionally converted to
 * Unicode NFD form.
 *
 * @param input the input
 * @param filerNewline if carriage returns (ENTER) should be filtered as well
 * @return the filtered string
 */
public static String filterInput(String input, boolean filerNewline) {
    String filtered = input;
    for (char control : new char[] {1, 2, 9, 10, 12}) {
        filtered = filtered.replace(control, ' ');
    }
    if (filerNewline) {
        filtered = filtered.replace((char) 13, ' ');
    }

    boolean normalise = GameConfiguration.getInstance().getBoolean("normalise.input.strings");
    return normalise ? Normalizer.normalize(filtered, Normalizer.Form.NFD) : filtered;
}
/**
 * Converts the path to Unicode NFC form, validates every character through
 * {@code checkCharacter} and removes redundant slashes.
 *
 * <p>The fast path returns the NFC string untouched. As soon as two consecutive
 * separators are seen, or the path ends with a separator, the work is handed
 * over to {@code normalize(String, int, int)}.
 *
 * @param uncleanPath path to check
 * @return normalized path
 * @throws InvalidPathException if <code>uncleanPath</code> contains invalid characters
 *         (presumably raised by {@code checkCharacter}; verify against that helper)
 */
static String normalizeAndCheck(final String uncleanPath) {
    final String path = Normalizer.normalize(uncleanPath, Normalizer.Form.NFC);
    final int length = path.length();
    // NOTE(review): this flag is never updated here, so checkCharacter always receives false.
    final boolean afterSeparator = false;

    char previous = 0;
    for (int index = 0; index < length; index++) {
        final char current = path.charAt(index);
        checkCharacter(path, current, afterSeparator);
        final boolean doubledSeparator = current == SEPARATOR && previous == SEPARATOR;
        if (doubledSeparator) {
            // hand over starting at the first separator of the pair
            return normalize(path, length, index - 1);
        }
        previous = current;
    }

    if (previous != SEPARATOR) {
        return path;
    }
    // remove final slash
    return normalize(path, length, length - 1);
}
/**
 * Normalizes a percent-encoded sequence.
 *
 * <p>Sequences found in the {@code common_pct} lookup table are handled
 * directly: they are decoded when the matching code point is a member of
 * {@code unreserved}, and passed through unchanged otherwise. Any other
 * sequence is decoded, converted to Unicode NFC form and re-encoded code point
 * by code point, leaving unreserved code points decoded.
 *
 * @param encoded the percent-encoded sequence
 * @return the normalized sequence
 */
private String normalizePctEncoding(String encoded) {
    int cidx = Arrays.binarySearch(common_pct, encoded);
    if (cidx >= 0) {
        return isMember(unreserved, common[cidx])
                ? new String(Character.toChars(common[cidx])) // quickly decode unreserved encodings
                : encoded; // pass through reserved encodings
    }

    String ns = Normalizer.normalize(pctDecode(encoded), Normalizer.Form.NFC);
    int codePoints = ns.codePointCount(0, ns.length());
    StringBuilder sb = new StringBuilder(ns.length());
    // Walk the string by char offset so each code point is visited once; calling
    // offsetByCodePoints(0, c) per iteration would rescan from the start each time.
    for (int offset = 0; offset < ns.length(); ) {
        int chr = ns.codePointAt(offset);
        offset += Character.charCount(chr);
        if (isMember(unreserved, chr)) {
            sb.appendCodePoint(chr);
        } else if (codePoints == 1) {
            // a single non-unreserved code point: keep the original encoding, upper-cased
            return toUpperCase(encoded);
        } else {
            sb.append(pctEncode(chr));
        }
    }
    return sb.toString();
}
/**
 * Turns a PIN into its canonical byte representation.
 *
 * <p>The PIN is trimmed, passed through {@code PinString.toArabic} when
 * {@code PinString.allNumeric} reports it as all-numeric, converted to Unicode
 * NFKD form and finally encoded as UTF-8.
 *
 * @param pin the PIN as entered; must not be {@code null}
 * @return the UTF-8 bytes of the normalized PIN
 */
public static byte[] normalize(String pin) {
    final String trimmed = pin.trim();
    final String digits = PinString.allNumeric(trimmed) ? PinString.toArabic(trimmed) : trimmed;
    return Normalizer.normalize(digits, Normalizer.Form.NFKD).getBytes(StandardCharsets.UTF_8);
}
/**
 * Get the character that is produced by combining the dead key producing accent
 * with the key producing character c.
 * For example, getDeadChar('`', 'e') returns è.
 * getDeadChar('^', ' ') returns '^' and getDeadChar('^', '^') returns '^'.
 *
 * <p>Results, including failed combinations (stored as 0), are memoized in
 * {@code sDeadKeyCache}; lookups and updates happen while holding its monitor.
 *
 * @param accent The accent character. eg. '`'
 * @param c The basic character.
 * @return The combined character, or 0 if the characters cannot be combined.
 */
public static int getDeadChar(int accent, int c) {
    // The same dead character typed twice, or a dead character followed by a space,
    // yields the non-combining accent itself; nothing has to be computed.
    if (c == accent || CHAR_SPACE == c) {
        return accent;
    }

    final int combiningMark = sAccentToCombining.get(accent);
    if (combiningMark == 0) {
        return 0;
    }

    // cache key: combining mark in the upper 16 bits, base character below
    final int cacheKey = (combiningMark << 16) | c;
    synchronized (sDeadKeyCache) {
        final int cached = sDeadKeyCache.get(cacheKey, -1);
        if (cached != -1) {
            return cached;
        }

        // Build "base + combining mark" and let NFC try to compose it.
        sDeadKeyBuilder.setLength(0);
        sDeadKeyBuilder.append((char) c);
        sDeadKeyBuilder.append((char) combiningMark);
        final String composed = Normalizer.normalize(sDeadKeyBuilder, Normalizer.Form.NFC);

        // Only a result that is exactly one code point counts as a successful combination.
        final int result;
        if (composed.codePointCount(0, composed.length()) == 1) {
            result = composed.codePointAt(0);
        } else {
            result = 0;
        }
        sDeadKeyCache.put(cacheKey, result);
        return result;
    }
}
@SuppressWarnings("NewApi")
private MasterSeed(byte[] bip39RawEntropy, String bip39Passphrase, byte[] bip32MasterSeed) {
_bip39RawEntropy = bip39RawEntropy;
_bip39Passphrase = Normalizer.normalize(bip39Passphrase, Normalizer.Form.NFKD);
_bip32MasterSeed = bip32MasterSeed;
_wordListType = ENGLISH_WORD_LIST_TYPE;
}
/**
 * Checks whether this file object's name matches the given simple class name
 * combined with the extension of the given kind.
 *
 * <p>Besides an exact match, two lenient matches are attempted: on Mac OS a
 * match after converting the stored name from NFD to NFC (in which case the
 * stored name is updated), and a case-insensitive match that is confirmed
 * against the canonical file name.
 *
 * @param cn the simple class name; must not be {@code null}
 * @param kind the kind whose extension is appended to {@code cn}
 * @return {@code true} if the names are compatible
 */
@Override
public boolean isNameCompatible(String cn, JavaFileObject.Kind kind) {
    // null check
    cn.getClass();

    final boolean otherKindMismatch = kind == Kind.OTHER && getKind() != kind;
    if (otherKindMismatch) {
        return false;
    }

    final String expected = cn + kind.extension;
    if (name.equals(expected)) {
        return true;
    }

    if (isMacOS
            && Normalizer.isNormalized(name, Normalizer.Form.NFD)
            && Normalizer.isNormalized(expected, Normalizer.Form.NFC)) {
        // On Mac OS X the file name and the class name may well be normalized
        // differently; in that case the file name has to be converted to
        // Normalization Form Composed (NFC) before comparing.
        final String composedName = Normalizer.normalize(name, Normalizer.Form.NFC);
        if (composedName.equals(expected)) {
            this.name = composedName;
            return true;
        }
    }

    if (name.equalsIgnoreCase(expected)) {
        try {
            // allow for Windows
            return file.getCanonicalFile().getName().equals(expected);
        } catch (IOException ignored) {
            // best effort: an unresolvable canonical file counts as "not compatible"
        }
    }
    return false;
}
/**
 * Builds a slug from the input: every {@code WHITESPACE} match becomes a dash,
 * the text is decomposed to Unicode NFD form, every {@code NONLATIN} match is
 * removed and all dashes are finally turned into underscores.
 *
 * @param input the text to convert; must not be {@code null}
 * @return the slug
 */
public static String slugify(String input) {
    final String dashed = WHITESPACE.matcher(input).replaceAll("-");
    final String decomposed = Normalizer.normalize(dashed, Form.NFD);
    return NONLATIN.matcher(decomposed)
            .replaceAll("")
            .replace('-', '_');
}
/**
* The pattern is converted to normalizedD form and then a pure group
* is constructed to match canonical equivalences of the characters.
*
* Reads the {@code pattern} field and writes {@code normalizedPattern} twice:
* first with the NFD form of the pattern, then with the rewritten pattern.
* {@code patternLength} is set to the length of the NFD form and is not
* recomputed after the rewrite within this method.
*/
private void normalize() {
// NOTE(review): inCharClass is declared but never read in this method.
boolean inCharClass = false;
// Code point handled by the previous loop iteration; -1 until one has been seen.
int lastCodePoint = -1;
// Convert pattern into normalizedD form
normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);
patternLength = normalizedPattern.length();
// Modify pattern to match canonical equivalences
StringBuilder newPattern = new StringBuilder(patternLength);
// i is a char index into normalizedPattern; it is advanced inside the body.
for(int i=0; i<patternLength; ) {
int c = normalizedPattern.codePointAt(i);
StringBuilder sequenceBuffer;
// A non-spacing mark that follows some code point starts a combining sequence.
if ((Character.getType(c) == Character.NON_SPACING_MARK)
&& (lastCodePoint != -1)) {
// Collect the previous code point followed by the run of marks.
sequenceBuffer = new StringBuilder();
sequenceBuffer.appendCodePoint(lastCodePoint);
sequenceBuffer.appendCodePoint(c);
while(Character.getType(c) == Character.NON_SPACING_MARK) {
i += Character.charCount(c);
if (i >= patternLength)
break;
c = normalizedPattern.codePointAt(i);
// NOTE(review): this also appends the first code point that is NOT a mark;
// that code point ends the loop and is then skipped by the increment at
// the bottom of the for body — confirm this is intended.
sequenceBuffer.appendCodePoint(c);
}
String ea = produceEquivalentAlternation(
sequenceBuffer.toString());
// Drop the chars of lastCodePoint from the end of newPattern (presumably the
// base character appended on the previous iteration) and emit the
// alternation as a non-capturing group instead.
newPattern.setLength(newPattern.length()-Character.charCount(lastCodePoint));
newPattern.append("(?:").append(ea).append(")");
} else if (c == '[' && lastCodePoint != '\\') {
// An opening bracket not preceded by a backslash is delegated to
// normalizeCharClass; the index it returns is still advanced by the
// shared increment below.
i = normalizeCharClass(newPattern, i);
} else {
// Any other code point is copied through unchanged.
newPattern.appendCodePoint(c);
}
lastCodePoint = c;
i += Character.charCount(c);
}
normalizedPattern = newPattern.toString();
}
/**
 * Tries to compose the leading character of the input with the first
 * combining mark that follows it.
 *
 * <p>The prefix covered by {@code countChars(input, 0, 2)} is converted to
 * Unicode NFC form. If that changes the prefix, the composed prefix followed
 * by the untouched rest of the input is returned.
 *
 * @param input the sequence to compose
 * @return the input with its prefix composed, or {@code null} if NFC leaves
 *         the prefix unchanged
 */
private String composeOneStep(String input) {
    final int prefixLength = countChars(input, 0, 2);
    final String prefix = input.substring(0, prefixLength);
    final String composedPrefix = Normalizer.normalize(prefix, Normalizer.Form.NFC);

    if (composedPrefix.equals(prefix)) {
        // nothing could be composed
        return null;
    }
    return composedPrefix + input.substring(prefixLength);
}
/**
 * Builds an {@code SmsMsg} from the SMS parts carried by the intent.
 *
 * <p>The sender is taken from the first message part, the body is obtained
 * from {@code SmsMessageUtils.getMessageBody}; both are converted to Unicode
 * NFC form before being stored.
 *
 * @param intent the intent carrying the SMS message parts
 * @return a new message holding the normalized sender and body
 */
public static SmsMsg fromIntent(Intent intent) {
    final SmsMessage[] parts = SmsMessageUtils.fromIntent(intent);

    // read both raw values first, then normalize them
    final String rawSender = parts[0].getDisplayOriginatingAddress();
    final String rawBody = SmsMessageUtils.getMessageBody(parts);
    final String sender = Normalizer.normalize(rawSender, Normalizer.Form.NFC);
    final String body = Normalizer.normalize(rawBody, Normalizer.Form.NFC);

    final SmsMsg result = new SmsMsg();
    result.setSender(sender).setBody(body);
    return result;
}
/**
 * Converts the specified string to its slug representation, which can be used
 * to generate readable and SEO-friendly URLs.
 *
 * <p>Steps, in order: transliteration through {@code transliterator}, every
 * {@code WHITESPACE} match replaced by a dash, Unicode NFD decomposition,
 * removal of every {@code NONLATIN} match, removal of every
 * {@code EDGESDHASHES} match, and lower-casing with the English locale.
 *
 * @param input string, which will be converted.
 * @return slug representation of the string
 */
public static String toSlug(String input) {
    final String dashed = WHITESPACE.matcher(transliterator.transform(input)).replaceAll("-");
    final String decomposed = Normalizer.normalize(dashed, Normalizer.Form.NFD);
    final String latinOnly = NONLATIN.matcher(decomposed).replaceAll("");
    return EDGESDHASHES.matcher(latinOnly)
            .replaceAll("")
            .toLowerCase(Locale.ENGLISH);
}
/**
 * Derives a context path from a domain name.
 *
 * <p>The name is decomposed to Unicode NFD form and stripped of combining
 * diacritical marks, then lower-cased and trimmed, and every run of
 * whitespace is replaced by a single dash.
 *
 * @param domainName the domain name; must not be {@code null}
 * @return the generated context path
 */
private String generateContextPath(String domainName) {
    String decomposed = Normalizer.normalize(domainName, Normalizer.Form.NFD);
    Pattern combiningMarks = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    String withoutMarks = combiningMarks.matcher(decomposed).replaceAll("");
    // Lower-case with Locale.ROOT so the path does not depend on the JVM default
    // locale (a Turkish default would otherwise turn 'I' into dotless 'ı').
    return withoutMarks.toLowerCase(java.util.Locale.ROOT).trim().replaceAll("\\s{1,}", "-");
}
/**
 * Returns the main class for a module. The query is either a module name
 * or module-name/main-class. For the former, the module's main class
 * is obtained from the module descriptor (MainClass attribute).
 *
 * <p>On success the system property {@code jdk.module.main.class} is set to
 * the name of the loaded class. Failures are reported through {@code abort}.
 *
 * @param what the module name, optionally followed by {@code /} and a class name
 * @return the loaded main class
 */
private static Class<?> loadModuleMainClass(String what) {
    final int slash = what.indexOf('/');
    final boolean classGiven = slash != -1;
    final String mainModule = classGiven ? what.substring(0, slash) : what;
    String mainClass = classGiven ? what.substring(slash + 1) : null;

    // main module is in the boot layer; its absence should not happen
    final Module m = ModuleLayer.boot()
            .findModule(mainModule)
            .orElseThrow(() -> new InternalError("Module " + mainModule + " not in boot Layer"));

    // fall back to the main class recorded in the module descriptor
    if (mainClass == null) {
        final Optional<String> declared = m.getDescriptor().mainClass();
        if (!declared.isPresent()) {
            abort(null, "java.launcher.module.error1", mainModule);
        }
        mainClass = declared.get();
    }

    // load the class from the module
    Class<?> loaded = null;
    try {
        loaded = Class.forName(m, mainClass);
        if (loaded == null) {
            // On OS X the name may have arrived in NFD form; retry with its NFC form.
            final boolean onOsX = System.getProperty("os.name", "").contains("OS X");
            if (onOsX && Normalizer.isNormalized(mainClass, Normalizer.Form.NFD)) {
                final String composedName = Normalizer.normalize(mainClass, Normalizer.Form.NFC);
                loaded = Class.forName(m, composedName);
            }
        }
    } catch (LinkageError le) {
        abort(null, "java.launcher.module.error3", mainClass, m.getName(),
                le.getClass().getName() + ": " + le.getLocalizedMessage());
    }
    if (loaded == null) {
        abort(null, "java.launcher.module.error2", mainClass, mainModule);
    }

    System.setProperty("jdk.module.main.class", loaded.getName());
    return loaded;
}
/**
 * Converts a URI string to Unicode NFC (canonical composition) form.
 *
 * @param uri the URI text; must not be {@code null}
 * @return the NFC form of {@code uri}
 */
protected static String normalize(String uri) {
    final String composed = Normalizer.normalize(uri, Normalizer.Form.NFC);
    return composed;
}
/**
 * Converts a string to Unicode NFC form and encodes it as UTF-8.
 *
 * @param s the text to encode; must not be {@code null}
 * @return a buffer holding the UTF-8 bytes of the NFC form of {@code s}
 */
private static ByteBuffer normalize(String s) {
    final String composed = Normalizer.normalize(s, Normalizer.Form.NFC);
    // Charset.encode(String) is specified as equivalent to encode(CharBuffer.wrap(...))
    return StandardCharsets.UTF_8.encode(composed);
}
/**
 * Produces a lower-case, ASCII-only form of the given text.
 *
 * <p>The text is decomposed to Unicode NFD form, every non-ASCII character
 * (including the combining marks split off by the decomposition) is replaced
 * by an underscore, and the result is lower-cased with the English locale.
 *
 * @param str the text to normalize; must not be {@code null}
 * @return the ASCII-only, lower-case form
 */
private static String normalize(CharSequence str) {
    final String decomposed = Normalizer.normalize(str, Normalizer.Form.NFD);
    return decomposed
            .replaceAll("[^\\p{ASCII}]", "_")
            .toLowerCase(Locale.ENGLISH);
}
/**
 * Builds a lower-case slug from the input: every {@code WHITESPACE} match
 * becomes a dash, the text is decomposed to Unicode NFD form, every
 * {@code NONLATIN} match is removed and the result is lower-cased with the
 * English locale.
 *
 * @param input the text to convert; must not be {@code null}
 * @return the slug
 */
public static String slugify(String input) {
    final String dashed = WHITESPACE.matcher(input).replaceAll("-");
    final String decomposed = Normalizer.normalize(dashed, Form.NFD);
    return NONLATIN.matcher(decomposed)
            .replaceAll("")
            .toLowerCase(Locale.ENGLISH);
}
/**
 * Returns the given string in Unicode NFC (canonical composition) normal form.
 *
 * @param arg0 string to be normalized; must not be {@code null}
 * @return the NFC form of {@code arg0}
 * @see Normalizer#normalize(CharSequence, java.text.Normalizer.Form)
 */
public static String normalizeUnicode(String arg0) {
    final Normalizer.Form targetForm = Normalizer.Form.NFC;
    return Normalizer.normalize(arg0, targetForm);
}