下面列出了怎么用java.text.Normalizer的API类实例代码及写法,或者点击链接到github查看源代码。
/**
 * Normalizes {@code str} to the Unicode normalization form named by {@code form}.
 *
 * <p>The form name is compared case-sensitively against "NFC", "NFD", "NFKC" and
 * "NFKD". A {@code null} or unrecognized name falls back to NFC.
 *
 * @param str  the text to normalize; may be {@code null}
 * @param form the name of the normalization form; may be {@code null}
 * @return the normalized text, or {@code null} when {@code str} is {@code null}
 */
@Nullable
public String evaluate(@Nullable String str, @Nullable String form) {
    if (str == null) {
        return null;
    }
    // NFC is both the default for a missing form and the fallback for an unknown one.
    Normalizer.Form target = Normalizer.Form.NFC;
    if (form != null) {
        switch (form) {
            case "NFD":
                target = Normalizer.Form.NFD;
                break;
            case "NFKC":
                target = Normalizer.Form.NFKC;
                break;
            case "NFKD":
                target = Normalizer.Form.NFKD;
                break;
            default:
                // "NFC" and every unrecognized name keep the NFC default.
                break;
        }
    }
    return Normalizer.normalize(str, target);
}
/**
 * Reduces an HTML fragment to plain Basic Latin text.
 *
 * <p>Steps, in order: line feeds are removed, HTML entities are unescaped, the text is
 * decomposed (NFD), every character outside the Basic Latin block is dropped, and
 * finally each {@code <...>} span is cut out. Because unescaping happens before tag
 * removal, escaped markup such as {@code &lt;b&gt;} is stripped as well.
 *
 * <p>A {@code '<'} that is never closed by a {@code '>'} is left in the result together
 * with the text that follows it. (The previous implementation looped forever on such
 * input.)
 *
 * @param input the HTML text; must not be {@code null}
 * @return the text with tags and non Basic Latin characters removed
 */
public static String htmlToString(String input) {
    String str = input.replaceAll("\n", "");
    str = StringEscapeUtils.unescapeHtml4(str);
    str = Normalizer.normalize(str, Normalizer.Form.NFD);
    str = str.replaceAll("\\P{InBasic_Latin}", "");
    int startPos = str.indexOf('<');
    while (startPos >= 0) {
        int endPos = str.indexOf('>', startPos);
        if (endPos < 0) {
            // No closing '>' after the first '<' means no later '<' can be closed
            // either, so stop instead of spinning on the same unterminated tag.
            break;
        }
        str = str.substring(0, startPos) + str.substring(endPos + 1);
        // Everything before startPos is already free of '<', so resume from there.
        startPos = str.indexOf('<', startPos);
    }
    return str;
}
/**
 * Handles a tutorial request: the name captured by group 1 of {@code m} is trimmed,
 * stripped of combining diacritical marks and lower-cased, then searched for.
 *
 * <p>Exactly one match posts the tutorial link to the message's channel; several
 * matches are reported through {@code tooMuchTutos}; no match is reported through
 * {@code notFoundTuto}. An {@link IOException} raised while searching is handed to
 * {@code ExceptionManager}.
 *
 * @param message the message that triggered the request
 * @param m       the matcher whose group 1 holds the requested tutorial name
 * @param lg      the language used for labels and error reporting
 */
@Override
public void request(Message message, Matcher m, Language lg) {
    String requested = m.group(1).trim();
    String decomposed = Normalizer.normalize(requested, Normalizer.Form.NFD);
    // NOTE(review): toLowerCase() uses the JVM default locale - confirm this is intended.
    String normalName = decomposed
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "")
            .toLowerCase();
    String searchTerms = removeUselessWords(normalName);
    BestMatcher bestMatcher = new BestMatcher(normalName);
    try {
        bestMatcher.evaluateAll(getListTutoFrom(getSearchURL(searchTerms), message));
        if (bestMatcher.isUnique()) {
            // A single best candidate: answer with its link.
            message.getChannel()
                    .flatMap(chan -> chan.createMessage(
                            Translator.getLabel(lg, "tutorial.request") + " "
                                    + Constants.dofusPourLesNoobURL + bestMatcher.getBest().getUrl()))
                    .subscribe();
        } else if (!bestMatcher.isEmpty()) {
            // Several equally good candidates: let the user choose.
            tooMuchTutos.throwException(message, this, lg, bestMatcher.getBests());
        } else {
            // Nothing matched.
            notFoundTuto.throwException(message, this, lg);
        }
    } catch (IOException e) {
        ExceptionManager.manageIOException(e, message, this, lg, notFoundTuto);
    }
}
/**
 * Tests whether a string is in the given normalization form.
 *
 * <p>The answer is {@code true} exactly when the quick check of the selected form
 * yields {@code YES} for the whole string.
 * NOTE(review): the result comes solely from {@code quickCheck(...) == YES}; whether
 * an indeterminate quick-check outcome can occur here (and be reported as
 * {@code false}) depends on the quickCheck implementation - confirm.
 *
 * @param str     the input string to be checked to see if it is normalized
 * @param form    the normalization form
 * @param options the optional features to be enabled
 * @return {@code true} if the quick check reports the string as normalized
 * @throws IllegalArgumentException if {@code form} is not NFC, NFD, NFKC or NFKD
 */
public static boolean isNormalized(String str, Normalizer.Form form, int options) {
    final boolean normalized;
    switch (form) {
        case NFC:
            normalized = NFC.quickCheck(
                    str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
            break;
        case NFD:
            normalized = NFD.quickCheck(
                    str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
            break;
        case NFKC:
            normalized = NFKC.quickCheck(
                    str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
            break;
        case NFKD:
            normalized = NFKD.quickCheck(
                    str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
            break;
        default:
            throw new IllegalArgumentException("Unexpected normalization form: " + form);
    }
    return normalized;
}
/**
 * Rewrites every percent-encoding found in {@code path} to the value returned by
 * {@code normalizePctEncoding} and then applies NFC normalization to the result.
 *
 * @param path the path to process; may be {@code null}
 * @return {@code path} itself when it is {@code null}, empty or contains no
 *         {@code '%'}; otherwise the rewritten, NFC-normalized path
 */
private String pctEncodingNormalization(String path) {
    boolean nothingEncoded = path == null || path.length() == 0 || path.indexOf('%') < 0;
    if (nothingEncoded) {
        return path; // no pct encodings
    }
    String[] found = listPctEncodings(path);
    StringBuilder rewritten = new StringBuilder(path);
    // NOTE(review): the search offset only grows by the replacement length, not to the
    // end of the replaced span - confirm this is intended given listPctEncodings' order.
    int searchFrom = 0;
    for (String encoded : found) {
        int at = rewritten.indexOf(encoded, searchFrom);
        String replacement = normalizePctEncoding(encoded);
        rewritten.replace(at, at + encoded.length(), replacement);
        searchFrom += replacement.length();
    }
    return Normalizer.normalize(rewritten, Normalizer.Form.NFC);
}
/**
 * Tests whether a string is in the given normalization form.
 *
 * <p>The answer is {@code true} exactly when the quick check of the selected form
 * yields {@code YES} for the whole string.
 * NOTE(review): the result comes solely from {@code quickCheck(...) == YES}; whether
 * an indeterminate quick-check outcome can occur here (and be reported as
 * {@code false}) depends on the quickCheck implementation - confirm.
 *
 * @param str     the input string to be checked to see if it is normalized
 * @param form    the normalization form
 * @param options the optional features to be enabled
 * @return {@code true} if the quick check reports the string as normalized
 * @throws IllegalArgumentException if {@code form} is not NFC, NFD, NFKC or NFKD
 */
public static boolean isNormalized(String str, Normalizer.Form form, int options) {
    final boolean normalized;
    switch (form) {
        case NFC:
            normalized = NFC.quickCheck(
                    str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
            break;
        case NFD:
            normalized = NFD.quickCheck(
                    str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
            break;
        case NFKC:
            normalized = NFKC.quickCheck(
                    str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
            break;
        case NFKD:
            normalized = NFKD.quickCheck(
                    str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
            break;
        default:
            throw new IllegalArgumentException("Unexpected normalization form: " + form);
    }
    return normalized;
}
/**
 * Replaces selected control characters in the input with spaces and optionally
 * decomposes the text.
 *
 * <p>Characters 1, 2, 9 (tab), 10 (line feed) and 12 (form feed) are always replaced.
 * Character 13 (carriage return) is replaced only when {@code filerNewline} is set.
 * When the {@code normalise.input.strings} configuration flag is on, the result is
 * additionally normalized to NFD.
 *
 * @param input the input; must not be {@code null}
 * @param filerNewline whether carriage returns (char 13) should be replaced too
 * @return the filtered string
 */
public static String filterInput(String input, boolean filerNewline) {
    final char[] alwaysBlanked = {(char) 1, (char) 2, (char) 9, (char) 10, (char) 12};
    String filtered = input;
    for (char control : alwaysBlanked) {
        filtered = filtered.replace(control, ' ');
    }
    if (filerNewline) {
        filtered = filtered.replace((char) 13, ' ');
    }
    boolean decompose = GameConfiguration.getInstance().getBoolean("normalise.input.strings");
    return decompose ? Normalizer.normalize(filtered, Normalizer.Form.NFD) : filtered;
}
/**
 * Hashes {@code text} with the named digest algorithm and returns the hex form.
 *
 * <p>The text is NFC-normalized and encoded as UTF-8. The optional salt is fed into
 * the first digest round only; every further round re-digests the previous output.
 * The first round counts as one iteration, so values of {@code iterations} below 2
 * result in a single round.
 *
 * @param iterations total number of digest rounds requested
 * @param salt       bytes mixed in before the text, or {@code null} for none
 * @param text       the text to hash
 * @param algorithm  the {@link MessageDigest} algorithm name
 * @return the final digest as produced by {@code toHexString}
 * @throws NoSuchAlgorithmException if the algorithm is not available
 */
private static String getHash(int iterations, byte[] salt, String text, String algorithm)
        throws NoSuchAlgorithmException {
    // Rounds remaining after the initial one; never negative.
    int extraRounds = iterations - 1;
    if (extraRounds < 0) {
        extraRounds = 0;
    }
    MessageDigest md = MessageDigest.getInstance(algorithm);
    String canonicalText = Normalizer.normalize(text, Form.NFC);
    if (salt != null) {
        md.update(salt);
    }
    byte[] hash = md.digest(canonicalText.getBytes(StandardCharsets.UTF_8));
    while (extraRounds-- > 0) {
        hash = md.digest(hash);
    }
    return toHexString(hash);
}
/**
 * Tests whether a string is in the given normalization form.
 *
 * <p>The answer is {@code true} exactly when the quick check of the selected form
 * yields {@code YES} for the whole string.
 * NOTE(review): the result comes solely from {@code quickCheck(...) == YES}; whether
 * an indeterminate quick-check outcome can occur here (and be reported as
 * {@code false}) depends on the quickCheck implementation - confirm.
 *
 * @param str     the input string to be checked to see if it is normalized
 * @param form    the normalization form
 * @param options the optional features to be enabled
 * @return {@code true} if the quick check reports the string as normalized
 * @throws IllegalArgumentException if {@code form} is not NFC, NFD, NFKC or NFKD
 */
public static boolean isNormalized(String str, Normalizer.Form form, int options) {
    final boolean normalized;
    switch (form) {
        case NFC:
            normalized = NFC.quickCheck(
                    str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
            break;
        case NFD:
            normalized = NFD.quickCheck(
                    str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
            break;
        case NFKC:
            normalized = NFKC.quickCheck(
                    str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
            break;
        case NFKD:
            normalized = NFKD.quickCheck(
                    str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
            break;
        default:
            throw new IllegalArgumentException("Unexpected normalization form: " + form);
    }
    return normalized;
}
/**
 * Converts a PIN to the byte sequence used for further processing.
 *
 * <p>The PIN is trimmed; if {@code PinString.allNumeric} accepts it, it is passed
 * through {@code PinString.toArabic}. The result is NFKD-normalized and encoded as
 * UTF-8.
 *
 * @param pin the PIN as entered; must not be {@code null}
 * @return the UTF-8 bytes of the normalized PIN
 */
public static byte[] normalize(String pin) {
    String trimmed = pin.trim();
    String candidate = PinString.allNumeric(trimmed) ? PinString.toArabic(trimmed) : trimmed;
    String decomposed = Normalizer.normalize(candidate, Normalizer.Form.NFKD);
    return decomposed.getBytes(StandardCharsets.UTF_8);
}
/**
 * Normalizes a varchar value to the Unicode normalization form named by {@code form}.
 *
 * @param slice the UTF-8 text to normalize
 * @param form  the UTF-8 name of a {@link Normalizer.Form} constant
 * @return the normalized text as a UTF-8 slice
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} when the form name
 *         does not match any {@link Normalizer.Form} constant
 */
@Description("Transforms the string to normalized form")
@ScalarFunction
@LiteralParameters({"x", "y"})
@SqlType(StandardTypes.VARCHAR)
public static Slice normalize(@SqlType("varchar(x)") Slice slice, @SqlType("varchar(y)") Slice form)
{
    Normalizer.Form targetForm;
    try {
        String requestedForm = form.toStringUtf8();
        targetForm = Normalizer.Form.valueOf(requestedForm);
    }
    catch (IllegalArgumentException e) {
        throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Normalization form must be one of [NFD, NFC, NFKD, NFKC]");
    }
    // Kept outside the try so only an invalid form name is translated.
    String normalized = Normalizer.normalize(slice.toStringUtf8(), targetForm);
    return utf8Slice(normalized);
}
/**
 * Checks whether this file object's name matches the simple name {@code cn} combined
 * with the extension of {@code kind}.
 *
 * <p>Besides an exact match, two relaxed matches are accepted: on Mac OS, a file name
 * in NFD that equals the NFC expected name once composed (the stored name is then
 * replaced by the composed one); and a case-insensitive match whose canonical file
 * name equals the expected name exactly.
 *
 * @param cn   the simple name to test; must not be {@code null}
 * @param kind the kind whose extension is appended to {@code cn}
 * @return {@code true} if the names are considered compatible
 */
@Override
public boolean isNameCompatible(String cn, JavaFileObject.Kind kind) {
    cn.getClass(); // null check
    if (kind == Kind.OTHER && getKind() != kind) {
        return false;
    }
    final String expected = cn + kind.extension;
    if (name.equals(expected)) {
        return true;
    }
    // On Mac OS X the file name and the class name may be normalized differently;
    // in that case compare against the file name in Normal Form Composed (NFC).
    boolean differentlyNormalized = isMacOS
            && Normalizer.isNormalized(name, Normalizer.Form.NFD)
            && Normalizer.isNormalized(expected, Normalizer.Form.NFC);
    if (differentlyNormalized) {
        String composed = Normalizer.normalize(name, Normalizer.Form.NFC);
        if (composed.equals(expected)) {
            this.name = composed;
            return true;
        }
    }
    if (!name.equalsIgnoreCase(expected)) {
        return false;
    }
    try {
        // allow for Windows
        return file.getCanonicalFile().getName().equals(expected);
    } catch (IOException e) {
        // The canonical name could not be resolved; treated as "not compatible".
    }
    return false;
}
/**
 * Decomposes the string (NFD) and keeps only the chars in the ASCII range
 * (U+0000 to U+007F), which drops combining marks and any other non-ASCII char.
 *
 * @param string the text to flatten; must not be {@code null}
 * @return the ASCII-only remainder of the decomposed text
 */
public static String flattenToAscii(String string) {
    StringBuilder ascii = new StringBuilder(string.length());
    String decomposed = Normalizer.normalize(string, Normalizer.Form.NFD);
    for (int i = 0; i < decomposed.length(); i++) {
        char ch = decomposed.charAt(i);
        if (ch < 0x80) {
            ascii.append(ch);
        }
    }
    return ascii.toString();
}
/**
 * Get the character that is produced by combining the dead key producing accent
 * with the key producing character c.
 * For example, getDeadChar('`', 'e') returns è.
 * getDeadChar('^', ' ') returns '^' and getDeadChar('^', '^') returns '^'.
 *
 * <p>Computed results, including failures (0), are stored in the dead-key cache.
 *
 * @param accent The accent character. eg. '`'
 * @param c The basic character.
 * @return The combined character, or 0 if the characters cannot be combined.
 */
public static int getDeadChar(int accent, int c) {
    // The same dead key typed twice, or a dead key followed by a space, yields the
    // plain (non-combining) accent; no composition is needed.
    if (c == accent || CHAR_SPACE == c) {
        return accent;
    }
    final int combining = sAccentToCombining.get(accent);
    if (combining == 0) {
        return 0;
    }
    final int cacheKey = (combining << 16) | c;
    synchronized (sDeadKeyCache) {
        final int cached = sDeadKeyCache.get(cacheKey, -1);
        if (cached != -1) {
            return cached;
        }
        // Cache miss: compose "base + combining mark" and keep the result only when
        // it collapses to exactly one code point.
        sDeadKeyBuilder.setLength(0);
        sDeadKeyBuilder.append((char)c);
        sDeadKeyBuilder.append((char)combining);
        final String composed = Normalizer.normalize(sDeadKeyBuilder, Normalizer.Form.NFC);
        final int computed;
        if (composed.codePointCount(0, composed.length()) == 1) {
            computed = composed.codePointAt(0);
        } else {
            computed = 0;
        }
        sDeadKeyCache.put(cacheKey, computed);
        return computed;
    }
}
@SuppressWarnings("NewApi")
private MasterSeed(byte[] bip39RawEntropy, String bip39Passphrase, byte[] bip32MasterSeed) {
_bip39RawEntropy = bip39RawEntropy;
_bip39Passphrase = Normalizer.normalize(bip39Passphrase, Normalizer.Form.NFKD);
_bip32MasterSeed = bip32MasterSeed;
_wordListType = ENGLISH_WORD_LIST_TYPE;
}
/**
 * Generate a master seed from a BIP39 word list.
 * <p/>
 * This method does not check whether the checksum of the word list is valid.
 * <p/>
 * The words are joined with single spaces to form the PBKDF2 password; the salt is
 * {@code BASE_SALT} followed by the passphrase, NFKD-normalized.
 * NOTE(review): only the salt is NFKD-normalized here, the joined mnemonic is used as
 * is - confirm this is intended for non-ASCII word lists.
 *
 * @param wordList   the word list
 * @param passphrase the optional passphrase; {@code null} is treated as empty
 * @return the BIP32 master seed
 */
@SuppressWarnings("NewApi")
public static MasterSeed generateSeedFromWordList(List<String> wordList, String passphrase) {
    // Null passphrase defaults to the empty string
    final String effectivePassphrase = (passphrase == null) ? "" : passphrase;

    // Concatenate all words using a single space as separator
    StringBuilder joined = new StringBuilder();
    for (String word : wordList) {
        joined.append(word);
        joined.append(' ');
    }
    final String mnemonic = joined.toString().trim();

    // The salt is the passphrase with a prefix
    final String salt = BASE_SALT + effectivePassphrase;

    // Calculate and return the seed
    byte[] seed;
    try {
        String normalizedSalt = Normalizer.normalize(salt, Normalizer.Form.NFKD);
        byte[] saltBytes = normalizedSalt.getBytes(UTF8);
        byte[] passwordBytes = mnemonic.getBytes(UTF8);
        seed = PBKDF.pbkdf2(ALGORITHM, passwordBytes, saltBytes, REPETITIONS, BIP32_SEED_LENGTH);
    } catch (UnsupportedEncodingException | GeneralSecurityException e) {
        // UTF-8 should be supported by every system we run on
        throw new RuntimeException(e);
    }
    String[] words = wordList.toArray(new String[0]);
    return new MasterSeed(wordListToRawEntropy(words), effectivePassphrase, seed);
}
/**
 * Returns the NFC-normalized input when the platform SDK level is at least 9;
 * on older levels the input is returned unchanged.
 *
 * @param input the text to normalize
 * @return the normalized text, or {@code input} itself on SDK levels below 9
 */
private static String doNormalise(String input)
{
    final boolean normalizerAvailable = Build.VERSION.SDK_INT >= 9;
    return normalizerAvailable
            ? Normalizer.normalize(input, Normalizer.Form.NFC)
            : input;
}
/**
 * Reduces an id or name to a lower-case ASCII alphanumeric key.
 *
 * <p>The text is decomposed (NFD), every character other than {@code A-Z},
 * {@code a-z} and {@code 0-9} is removed (which also drops the combining marks split
 * off by the decomposition), and the remainder is lower-cased.
 *
 * <p>Lower-casing uses {@link java.util.Locale#ROOT} so the key does not depend on
 * the JVM default locale; with a Turkish default locale, for example, a plain
 * {@code toLowerCase()} would turn {@code 'I'} into the non-ASCII dotless {@code 'ı'}.
 *
 * @param idOrName the id or name; may be {@code null}
 * @return the normalized key, or the empty string when {@code idOrName} is {@code null}
 */
static String normalizeName(@Nullable String idOrName) {
    if (idOrName == null) {
        return "";
    }
    return Normalizer.normalize(idOrName, Normalizer.Form.NFD)
            .replaceAll("[^A-Za-z0-9]", "")
            .toLowerCase(java.util.Locale.ROOT);
}
@SuppressWarnings("NewApi")
private MasterSeed(byte[] bip39RawEntropy, String bip39Passphrase, byte[] bip32MasterSeed) {
_bip39RawEntropy = bip39RawEntropy;
_bip39Passphrase = Normalizer.normalize(bip39Passphrase, Normalizer.Form.NFKD);
_bip32MasterSeed = bip32MasterSeed;
_wordListType = ENGLISH_WORD_LIST_TYPE;
}
/**
 * Returns {@code name} in this instance's configured normalization form.
 *
 * <p>Input that is already normalized is returned as is (same instance); otherwise a
 * normalized copy is produced and the conversion is logged at debug level.
 *
 * @param name the name to normalize
 * @return {@code name} itself when already normalized, else its normalized form
 */
@Override
public CharSequence normalize(final CharSequence name) {
    if(Normalizer.isNormalized(name, form)) {
        // Nothing to do; hand back the caller's instance.
        return name;
    }
    final String normalized = Normalizer.normalize(name, form);
    if(log.isDebugEnabled()) {
        log.debug(String.format("Normalized string %s to %s", name, normalized));
    }
    return normalized;
}
/**
 * Adds, for every surface form containing combining diacritical marks after NFD
 * decomposition, the variant with those marks removed.
 *
 * <p>The given set is modified in place and returned. A variant is added only when it
 * differs from the surface form it was derived from.
 *
 * @param surfaceForms the mutable set of surface forms to extend
 * @return the same set instance, now including the accent-free variants
 */
private static Set<String> addNonAccentVersion(Set<String> surfaceForms) {
    // Collected separately so the set is not modified while it is being iterated.
    Set<String> accentFree = new HashSet<>();
    for (String original : surfaceForms) {
        String decomposed = Normalizer.normalize(original, Normalizer.Form.NFD);
        String stripped = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
        if (stripped.equals(original)) {
            continue;
        }
        accentFree.add(stripped);
    }
    surfaceForms.addAll(accentFree);
    return surfaceForms;
}
/**
 * Normalizes a <code>String</code> using the given normalization form.
 *
 * <p>A string whose chars are all at most 127 is returned unchanged (same instance)
 * for any of the four supported forms.
 *
 * @param str     the input string to be normalized.
 * @param form    the normalization form
 * @param options the optional features to be enabled.
 * @return the normalized string, or {@code str} itself when it is ASCII-only
 * @throws IllegalArgumentException if {@code form} is not NFC, NFD, NFKC or NFKD
 */
public static String normalize(String str, Normalizer.Form form, int options) {
    final int length = str.length();
    boolean asciiOnly = true;
    if (length < 80) {
        // Short input: scan in place without copying.
        for (int i = 0; asciiOnly && i < length; i++) {
            asciiOnly = str.charAt(i) <= 127;
        }
    } else {
        // Longer input: scan a char[] copy.
        char[] chars = str.toCharArray();
        for (int i = 0; asciiOnly && i < length; i++) {
            asciiOnly = chars[i] <= 127;
        }
    }
    switch (form) {
        case NFC:
            if (asciiOnly) {
                return str;
            }
            return NFC.normalize(str, options);
        case NFD:
            if (asciiOnly) {
                return str;
            }
            return NFD.normalize(str, options);
        case NFKC:
            if (asciiOnly) {
                return str;
            }
            return NFKC.normalize(str, options);
        case NFKD:
            if (asciiOnly) {
                return str;
            }
            return NFKD.normalize(str, options);
        default:
            throw new IllegalArgumentException("Unexpected normalization form: " + form);
    }
}
/**
 * Translates native clipboard / drag-and-drop bytes into a Java object.
 *
 * <p>Three cases are handled here before delegating to the superclass:
 * <ul>
 * <li>{@code CF_URL} requested as {@link URL}: the bytes are decoded (using the
 *     transferable's text encoding flavor when supported, else the default charset)
 *     and the URL is extracted from the resulting property list.</li>
 * <li>A URI-list flavor with {@code CF_FILE}: the entries returned by
 *     {@code dragQueryFile} are joined with the line separator and processing
 *     continues as {@code CF_STRING}; {@code null} is returned when there are none.</li>
 * <li>{@code CF_STRING}: the UTF-8 text is re-encoded after NFC normalization.</li>
 * </ul>
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
                             long format, Transferable transferable) throws IOException {
    final boolean urlRequested =
            format == CF_URL && URL.class.equals(flavor.getRepresentationClass());
    if (urlRequested) {
        String encodingName = Charset.defaultCharset().name();
        if (transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor)) {
            try {
                byte[] rawEncoding = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                encodingName = new String(rawEncoding, StandardCharsets.UTF_8);
            } catch (UnsupportedFlavorException cannotHappen) {
                // Support for the flavor was checked just above.
            }
        }
        // The macOS pasteboard returns a property list that consists of one URL;
        // extract it.
        String propertyList = new String(bytes, encodingName);
        return new URL(extractURL(propertyList));
    }

    if (isUriListFlavor(flavor) && format == CF_FILE) {
        // dragQueryFile works with both files and URLs: it parses the property list
        // macOS always returns for CF_URL and CF_FILE and extracts the values.
        String[] entries = dragQueryFile(bytes);
        if (entries == null) {
            return null;
        }
        String separator = System.getProperty("line.separator");
        bytes = String.join(separator, entries).getBytes();
        // The URIs are now plain text, so let the base method translate them to the
        // target representation class as a regular string.
        format = CF_STRING;
    } else if (format == CF_STRING) {
        String text = new String(bytes, "UTF8");
        bytes = Normalizer.normalize(text, Form.NFC).getBytes("UTF8");
    }

    return super.translateBytes(bytes, flavor, format, transferable);
}
/**
 * Checks whether this file object's name matches the simple name {@code cn} combined
 * with the extension of {@code kind}.
 *
 * <p>Besides an exact match, two relaxed matches are accepted: on Mac OS, a file name
 * in NFD that equals the NFC expected name once composed (the stored name is then
 * replaced by the composed one); and a case-insensitive match whose canonical file
 * name equals the expected name exactly.
 *
 * @param cn   the simple name to test; must not be {@code null}
 * @param kind the kind whose extension is appended to {@code cn}
 * @return {@code true} if the names are considered compatible
 */
@Override
public boolean isNameCompatible(String cn, JavaFileObject.Kind kind) {
    cn.getClass(); // null check
    if (kind == Kind.OTHER && getKind() != kind) {
        return false;
    }
    final String expected = cn + kind.extension;
    if (name.equals(expected)) {
        return true;
    }
    // On Mac OS X the file name and the class name may be normalized differently;
    // in that case compare against the file name in Normal Form Composed (NFC).
    boolean differentlyNormalized = isMacOS
            && Normalizer.isNormalized(name, Normalizer.Form.NFD)
            && Normalizer.isNormalized(expected, Normalizer.Form.NFC);
    if (differentlyNormalized) {
        String composed = Normalizer.normalize(name, Normalizer.Form.NFC);
        if (composed.equals(expected)) {
            this.name = composed;
            return true;
        }
    }
    if (!name.equalsIgnoreCase(expected)) {
        return false;
    }
    try {
        // allow for Windows
        return file.getCanonicalFile().getName().equals(expected);
    } catch (IOException e) {
        // The canonical name could not be resolved; treated as "not compatible".
    }
    return false;
}
public BestMatcher(String base){
this.base = Normalizer.normalize(base.trim(), Normalizer.Form.NFD)
.replaceAll("\\p{InCombiningDiacriticalMarks}+", "")
.toLowerCase();
this.pattern = this.base.split("\\s+");
bestMatches = new ArrayList<>();
bestPoint = 0;
}
@SuppressWarnings("NewApi")
private MasterSeed(byte[] bip39RawEntropy, String bip39Passphrase, byte[] bip32MasterSeed) {
_bip39RawEntropy = bip39RawEntropy;
_bip39Passphrase = Normalizer.normalize(bip39Passphrase, Normalizer.Form.NFKD);
_bip32MasterSeed = bip32MasterSeed;
_wordListType = ENGLISH_WORD_LIST_TYPE;
}
/**
 * Derives a lower-camel-case identifier from a human-readable label.
 *
 * <p>Dots become spaces, a fixed list of English and French filler words is replaced
 * by single spaces (in the listed order), runs of spaces are collapsed, whitespace is
 * turned into underscores, the text is decomposed (NFD) and filtered through the
 * {@code NONLATIN} pattern, lower-cased, and finally converted from lower-underscore
 * to lower-camel form.
 *
 * @param readable the human-readable label
 * @return the derived lower-camel-case identifier
 */
private static String formatMethod(String readable) {
    // Order matters: each replacement runs on the output of the previous one.
    final String[] fillers = {
        " and ", " the ", " à ", " d'", " a ", " l'", " du ", " au ", " en ",
        " de ", " un ", " la ", " le ", " une ", " aux ", " des ", " pour ", " avec "
    };
    String slug = readable.replace(".", " ");
    for (String filler : fillers) {
        slug = slug.replace(filler, " ");
    }
    slug = slug.replaceAll("( )+", " ");

    String underscored = WHITESPACE.matcher(slug).replaceAll("_");
    String decomposed = Normalizer.normalize(underscored, Normalizer.Form.NFD);
    String latinOnly = NONLATIN.matcher(decomposed).replaceAll("");
    String lowered = latinOnly.toLowerCase(Locale.ENGLISH);
    return CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.LOWER_CAMEL, lowered);
}
/**
 * Checks whether this file object's name matches the simple name {@code cn} combined
 * with the extension of {@code kind}.
 *
 * <p>Besides an exact match, two relaxed matches are accepted: on Mac OS, a file name
 * in NFD that equals the NFC expected name once composed (the stored name is then
 * replaced by the composed one); and a case-insensitive match whose canonical file
 * name equals the expected name exactly.
 *
 * @param cn   the simple name to test; must not be {@code null}
 * @param kind the kind whose extension is appended to {@code cn}
 * @return {@code true} if the names are considered compatible
 */
@Override
public boolean isNameCompatible(String cn, JavaFileObject.Kind kind) {
    cn.getClass(); // null check
    if (kind == Kind.OTHER && getKind() != kind) {
        return false;
    }
    final String expected = cn + kind.extension;
    if (name.equals(expected)) {
        return true;
    }
    // On Mac OS X the file name and the class name may be normalized differently;
    // in that case compare against the file name in Normal Form Composed (NFC).
    boolean differentlyNormalized = isMacOS
            && Normalizer.isNormalized(name, Normalizer.Form.NFD)
            && Normalizer.isNormalized(expected, Normalizer.Form.NFC);
    if (differentlyNormalized) {
        String composed = Normalizer.normalize(name, Normalizer.Form.NFC);
        if (composed.equals(expected)) {
            this.name = composed;
            return true;
        }
    }
    if (!name.equalsIgnoreCase(expected)) {
        return false;
    }
    try {
        // allow for Windows
        return file.getCanonicalFile().getName().equals(expected);
    } catch (IOException e) {
        // The canonical name could not be resolved; treated as "not compatible".
    }
    return false;
}
/**
* Rewrites the pattern so that it matches canonically equivalent text.
*
* The pattern is first converted to NFD and stored in normalizedPattern
* (patternLength is updated to its length). It is then scanned code point by
* code point: a base code point followed by non-spacing marks is replaced by a
* non-capturing group "(?:...)" built by produceEquivalentAlternation, an
* unescaped '[' is handed to normalizeCharClass, and everything else is copied
* through unchanged. The rewritten text replaces normalizedPattern.
*/
private void normalize() {
// NOTE(review): inCharClass is never read in this method.
boolean inCharClass = false;
// Previously processed code point; -1 until the first one has been seen.
int lastCodePoint = -1;
// Convert pattern into NFD form
normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);
patternLength = normalizedPattern.length();
// Modify pattern to match canonical equivalences
StringBuilder newPattern = new StringBuilder(patternLength);
// i is advanced inside the body by the char count of the code point consumed.
for(int i=0; i<patternLength; ) {
int c = normalizedPattern.codePointAt(i);
StringBuilder sequenceBuffer;
// A non-spacing mark that follows some code point starts a combining sequence.
if ((Character.getType(c) == Character.NON_SPACING_MARK)
&& (lastCodePoint != -1)) {
// Collect the base code point and the first mark.
sequenceBuffer = new StringBuilder();
sequenceBuffer.appendCodePoint(lastCodePoint);
sequenceBuffer.appendCodePoint(c);
// Keep reading while the current code point is a non-spacing mark.
// NOTE(review): the code point that ends the run is appended to the buffer
// before the loop condition rejects it, and is then stepped over by the
// increment at the bottom of the for loop - confirm this is intended.
while(Character.getType(c) == Character.NON_SPACING_MARK) {
i += Character.charCount(c);
if (i >= patternLength)
break;
c = normalizedPattern.codePointAt(i);
sequenceBuffer.appendCodePoint(c);
}
String ea = produceEquivalentAlternation(
sequenceBuffer.toString());
// Drop the base code point already copied to the output, then emit the
// alternation in its place as a non-capturing group.
newPattern.setLength(newPattern.length()-Character.charCount(lastCodePoint));
newPattern.append("(?:").append(ea).append(")");
} else if (c == '[' && lastCodePoint != '\\') {
// '[' not directly preceded by a backslash: delegate to normalizeCharClass,
// which returns the index to continue from.
i = normalizeCharClass(newPattern, i);
} else {
// Ordinary code point: copy through.
newPattern.appendCodePoint(c);
}
lastCodePoint = c;
i += Character.charCount(c);
}
normalizedPattern = newPattern.toString();
}
/**
 * Attempts to compose the leading part of the input, i.e. the first character
 * together with the first combining mark following it.
 *
 * <p>The leading part spans the length reported by {@code countChars(input, 0, 2)}.
 * It is NFC-normalized; if that changes it, the composed form followed by the
 * untouched remainder of the input is returned.
 *
 * @param input the sequence to compose
 * @return the input with its leading part composed, or {@code null} if NFC
 *         normalization leaves the leading part unchanged
 */
private String composeOneStep(String input) {
    final int headLength = countChars(input, 0, 2);
    final String head = input.substring(0, headLength);
    final String composedHead = Normalizer.normalize(head, Normalizer.Form.NFC);
    if (composedHead.equals(head)) {
        // Nothing could be composed.
        return null;
    }
    return composedHead + input.substring(headLength);
}