java.text.Normalizer#normalize ( )源码实例Demo

下面列出了java.text.Normalizer#normalize ( ) 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。

源代码1 项目: bioasq   文件: PubAnnotationConvertUtil.java
/**
 * Normalizes text by mapping a handful of troublesome characters to plain
 * look-alikes, trimming trailing whitespace, and stripping diacritical marks.
 *
 * @param text the text to normalize; must not be {@code null}
 * @return the normalized text
 */
static String normalizeText(String text) {
    // Whitespace, double quotes, percent signs and '®' all become a plain space.
    String ret = text.replaceAll("[\\s\"%®]", " ");
    // Vertical bar becomes 'I', e.g. for "Deiodinase type || (DIO2)".
    ret = ret.replace('|', 'I');
    // Multiplication sign '×' becomes the letter 'x'.
    ret = ret.replace('×', 'x');
    // Sharp s 'ß' becomes Greek beta 'β'.
    ret = ret.replace('ß', 'β');
    // Middle dot '·' becomes a full stop.
    ret = ret.replace('·', '.');
    // Plus-minus '±' becomes a plain plus.
    ret = ret.replace('±', '+');
    // Drop trailing whitespace (by now every whitespace character is a space).
    ret = ret.replaceAll("\\s+$", "");
    // remove non ASCII characters
//    ret = nonAscii.replaceFrom(ret, ' ');
    // Decompose with NFKD so that look-alike symbols collapse to their base form, see
    // http://stackoverflow.com/questions/20674577/how-to-compare-unicode-characters-that-look-alike
    ret = Normalizer.normalize(ret, Normalizer.Form.NFKD);
    // diacriticalMarksPattern is declared elsewhere; presumably it matches the
    // combining marks exposed by the decomposition above.
    return diacriticalMarksPattern.matcher(ret).replaceAll("");
}
 
源代码2 项目: Kepler   文件: StringUtil.java
/**
 * Replaces selected control characters in the input with spaces and, when the
 * {@code normalise.input.strings} configuration flag is set, converts the
 * result to Unicode NFD form.
 *
 * <p>Characters 1, 2, 9 (tab), 10 (line feed) and 12 (form feed) are always
 * replaced; character 13 (carriage return) is replaced only on request.
 *
 * @param input the input; must not be {@code null}
 * @param filerNewline whether carriage returns (char 13) should be replaced too
 * @return the filtered string
 */
public static String filterInput(String input, boolean filerNewline) {
    final int length = input.length();
    final StringBuilder filtered = new StringBuilder(length);

    for (int i = 0; i < length; i++) {
        final char ch = input.charAt(i);
        switch (ch) {
            case 1:
            case 2:
            case 9:
            case 10:
            case 12:
                filtered.append(' ');
                break;
            case 13:
                filtered.append(filerNewline ? ' ' : ch);
                break;
            default:
                filtered.append(ch);
                break;
        }
    }

    final String cleaned = filtered.toString();
    return GameConfiguration.getInstance().getBoolean("normalise.input.strings")
            ? Normalizer.normalize(cleaned, Normalizer.Form.NFD)
            : cleaned;
}
 
源代码3 项目: mycore   文件: MCRPath.java
/**
 * Brings the path into Unicode NFC form, validates every character and
 * collapses redundant separators (doubled or trailing).
 *
 * @param uncleanPath path to check
 * @return the NFC-normalized path, passed through {@code normalize(...)} when a
 *         doubled or trailing separator is found
 * @throws InvalidPathException if <code>uncleanPath</code> contains invalid characters
 *         (presumably raised by {@code checkCharacter}, which is declared elsewhere)
 */
static String normalizeAndCheck(final String uncleanPath) {
    final String path = Normalizer.normalize(uncleanPath, Normalizer.Form.NFC);
    final int length = path.length();

    // NOTE(review): this flag is never updated, so checkCharacter always
    // receives false — confirm whether that is intended.
    final boolean afterSeparator = false;

    char previous = 0;
    for (int index = 0; index < length; index++) {
        final char current = path.charAt(index);
        checkCharacter(path, current, afterSeparator);
        if (current == SEPARATOR && previous == SEPARATOR) {
            // Doubled separator: hand over to the slower clean-up routine,
            // starting at the first of the two separators.
            return normalize(path, length, index - 1);
        }
        previous = current;
    }

    // A trailing separator is removed as well; otherwise the path is already clean.
    return previous == SEPARATOR
        ? normalize(path, length, length - 1)
        : path;
}
 
源代码4 项目: rdf4j   文件: ParsedIRI.java
/**
 * Normalizes one percent-encoded sequence.
 *
 * <p>Entries found in the {@code common_pct} lookup table are either decoded
 * (when the code point is unreserved) or passed through unchanged. Anything
 * else is decoded, converted to Unicode NFC and rebuilt code point by code
 * point: unreserved code points are emitted literally, all others are
 * percent-encoded again. A single non-unreserved code point is returned as the
 * original encoding in upper case.
 *
 * @param encoded the percent-encoded sequence
 * @return the normalized form of {@code encoded}
 */
private String normalizePctEncoding(String encoded) {
	int cidx = Arrays.binarySearch(common_pct, encoded);
	if (cidx >= 0) {
		return isMember(unreserved, common[cidx])
				? new String(Character.toChars(common[cidx])) // quickly decode unreserved encodings
				: encoded; // pass through reserved encodings
	}
	String ns = Normalizer.normalize(pctDecode(encoded), Normalizer.Form.NFC);
	int length = ns.length();
	boolean singleCodePoint = ns.codePointCount(0, length) == 1;
	StringBuilder sb = new StringBuilder(length);
	// Walk the string by char index, advancing one code point at a time, instead
	// of re-resolving the offset from the start of the string on every iteration.
	for (int i = 0; i < length;) {
		int chr = ns.codePointAt(i);
		i += Character.charCount(chr);
		if (isMember(unreserved, chr)) {
			sb.appendCodePoint(chr);
		} else if (singleCodePoint) {
			return toUpperCase(encoded);
		} else {
			sb.append(pctEncode(chr));
		}
	}
	return sb.toString();
}
 
源代码5 项目: mollyim-android   文件: PinHasher.java
/**
 * Turns a PIN into the byte sequence used for hashing: the PIN is trimmed,
 * all-numeric PINs are passed through {@code PinString.toArabic}, and the
 * result is NFKD-normalized and encoded as UTF-8.
 *
 * @param pin the PIN as entered; must not be {@code null}
 * @return the UTF-8 bytes of the normalized PIN
 */
public static byte[] normalize(String pin) {
  final String trimmed = pin.trim();

  // Only purely numeric PINs are converted by PinString.toArabic.
  final String canonical = PinString.allNumeric(trimmed)
      ? PinString.toArabic(trimmed)
      : trimmed;

  return Normalizer.normalize(canonical, Normalizer.Form.NFKD)
                   .getBytes(StandardCharsets.UTF_8);
}
 
源代码6 项目: android_9.0.0_r45   文件: KeyCharacterMap.java
/**
 * Combines a dead-key accent with a base character.
 * For example, getDeadChar('`', 'e') returns &egrave;.
 * getDeadChar('^', ' ') returns '^' and getDeadChar('^', '^') returns '^'.
 *
 * @param accent The accent character.  eg. '`'
 * @param c The basic character.
 * @return The combined character, or 0 if the characters cannot be combined.
 */
public static int getDeadChar(int accent, int c) {
    // Typing the dead key twice, or following it with a space, yields the
    // plain accent itself; no composition is required.
    if (c == accent || CHAR_SPACE == c) {
        return accent;
    }

    final int combining = sAccentToCombining.get(accent);
    if (combining == 0) {
        return 0;
    }

    // Cache key: combining mark in the upper 16 bits, OR-ed with the base character.
    final int cacheKey = (combining << 16) | c;
    synchronized (sDeadKeyCache) {
        final int cached = sDeadKeyCache.get(cacheKey, -1);
        if (cached != -1) {
            return cached;
        }

        // Build "base + combining mark" and let NFC try to compose it.
        sDeadKeyBuilder.setLength(0);
        sDeadKeyBuilder.append((char)c);
        sDeadKeyBuilder.append((char)combining);
        final String composed = Normalizer.normalize(sDeadKeyBuilder, Normalizer.Form.NFC);

        // Only a result that collapsed to a single code point counts as combined.
        final int result;
        if (composed.codePointCount(0, composed.length()) == 1) {
            result = composed.codePointAt(0);
        } else {
            result = 0;
        }
        sDeadKeyCache.put(cacheKey, result);
        return result;
    }
}
 
源代码7 项目: AndroidWallet   文件: Bip39.java
/**
 * Creates a master seed from its already-computed parts.
 *
 * <p>Both byte arrays are stored by reference (no defensive copy). The word
 * list type is always set to {@code ENGLISH_WORD_LIST_TYPE}.
 *
 * @param bip39RawEntropy the raw entropy
 * @param bip39Passphrase the passphrase; stored in Unicode NFKD form, must not be {@code null}
 * @param bip32MasterSeed the master seed bytes
 */
@SuppressWarnings("NewApi")
private MasterSeed(byte[] bip39RawEntropy, String bip39Passphrase, byte[] bip32MasterSeed) {
   _bip39RawEntropy = bip39RawEntropy;
   // NOTE(review): presumably NFKD because BIP-39 specifies that form for passphrases — confirm.
   _bip39Passphrase = Normalizer.normalize(bip39Passphrase, Normalizer.Form.NFKD);
   _bip32MasterSeed = bip32MasterSeed;
   _wordListType = ENGLISH_WORD_LIST_TYPE;
}
 
源代码8 项目: openjdk-8   文件: RegularFileObject.java
/**
 * Checks whether this file's name matches the given simple name plus the
 * extension of the given kind, allowing for differing Unicode normalization
 * on macOS and for case-insensitive matches confirmed via the canonical file.
 */
@Override
public boolean isNameCompatible(String cn, JavaFileObject.Kind kind) {
    // Dereference cn purely to fail fast with a NullPointerException on null.
    cn.getClass();

    if (kind == Kind.OTHER && getKind() != kind) {
        return false;
    }

    final String expected = cn + kind.extension;
    if (name.equals(expected)) {
        return true;
    }

    // On Mac OS X the file name and the class name may be normalized
    // differently (file name decomposed, class name composed). In that case
    // compose the file name (NFC) and compare again.
    final boolean differentlyNormalized = isMacOS
            && Normalizer.isNormalized(name, Normalizer.Form.NFD)
            && Normalizer.isNormalized(expected, Normalizer.Form.NFC);
    if (differentlyNormalized) {
        final String composed = Normalizer.normalize(name, Normalizer.Form.NFC);
        if (composed.equals(expected)) {
            // Remember the composed form for subsequent calls.
            this.name = composed;
            return true;
        }
    }

    if (!name.equalsIgnoreCase(expected)) {
        return false;
    }
    try {
        // allow for Windows
        return file.getCanonicalFile().getName().equals(expected);
    } catch (IOException ignored) {
        // The canonical name could not be resolved; treat as not compatible.
    }
    return false;
}
 
源代码9 项目: BowlerStudio   文件: BowlerStudioMenu.java
/**
 * Builds a slug from the input: matches of {@code WHITESPACE} become hyphens,
 * the text is decomposed to NFD, matches of {@code NONLATIN} are removed, and
 * finally every hyphen is turned into an underscore.
 *
 * @param input the text to convert; must not be {@code null}
 * @return the slug
 */
public static String slugify(String input) {
    final String hyphenated = WHITESPACE.matcher(input).replaceAll("-");
    final String decomposed = Normalizer.normalize(hyphenated, Form.NFD);
    return NONLATIN.matcher(decomposed)
            .replaceAll("")
            .replace('-', '_');
}
 
源代码10 项目: ReScue   文件: ReScuePattern.java
/**
 * Rewrites {@code pattern} so that it matches canonical equivalences.
 *
 * <p>The pattern is first converted to Unicode NFD form. Every base code point
 * that is followed by one or more non-spacing marks is then replaced by a
 * non-capturing group built by {@code produceEquivalentAlternation}, and
 * unescaped character classes are handed to {@code normalizeCharClass}.
 * The result is stored in {@code normalizedPattern}; {@code patternLength}
 * holds the length of the NFD form.
 */
private void normalize() {
    // NOTE(review): inCharClass is never read in this method.
    boolean inCharClass = false;
    // Code point handled in the previous iteration; -1 before the first one.
    int lastCodePoint = -1;

    // Convert pattern into normalizedD form
    normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);
    patternLength = normalizedPattern.length();

    // Modify pattern to match canonical equivalences
    StringBuilder newPattern = new StringBuilder(patternLength);
    // i is advanced inside the body, one code point (1 or 2 chars) at a time.
    for(int i=0; i<patternLength; ) {
        int c = normalizedPattern.codePointAt(i);
        StringBuilder sequenceBuffer;
        if ((Character.getType(c) == Character.NON_SPACING_MARK)
            && (lastCodePoint != -1)) {
            // A combining mark following some code point: collect the base
            // code point and the run of marks into sequenceBuffer.
            sequenceBuffer = new StringBuilder();
            sequenceBuffer.appendCodePoint(lastCodePoint);
            sequenceBuffer.appendCodePoint(c);
            // NOTE(review): each pass appends the next code point before its
            // type is tested, so the first code point that is not a
            // non-spacing mark also ends up in sequenceBuffer, and the
            // increment at the end of the outer loop then steps past it.
            while(Character.getType(c) == Character.NON_SPACING_MARK) {
                i += Character.charCount(c);
                if (i >= patternLength)
                    break;
                c = normalizedPattern.codePointAt(i);
                sequenceBuffer.appendCodePoint(c);
            }
            String ea = produceEquivalentAlternation(
                                           sequenceBuffer.toString());
            // Remove the base code point that was already appended, then emit
            // the alternation as a non-capturing group in its place.
            newPattern.setLength(newPattern.length()-Character.charCount(lastCodePoint));
            newPattern.append("(?:").append(ea).append(")");
        } else if (c == '[' && lastCodePoint != '\\') {
            // '[' not directly preceded by a backslash: delegate to
            // normalizeCharClass, whose return value becomes the new index.
            i = normalizeCharClass(newPattern, i);
        } else {
            // Anything else is copied through unchanged.
            newPattern.appendCodePoint(c);
        }
        lastCodePoint = c;
        i += Character.charCount(c);
    }
    normalizedPattern = newPattern.toString();
}
 
源代码11 项目: ReScue   文件: ReScuePattern.java
/**
 * Tries to compose the leading character of the input with the first
 * combining mark that follows it.
 *
 * @param input a character followed by combining marks
 * @return the composed leading pair followed by the remaining marks, or
 *         {@code null} if the first two characters cannot be composed further
 */
private String composeOneStep(String input) {
    // Number of chars covered by the first two characters of the input.
    final int headLength = countChars(input, 0, 2);
    final String head = input.substring(0, headLength);
    final String composed = Normalizer.normalize(head, Normalizer.Form.NFC);

    // NFC left the pair untouched: no composition exists.
    if (composed.equals(head)) {
        return null;
    }
    return composed + input.substring(headLength);
}
 
源代码12 项目: XposedSmsCode   文件: SmsMsg.java
/**
 * Builds an {@code SmsMsg} from an intent. The sender is taken from the first
 * message part and the body from all parts; both are stored in Unicode NFC form.
 *
 * @param intent the intent the message parts are extracted from
 * @return a new {@code SmsMsg} with normalized sender and body
 */
public static SmsMsg fromIntent(Intent intent) {
    final SmsMessage[] parts = SmsMessageUtils.fromIntent(intent);
    final String rawSender = parts[0].getDisplayOriginatingAddress();
    final String rawBody = SmsMessageUtils.getMessageBody(parts);

    final String nfcSender = Normalizer.normalize(rawSender, Normalizer.Form.NFC);
    final String nfcBody = Normalizer.normalize(rawBody, Normalizer.Form.NFC);

    final SmsMsg result = new SmsMsg();
    result.setSender(nfcSender).setBody(nfcBody);
    return result;
}
 
源代码13 项目: mapr-music   文件: SlugUtil.java
/**
 * Converts the specified string to its slug representation, which can be used
 * to generate readable and SEO-friendly URLs.
 *
 * <p>Steps: transliterate, replace {@code WHITESPACE} matches with hyphens,
 * decompose to NFD, drop {@code NONLATIN} matches, drop {@code EDGESDHASHES}
 * matches, and lower-case using the English locale.
 *
 * @param input string, which will be converted.
 * @return slug representation of the string.
 */
public static String toSlug(String input) {
    final String hyphenated = WHITESPACE.matcher(transliterator.transform(input)).replaceAll("-");
    final String decomposed = Normalizer.normalize(hyphenated, Normalizer.Form.NFD);
    final String latinOnly = NONLATIN.matcher(decomposed).replaceAll("");

    return EDGESDHASHES.matcher(latinOnly)
            .replaceAll("")
            .toLowerCase(Locale.ENGLISH);
}
 
/** Matches runs of combining diacritical marks; compiled once and reused. */
private static final Pattern COMBINING_DIACRITICAL_MARKS =
        Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

/**
 * Derives a context path from a domain name: accents are stripped, the text
 * is lower-cased and trimmed, and every run of whitespace becomes one hyphen.
 *
 * @param domainName the domain name; must not be {@code null}
 * @return the generated context path
 */
private String generateContextPath(String domainName) {
    // Decompose first so that accents become separate combining marks.
    String decomposed = Normalizer.normalize(domainName, Normalizer.Form.NFD);
    String unaccented = COMBINING_DIACRITICAL_MARKS.matcher(decomposed).replaceAll("");
    // Locale.ROOT keeps the result independent of the default locale
    // (under a Turkish locale, 'I' would otherwise lower-case to dotless 'ı').
    return unaccented.toLowerCase(java.util.Locale.ROOT).trim().replaceAll("\\s+", "-");
}
 
源代码15 项目: Bytecoder   文件: LauncherHelper.java
/**
 * Loads the main class of a module in the boot layer.
 *
 * <p>The query is either a module name or module-name/main-class. In the
 * former case the main class is taken from the module descriptor (MainClass
 * attribute). On success the system property {@code jdk.module.main.class}
 * is set to the name of the loaded class.
 *
 * @param what the module name, optionally followed by '/' and a class name
 * @return the loaded main class
 */
private static Class<?> loadModuleMainClass(String what) {
    // Split "module[/main-class]" at the first slash, if any.
    final int slash = what.indexOf('/');
    final boolean hasExplicitClass = slash != -1;
    final String mainModule = hasExplicitClass ? what.substring(0, slash) : what;
    String mainClass = hasExplicitClass ? what.substring(slash + 1) : null;

    // main module is in the boot layer; its absence should not happen
    final Module m = ModuleLayer.boot()
            .findModule(mainModule)
            .orElseThrow(() -> new InternalError("Module " + mainModule + " not in boot Layer"));

    // No class given: fall back to the one declared by the module descriptor.
    if (mainClass == null) {
        final Optional<String> declaredMain = m.getDescriptor().mainClass();
        if (!declaredMain.isPresent()) {
            abort(null, "java.launcher.module.error1", mainModule);
        }
        mainClass = declaredMain.get();
    }

    // load the class from the module
    Class<?> c = null;
    try {
        c = Class.forName(m, mainClass);
        // On OS X a decomposed (NFD) class name may not be found; retry with
        // the composed (NFC) form of the name.
        final boolean retryComposed = c == null
                && System.getProperty("os.name", "").contains("OS X")
                && Normalizer.isNormalized(mainClass, Normalizer.Form.NFD);
        if (retryComposed) {
            c = Class.forName(m, Normalizer.normalize(mainClass, Normalizer.Form.NFC));
        }
    } catch (LinkageError le) {
        abort(null, "java.launcher.module.error3", mainClass, m.getName(),
                le.getClass().getName() + ": " + le.getLocalizedMessage());
    }
    if (c == null) {
        abort(null, "java.launcher.module.error2", mainClass, mainModule);
    }

    System.setProperty("jdk.module.main.class", c.getName());
    return c;
}
 
/**
 * Returns the given URI string in Unicode NFC (canonical composition) form.
 *
 * @param uri the URI string to normalize; must not be {@code null}
 * @return the NFC-normalized string
 */
protected static String normalize(String uri) {
    return Normalizer.normalize(uri, Normalizer.Form.NFC);
}
 
源代码17 项目: redis-rdb-cli   文件: RedisSentinelURI.java
/**
 * Converts the string to Unicode NFC form and encodes it as UTF-8.
 *
 * @param s the string to encode; must not be {@code null}
 * @return a buffer holding the UTF-8 bytes of the NFC-normalized string
 */
private static ByteBuffer normalize(String s) {
    final String composed = Normalizer.normalize(s, Normalizer.Form.NFC);
    // Charset.encode(String) is specified as encode(CharBuffer.wrap(s)).
    return StandardCharsets.UTF_8.encode(composed);
}
 
源代码18 项目: prayer-times-android   文件: Frag.java
/**
 * Produces a lower-case, ASCII-only form of the text: the input is decomposed
 * to NFD and every remaining non-ASCII character (including the separated
 * combining marks) is replaced by an underscore.
 *
 * @param str the text to normalize; must not be {@code null}
 * @return the lower-cased ASCII form
 */
private static String normalize(CharSequence str) {
    return Normalizer.normalize(str, Normalizer.Form.NFD)
            .replaceAll("[^\\p{ASCII}]", "_")
            .toLowerCase(Locale.ENGLISH);
}
 
源代码19 项目: javaee8-jaxrs-sample   文件: Utils.java
/**
 * Builds a lower-case slug from the input: matches of {@code WHITESPACE}
 * become hyphens, the text is decomposed to NFD, and matches of
 * {@code NONLATIN} are removed.
 *
 * @param input the text to convert; must not be {@code null}
 * @return the slug, lower-cased with the English locale
 */
public static String slugify(String input) {
    final String hyphenated = WHITESPACE.matcher(input).replaceAll("-");
    final String decomposed = Normalizer.normalize(hyphenated, Form.NFD);
    return NONLATIN.matcher(decomposed)
            .replaceAll("")
            .toLowerCase(Locale.ENGLISH);
}
 
源代码20 项目: mycore   文件: MCRXMLFunctions.java
/**
 * Converts the given string to Unicode NFC (canonical composition) form.
 *
 * @param arg0 the string to normalize; must not be {@code null}
 * @return the NFC-normalized string
 * @see Normalizer#normalize(CharSequence, java.text.Normalizer.Form)
 */
public static String normalizeUnicode(String arg0) {
    final Normalizer.Form targetForm = Normalizer.Form.NFC;
    final String composed = Normalizer.normalize(arg0, targetForm);
    return composed;
}