下面列出了 java.net.IDN#toUnicode() 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Validates {@code hostname} character by character and wraps it in a {@code Dns} value.
 *
 * <p>Each character must be accepted by {@code DNS}. A dot at index 0, or a dot that directly
 * follows another dot, is rejected. The resulting value is built from the lower-cased
 * ({@link Locale#US}) host name and its {@link IDN#toUnicode(String)} form.
 *
 * @param hostname the host name to validate
 * @return the parsed value holding the lower-cased name and its Unicode form
 * @throws InvalidHostException constructed with the host name and the index of the offending
 *     character
 */
static Dns parse(String hostname) {
    final int length = hostname.length();
    int previousDot = -1;
    int index = 0;
    while (index < length) {
        final char current = hostname.charAt(index);
        if (!DNS.matches(current)) {
            throw new InvalidHostException(hostname, index);
        }
        if (current == '.') {
            // A dot right after the previous dot (or at position 0) means an empty label.
            if (previousDot + 1 == index) {
                throw new InvalidHostException(hostname, index);
            }
            previousDot = index;
        }
        index++;
    }
    final String normalized = hostname.toLowerCase(Locale.US);
    final String unicode = IDN.toUnicode(normalized);
    return new AutoValue_Dns(normalized, unicode);
}
/**
 * Parses a domain name starting at the current offset and moves the input
 * stream pointer past this domain name (even if cross references occur).
 *
 * <p>The first byte decides how the name continues: a value with both top bits set is
 * combined with the next byte into an offset and resolved against {@code data} by the
 * offset-based overload; zero ends the name; any other value is the byte length of the next
 * label, which is read, passed through {@link IDN#toUnicode(String)} and joined to the
 * remaining labels with {@code '.'}.
 *
 * @param dis The input stream.
 * @param data The raw data (for cross references).
 * @return The domain name string; empty when the name consists of the terminator only.
 * @throws IOException If reading from the stream fails.
 */
private static String readName(DataInputStream dis, byte data[])
        throws IOException {
    int c = dis.readUnsignedByte();
    if ((c & 0xc0) == 0xc0) {
        // Cross reference: the low six bits plus the following byte give the target offset.
        c = ((c & 0x3f) << 8) + dis.readUnsignedByte();
        // The visited offset is handed to the overload; presumably used to detect
        // reference loops (that overload is not visible here).
        HashSet<Integer> jumps = new HashSet<Integer>();
        jumps.add(c);
        return readName(data, c, jumps);
    }
    if (c == 0) {
        return "";
    }
    byte b[] = new byte[c];
    dis.readFully(b);
    // Decode with a fixed charset so the result does not depend on the platform default.
    // UTF-8 is a superset of ASCII and matches the previous behavior on UTF-8 platforms.
    String s = IDN.toUnicode(new String(b, java.nio.charset.StandardCharsets.UTF_8));
    String t = readName(dis, data);
    if (t.length() > 0) {
        s = s + "." + t;
    }
    return s;
}
/**
 * Parses a domain name starting at the current offset and moves the input
 * stream pointer past this domain name (even if cross references occur).
 *
 * <p>A leading byte with both top bits set is treated as a cross reference: together with
 * the next byte it forms an offset into {@code data}, and the rest of the name is read by
 * the offset-based overload. A leading zero byte terminates the name. Otherwise the byte is
 * the length of the next label, which is converted with {@link IDN#toUnicode(String)} and
 * followed by the recursively parsed remainder, separated by {@code '.'}.
 *
 * @param dis The input stream.
 * @param data The raw data (for cross references).
 * @return The domain name string; empty when only the terminator was read.
 * @throws IOException If reading from the stream fails.
 */
private static String readName(DataInputStream dis, byte data[])
        throws IOException {
    int c = dis.readUnsignedByte();
    if ((c & 0xc0) == 0xc0) {
        // Low six bits of this byte and all of the next one form the referenced offset.
        c = ((c & 0x3f) << 8) + dis.readUnsignedByte();
        HashSet<Integer> jumps = new HashSet<Integer>();
        jumps.add(c);
        return readName(data, c, jumps);
    }
    if (c == 0) {
        return "";
    }
    byte b[] = new byte[c];
    dis.readFully(b);
    // An explicit charset keeps label decoding independent of the JVM's default encoding.
    String label = new String(b, java.nio.charset.StandardCharsets.UTF_8);
    String s = IDN.toUnicode(label);
    String t = readName(dis, data);
    if (t.length() > 0) {
        s = s + "." + t;
    }
    return s;
}
/**
 * Exercises {@link java.net.IDN#toUnicode(String)}: null input, the empty string, plain
 * ASCII names, names that are already Unicode, and Punycode names written with ASCII and
 * with non-ASCII label separators.
 *
 * @since 1.6
 */
public void test_ToUnicode_LString() {
    boolean rejectedNull = false;
    try {
        IDN.toUnicode(null);
    } catch (NullPointerException expected) {
        rejectedNull = true;
    }
    if (!rejectedNull) {
        fail("should throw NullPointerException");
    }

    assertEquals("", IDN.toUnicode(""));
    assertEquals("www.bcher.de", IDN.toUnicode("www.bcher.de"));
    assertEquals("www.b\u00FCcher.de", IDN.toUnicode("www.b\u00FCcher.de"));

    // The three remaining inputs are expected to yield the same Unicode name.
    final String unicodeJp = IDN.toUnicode("www.\u65E5\u672C\u5E73.jp");
    assertEquals("www.\u65E5\u672C\u5E73.jp", unicodeJp);
    final String fromWideDots = IDN.toUnicode("www\uFF0Exn--gwtq9nb2a\uFF61jp");
    assertEquals("www.\u65E5\u672C\u5E73.jp", fromWideDots);
    final String fromPunycode = IDN.toUnicode("www.xn--gwtq9nb2a.jp");
    assertEquals("www.\u65E5\u672C\u5E73.jp", fromPunycode);
}
/**
 * Tells whether {@code name} matches {@code template}.<p>
 *
 * Matching follows the RFC 2818 rules for TLS and the RFC 2830 rules for LDAP,
 * selected by {@code checkType}.<p>
 *
 * <code>name</code> should be a DNS name; <code>template</code> may contain the
 * wildcard character '*'. Both are first round-tripped through
 * {@link IDN#toASCII(String)} and {@link IDN#toUnicode(String)}; if that fails,
 * if the template has an illegal wildcard, or if the template is not a valid
 * host name once '*' is replaced by 'z', the result is {@code false}.
 */
private boolean isMatched(String name, String template,
        boolean chainsToPublicCA) {
    String unicodeName;
    String unicodeTemplate;

    // Normalize to Unicode, because PSL is in Unicode.
    try {
        unicodeName = IDN.toUnicode(IDN.toASCII(name));
        unicodeTemplate = IDN.toUnicode(IDN.toASCII(template));
    } catch (RuntimeException re) {
        if (SSLLogger.isOn) {
            SSLLogger.fine("Failed to normalize to Unicode: " + re);
        }
        return false;
    }

    if (hasIllegalWildcard(unicodeTemplate, chainsToPublicCA)) {
        return false;
    }

    // Validate the template by reusing the checks in SNIHostName; '*' is swapped
    // for 'z' so that the wildcard itself does not fail the validation.
    final String probe = unicodeTemplate.replace('*', 'z');
    try {
        new SNIHostName(probe);
    } catch (IllegalArgumentException iae) {
        // It would be nice to add debug log if not matching.
        return false;
    }

    if (checkType == TYPE_TLS) {
        return matchAllWildcards(unicodeName, unicodeTemplate);
    }
    if (checkType == TYPE_LDAP) {
        return matchLeftmostWildcard(unicodeName, unicodeTemplate);
    }
    return false;
}
/**
 * Looks up the effective top-level domain plus one (eTLD+1) of {@code domain} in the public
 * suffix list. Yields null when the domain itself is a public suffix.
 *
 * <p>Examples: <pre>{@code
 * assertEquals("google.com", getEffectiveTldPlusOne("google.com"));
 * assertEquals("google.com", getEffectiveTldPlusOne("www.google.com"));
 * assertNull(getEffectiveTldPlusOne("com"));
 * }</pre>
 *
 * @param domain A canonicalized domain. An International Domain Name (IDN) should be punycode
 *     encoded.
 * @return the trailing labels of {@code domain} that form the eTLD+1, or null
 * @throws NullPointerException if {@code domain} is null
 */
public String getEffectiveTldPlusOne(String domain) {
    if (domain == null) {
        throw new NullPointerException("domain == null");
    }

    // The list is stored in Unicode, so rules are matched against the Unicode labels.
    final String[] unicodeLabels = IDN.toUnicode(domain).split("\\.");
    final String[] matchedRule = findMatchingRule(unicodeLabels);
    final boolean exceptionRule = matchedRule[0].charAt(0) == EXCEPTION_MARKER;

    if (!exceptionRule && unicodeLabels.length == matchedRule.length) {
        // The domain is a public suffix.
        return null;
    }

    // An exception rule already spans the eTLD+1; a normal rule covers only the public
    // suffix, so one more label is kept.
    final int keptLabels = exceptionRule ? matchedRule.length : matchedRule.length + 1;
    final int firstLabelOffset = unicodeLabels.length - keptLabels;

    // The result is assembled from the caller's original (punycode) labels.
    final String[] asciiLabels = domain.split("\\.");
    final StringBuilder joined = new StringBuilder();
    int index = firstLabelOffset;
    while (index < asciiLabels.length) {
        joined.append(asciiLabels[index]);
        joined.append('.');
        index++;
    }
    joined.deleteCharAt(joined.length() - 1);
    return joined.toString();
}
/**
 * Checks whether a DNS name is covered by a (possibly wildcarded) template.<p>
 *
 * The comparison uses the RFC 2818 rules when {@code checkType} is
 * {@code TYPE_TLS} and the RFC 2830 rules when it is {@code TYPE_LDAP};
 * any other check type never matches.<p>
 *
 * The <code>name</code> parameter should represent a DNS name. The
 * <code>template</code> parameter may contain the wildcard character '*'.
 */
private boolean isMatched(String name, String template,
        boolean chainsToPublicCA) {
    String normalizedName;
    String normalizedTemplate;
    try {
        // Normalize to Unicode, because PSL is in Unicode.
        normalizedName = IDN.toUnicode(IDN.toASCII(name));
        normalizedTemplate = IDN.toUnicode(IDN.toASCII(template));
    } catch (RuntimeException re) {
        if (SSLLogger.isOn) {
            SSLLogger.fine("Failed to normalize to Unicode: " + re);
        }
        return false;
    }

    if (hasIllegalWildcard(normalizedTemplate, chainsToPublicCA)) {
        return false;
    }

    try {
        // The template's validity is checked with SNIHostName's own rules;
        // each wildcard '*' is replaced by 'z' for that check only.
        new SNIHostName(normalizedTemplate.replace('*', 'z'));
    } catch (IllegalArgumentException iae) {
        // It would be nice to add debug log if not matching.
        return false;
    }

    final boolean tls = checkType == TYPE_TLS;
    if (tls) {
        return matchAllWildcards(normalizedName, normalizedTemplate);
    }
    final boolean ldap = checkType == TYPE_LDAP;
    return ldap ? matchLeftmostWildcard(normalizedName, normalizedTemplate) : false;
}
/**
 * Converts an internationalized domain name (IDN) in a URL to and from ASCII/Unicode.
 *
 * <p>Leading dots are set aside first. The host is then taken to start after the first
 * {@code "//"} or, when there is none, after the first at sign, and to end at the next
 * {@code "/"} (or at the end of the string when the URL has no path). Only that host part is
 * converted; the leading dots and the rest of the URL are returned unchanged.
 *
 * <p>On platform versions older than Gingerbread the URL is returned as is.
 *
 * @param url the URL where the domain name should be converted; must not be null
 * @param toASCII if true converts from Unicode to ASCII, if false converts from ASCII to Unicode
 * @return the URL containing the converted domain name
 */
@TargetApi(Build.VERSION_CODES.GINGERBREAD)
public static String convertIdn(String url, boolean toASCII) {
    if (Build.VERSION.SDK_INT < Build.VERSION_CODES.GINGERBREAD) {
        // No conversion on old platforms: hand the input back untouched.
        return url;
    }

    // Split off every leading dot; all offsets below refer to the remainder only.
    int dotCount = 0;
    while (dotCount < url.length() && url.charAt(dotCount) == '.') {
        dotCount++;
    }
    String dots = url.substring(0, dotCount);
    String urlNoDots = url.substring(dotCount);

    // Find host name after '//' or '@'
    int hostStart = 0;
    int schemeSeparator = urlNoDots.indexOf("//");
    if (schemeSeparator != -1) {
        hostStart = schemeSeparator + "//".length();
    } else {
        int userInfoSeparator = urlNoDots.indexOf("@");
        if (userInfoSeparator != -1) {
            hostStart = userInfoSeparator + "@".length();
        }
    }

    // Handle URL which doesn't have a path (path is implicitly '/')
    int pathStart = urlNoDots.indexOf("/", hostStart);
    int hostEnd = (pathStart == -1 ? urlNoDots.length() : pathStart);

    String host = urlNoDots.substring(hostStart, hostEnd);
    host = (toASCII ? IDN.toASCII(host) : IDN.toUnicode(host));

    return dots + urlNoDots.substring(0, hostStart) + host + urlNoDots.substring(hostEnd);
}
/**
 * Builds look-alike variants of a domain by substituting characters with their glyphs.
 *
 * <p>If {@code isAce} reports the input as ACE, it is first converted with
 * {@link IDN#toUnicode(String)}. For every window of the domain shorter than the whole
 * domain, and every character of that window that has an entry in {@code glyphs}, each
 * glyph replaces all occurrences of the character inside the window. Each resulting domain
 * is added to the result; when {@code isAce} does not report it as ACE, its
 * {@link IDN#toASCII(String, int)} form is added as well if it differs.
 *
 * @param originalString the domain to derive candidates from
 * @return the set of candidates; empty when {@code originalString} is empty
 */
@Override
public Set<String> generateCandidates(String originalString) {
    Set<String> result = new HashSet<>();
    if (StringUtils.isEmpty(originalString)) {
        return result;
    }

    // Work on the Unicode form when the input is an ACE domain.
    String domain = isAce(originalString) ? IDN.toUnicode(originalString) : originalString;
    int length = domain.length();

    // A window size of 0 yields no characters to substitute, so start at 1.
    for (int ws = 1; ws < length; ws++) {
        for (int i = 0; i + ws <= length; i++) {
            String win = domain.substring(i, i + ws);
            String prefix = domain.substring(0, i);
            String suffix = domain.substring(i + ws);
            for (int j = 0; j < ws; j++) {
                char c = win.charAt(j);
                if (!glyphs.containsKey(c)) {
                    continue;
                }
                String target = String.valueOf(c);
                for (String g : glyphs.get(c)) {
                    // Literal replacement: neither the character nor the glyph may be
                    // interpreted as a regular expression or replacement pattern.
                    String d = prefix + win.replace(target, g) + suffix;
                    result.add(d);
                    if (isAce(d)) {
                        continue;
                    }
                    try {
                        String dAscii = IDN.toASCII(d, IDN.ALLOW_UNASSIGNED);
                        if (!d.equals(dAscii)) {
                            result.add(dAscii);
                        }
                    } catch (IllegalArgumentException iae) {
                        // Best effort: the Unicode candidate is kept, only its ASCII form is skipped.
                        LOG.debug("Unable to parse " + d + ": " + iae.getMessage(), iae);
                    }
                }
            }
        }
    }
    return result;
}
/**
 * Returns the hostname after conversion with {@link IDN#toUnicode(String)}.
 *
 * @return the converted hostname, or {@code null} when no hostname is set
 */
public String getDecodedHostname() {
    if (hostname == null) {
        return null;
    }
    return IDN.toUnicode(hostname);
}
/**
 * Returns this object's host converted with {@link IDN#toUnicode(String)}.
 *
 * @return the converted host
 */
public String host() {
    final String unicodeHost = IDN.toUnicode(this.host);
    return unicodeHost;
}
/**
 * Produces the normalized form of this IRI.
 * <p>
 * IRIs identify resources, so ideally two IRIs would be equivalent exactly when they identify the same resource.
 * That cannot be decided without full knowledge of the resources, so normalization instead aims to minimize false
 * negatives while strictly avoiding false positives.
 * <p>
 * <b>Case Normalization</b> hexadecimal digits in percent-encoding triplets (e.g., "%3a" versus "%3A") are
 * case-insensitive and are written with uppercase letters for the digits A - F. Scheme and host are
 * case-insensitive and are lowercased.
 * <p>
 * <b>Character Normalization</b> the Unicode Standard defines equivalences between character sequences; the
 * Normalization Forms of the Unicode Standard Annex are applied to the IRI components.
 * <p>
 * <b>Percent-Encoding Normalization</b> any percent-encoded octet sequence that corresponds to an unreserved
 * character is decoded, anywhere in the IRI.
 * <p>
 * <b>Path Segment Normalization</b> unnecessary {@code "."} and {@code ".."} segments are removed from the path of
 * a hierarchical IRI. Every {@code "."} segment is dropped. A {@code ".."} segment is dropped only when it follows
 * a non-{@code ".."} segment or the start of the path.
 * <p>
 * <b>HTTP(S) Scheme Normalization</b> a port equal to the scheme's default port is set to undefined. An empty path
 * is replaced with "/".
 * <p>
 * <b>File Scheme Normalization</b> a host of "localhost" or an empty host is set to undefined.
 * <p>
 * <b>Internationalized Domain Name Normalization</b> the host component is converted to Unicode.
 *
 * @return this instance when it is already in normalized form, otherwise a new normalized IRI
 */
public ParsedIRI normalize() {
    final String normScheme = toLowerCase(scheme);

    // Drop the port when it is the default for http (80) or https (443).
    final boolean defaultPort = (isScheme("http") && 80 == port) || (isScheme("https") && 443 == port);
    final int normPort;
    if (defaultPort) {
        normPort = -1;
    } else {
        normPort = port;
    }

    // A file IRI without user info or port whose host is empty or "localhost" loses its host.
    final boolean localFile = isScheme("file") && userInfo == null && -1 == port
            && ("".equals(host) || "localhost".equals(host));
    final String normHost;
    if (localFile) {
        normHost = null;
    } else if (host == null || host.length() == 0) {
        normHost = host;
    } else {
        final int idnFlags = IDN.USE_STD3_ASCII_RULES | IDN.ALLOW_UNASSIGNED;
        normHost = IDN.toUnicode(pctEncodingNormalization(toLowerCase(host)), idnFlags);
    }

    final String normPath;
    if (normScheme != null && path == null) {
        normPath = "";
    } else {
        normPath = normalizePath(path);
    }

    final String normUserInfo = pctEncodingNormalization(userInfo);
    final String normQuery = pctEncodingNormalization(query);
    final String normFragment = pctEncodingNormalization(fragment);

    final ParsedIRI normalized = new ParsedIRI(normScheme, normUserInfo, normHost, normPort, normPath, normQuery,
            normFragment);
    // Avoid handing out a second instance when nothing changed.
    return this.iri.equals(normalized.iri) ? this : normalized;
}
/**
 * Converts the stored hostname with {@link IDN#toUnicode(String)}.
 *
 * @return the converted hostname, or {@code null} if the hostname is {@code null}
 */
public String getDecodedHostname() {
    final boolean missing = (hostname == null);
    if (missing) {
        return null;
    }
    return IDN.toUnicode(hostname);
}
/**
 * Gives the host that is used as a parameter in {@link io.netty.handler.codec.socks.SocksCmdType}.
 * The host (BND.ADDR field in response) is the address the server used when connecting to the target host,
 * which is typically different from the address the client uses to connect to the SOCKS server.
 * A non-null host is returned after conversion with {@link IDN#toUnicode(String)}.
 *
 * @return host that is used as a parameter in {@link io.netty.handler.codec.socks.SocksCmdType}
 *         or null when there was no host specified during response construction
 */
public String host() {
    return host == null ? null : IDN.toUnicode(host);
}
/**
 * Converts a domain name with {@link IDN#toUnicode(String)}.
 *
 * As a side effect, {@link #decoded} records whether the conversion changed
 * the string: it is set to {@code true} when the result differs from the
 * input (the input was Punycode) and to {@code false} when the input came
 * back unchanged. {@link #recode(String)} can return the string in the
 * saved format. A {@code null} input leaves {@link #decoded} untouched.
 *
 * @param domain the domain name, may be null
 * @return the converted domain name, or {@code null} for a {@code null} input
 */
public String decode(final String domain) {
    if (domain == null) {
        return null;
    }

    final String unicodeDomain = IDN.toUnicode(domain);
    final boolean unchanged = unicodeDomain.equals(domain);
    decoded = !unchanged;
    return unicodeDomain;
}
/**
 * Returns the primary hostname for this server, as reported by {@code getHostname()}.
 * If the hostname is an IDN, it is decoded from Punycode.
 *
 * @return the decoded primary hostname, or {@code null} when there is no hostname
 */
public String getDecodedHostname() {
    final String primary = getHostname();
    if (primary == null) {
        return null;
    }
    return IDN.toUnicode(primary);
}
/**
 * Gives the host that is used as a parameter in {@link SocksCmdType}. A host whose address
 * type is {@code DOMAIN} is converted with {@link IDN#toUnicode(String)}; any other host is
 * returned unchanged.
 *
 * @return host that is used as a parameter in {@link SocksCmdType}
 */
public String host() {
    if (addressType == SocksAddressType.DOMAIN) {
        return IDN.toUnicode(host);
    }
    return host;
}
/**
 * Gives the host that is used as a parameter in {@link SocksCmdType}.
 * The host (BND.ADDR field in response) is the address the server used when connecting to the target host,
 * which is typically different from the address the client uses to connect to the SOCKS server.
 * Only a non-null host with address type {@code DOMAIN} is converted with {@link IDN#toUnicode(String)}.
 *
 * @return host that is used as a parameter in {@link SocksCmdType}
 *         or null when there was no host specified during response construction
 */
public String host() {
    // Nothing to convert for a missing host or a non-domain address.
    if (host == null || addressType != SocksAddressType.DOMAIN) {
        return host;
    }
    return IDN.toUnicode(host);
}
/**
 * Returns the primary hostname for this server.
 * If the hostname is an IDN, it is decoded from Punycode.
 *
 * @return the decoded primary hostname, or {@code null} when the hostname is {@code null}
 */
public String getDecodedHostname() {
    if (hostname != null) {
        return IDN.toUnicode(hostname);
    }
    return null;
}
/**
 * Gives the host that is used as a parameter in {@link SocksCmdType}, converted with
 * {@link IDN#toUnicode(String)}.
 *
 * @return host that is used as a parameter in {@link SocksCmdType}
 */
public String host() {
    final String decodedHost = IDN.toUnicode(host);
    return decodedHost;
}