org.apache.commons.lang3.StringUtils#getLevenshteinDistance ( )源码实例Demo

下面列出了org.apache.commons.lang3.StringUtils#getLevenshteinDistance ( ) 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。

源代码1 项目: Truck-Factor   文件: AliasesIdentifier.java
/**
 * Groups developers whose names are close to each other by Levenshtein distance.
 * <p>
 * Names are passed through {@code convertToUTFLower} before being compared. A developer
 * whose name is shorter than {@code minSize} is never used as the key of a group.
 *
 * @param allDevelopers developers to compare pairwise; the list itself is not modified
 * @param distance      maximum edit distance for two names to count as aliases; {@code -1}
 *                      means "use the number of space-separated tokens of the first name"
 * @param minSize       minimum name length required for a developer to collect aliases
 * @return map from a developer to the developers detected as its aliases
 */
private static Map<Developer, List<Developer>> findAliases(List<Developer> allDevelopers, int distance, int minSize) {
	int threshold = distance;
	// CopyOnWriteArrayList iterators work on the snapshot taken when a loop starts, so the
	// remove() calls below never disturb a loop that is already running; they only shrink
	// the list seen by inner loops that start later.
	List<Developer> remaining = new CopyOnWriteArrayList<Developer>(allDevelopers);
	Map<Developer, List<Developer>> aliases = new HashMap<Developer, List<Developer>>();
	for (Developer primary : remaining) {
		remaining.remove(primary);
		for (Developer other : remaining) {
			if (primary.getId() == other.getId() || primary.getName().length() < minSize) {
				continue;
			}
			int editDistance = StringUtils.getLevenshteinDistance(
					convertToUTFLower(primary.getName()), convertToUTFLower(other.getName()));
			if (distance == -1) {
				// Dynamic threshold: one allowed edit per word of the primary name.
				threshold = primary.getName().split(" ").length;
			}
			if (primary.getName().equals(other.getName()) || editDistance > threshold) {
				continue;
			}
			List<Developer> group = aliases.get(primary);
			if (group == null) {
				group = new ArrayList<Developer>();
				aliases.put(primary, group);
			}
			group.add(other);
			remaining.remove(other);
		}
	}
	return aliases;
}
 
源代码2 项目: Truck-Factor   文件: NewAliasHandler.java
/**
 * Groups developer names that are close to each other by Levenshtein distance.
 * <p>
 * Names are passed through {@code convertToUTFLower} before being compared. A name
 * shorter than {@code minSize} is never used as the key of a group.
 *
 * @param allDevelopers developer names to compare pairwise; the list itself is not modified
 * @param distance      maximum edit distance for two names to count as aliases; {@code -1}
 *                      means "use the number of space-separated tokens of the first name"
 * @param minSize       minimum length a name needs in order to collect aliases
 * @return map from a name to the names detected as its aliases
 */
private static Map<String, List<String>> findAliases(List<String> allDevelopers, int distance, int minSize) {
	int threshold = distance;
	// CopyOnWriteArrayList iterators work on the snapshot taken when a loop starts, so the
	// remove() calls below never disturb a loop that is already running; they only shrink
	// the list seen by inner loops that start later.
	List<String> remaining = new CopyOnWriteArrayList<String>(allDevelopers);
	Map<String, List<String>> aliases = new HashMap<String, List<String>>();
	for (String primary : remaining) {
		remaining.remove(primary);
		for (String other : remaining) {
			if (primary.length() < minSize) {
				continue;
			}
			int editDistance = StringUtils.getLevenshteinDistance(
					convertToUTFLower(primary), convertToUTFLower(other));
			if (distance == -1) {
				// Dynamic threshold: one allowed edit per word of the primary name.
				threshold = primary.split(" ").length;
			}
			if (primary.equals(other) || editDistance > threshold) {
				continue;
			}
			List<String> group = aliases.get(primary);
			if (group == null) {
				group = new ArrayList<String>();
				aliases.put(primary, group);
			}
			group.add(other);
			remaining.remove(other);
		}
	}
	return aliases;
}
 
源代码3 项目: Truck-Factor   文件: AliasHandler.java
/**
 * Groups developer names that are close to each other by Levenshtein distance.
 * <p>
 * Names are passed through {@code convertToUTFLower} before being compared. A name
 * shorter than {@code minSize} is never used as the key of a group.
 *
 * @param allDevelopers developer names to compare pairwise; the list itself is not modified
 * @param distance      maximum edit distance for two names to count as aliases; {@code -1}
 *                      means "use the number of space-separated tokens of the first name"
 * @param minSize       minimum length a name needs in order to collect aliases
 * @return map from a name to the names detected as its aliases
 */
private static Map<String, List<String>> findAliases(List<String> allDevelopers, int distance, int minSize) {
	int threshold = distance;
	// CopyOnWriteArrayList iterators work on the snapshot taken when a loop starts, so the
	// remove() calls below never disturb a loop that is already running; they only shrink
	// the list seen by inner loops that start later.
	List<String> remaining = new CopyOnWriteArrayList<String>(allDevelopers);
	Map<String, List<String>> aliases = new HashMap<String, List<String>>();
	for (String primary : remaining) {
		remaining.remove(primary);
		for (String other : remaining) {
			if (primary.length() < minSize) {
				continue;
			}
			int editDistance = StringUtils.getLevenshteinDistance(
					convertToUTFLower(primary), convertToUTFLower(other));
			if (distance == -1) {
				// Dynamic threshold: one allowed edit per word of the primary name.
				threshold = primary.split(" ").length;
			}
			if (primary.equals(other) || editDistance > threshold) {
				continue;
			}
			List<String> group = aliases.get(primary);
			if (group == null) {
				group = new ArrayList<String>();
				aliases.put(primary, group);
			}
			group.add(other);
			remaining.remove(other);
		}
	}
	return aliases;
}
 
源代码4 项目: haxsync   文件: ContactsSyncAdapterService.java
/**
 * Finds the phone contact that best matches a Facebook contact name.
 * <p>
 * With {@code maxdistance == 0} only an exact (case-sensitive) set lookup is done.
 * Otherwise names are compared case-insensitively by Levenshtein distance; null names
 * are treated as empty strings. When several contacts are equally close, the one
 * iterated last wins.
 *
 * @param phoneContacts names of the contacts on the phone
 * @param fbContact     name of the Facebook contact to look for
 * @param maxdistance   largest edit distance still accepted as a match
 * @return the matching phone contact name, or {@code null} if none is close enough
 */
private static String matches(Set<String> phoneContacts, String fbContact, int maxdistance){
	if (maxdistance == 0){
		return phoneContacts.contains(fbContact) ? fbContact : null;
	}
	String wanted = fbContact != null ? fbContact.toLowerCase() : "";
	String closest = null;
	int closestDistance = maxdistance;
	for (String contact : phoneContacts){
		String candidate = contact != null ? contact.toLowerCase() : "";
		int distance = StringUtils.getLevenshteinDistance(candidate, wanted);
		if (distance > closestDistance){
			continue;
		}
		// "<=" semantics: an equally good later contact replaces the earlier one.
		closest = contact;
		closestDistance = distance;
	}
	return closest;
}
 
/**
 * Scores how similar a recorded invocation's request is to the incoming mock request.
 * <p>
 * If the invocation's identity is listed in the request's modified invocation identities,
 * the invocation's request object is re-serialized with Hessian; otherwise the serialized
 * form already stored on the invocation is used.
 *
 * @param invocation        recorded invocation to compare against
 * @param request           incoming mock request
 * @param requestSerialized serialized form of the incoming request
 * @return {@code 1 - distance / maxLength}; {@code 1.0} means the two serialized forms are
 *         equal, including the case where both are empty
 * @throws SerializeException if re-serializing the invocation's request fails
 */
private double calcSimilarity(Invocation invocation , MockRequest request, String requestSerialized) throws SerializeException {
    String requestSerializedTarget;
    if (CollectionUtils.isNotEmpty(request.getModifiedInvocationIdentity()) &&
        request.getModifiedInvocationIdentity().contains(invocation.getIdentity())) {
        requestSerializedTarget = SerializerWrapper.hessianSerialize(invocation.getRequest(),request.getEvent().javaClassLoader);
    } else {
        requestSerializedTarget = invocation.getRequestSerialized();
    }
    int distance = StringUtils.getLevenshteinDistance(requestSerialized, requestSerializedTarget);
    int longest = Math.max(requestSerialized.length(), requestSerializedTarget.length());
    if (longest == 0) {
        // Both strings are empty: they are identical. Without this guard the division
        // below would be 0.0 / 0 and the method would return NaN.
        return 1.0;
    }
    return 1 - (double) distance / longest;
}
 
源代码6 项目: Airachnid-Burp-Extension   文件: RequestSender.java
/**
     * Tests whether the responses of two requests are similar. Similar does not mean
     * identical: the responses count as similar when either their Jaro-Winkler score
     * reaches {@code JARO_THRESHOLD} or their Levenshtein distance does not exceed
     * {@code LEVENSHTEIN_THRESHOLD}, both static parameters of this class.
     *
     * @param firstString  first response to compare
     * @param secondString second response to compare
     * @return {@code true} if the two responses are similar
     */
    private static boolean testSimilar(String firstString, String secondString) {
        // Check Jaro-Winkler first and stop as soon as it passes: the Levenshtein
        // computation is quadratic in the input lengths and is only needed as a fallback.
        if (StringUtils.getJaroWinklerDistance(firstString, secondString) >= JARO_THRESHOLD) {
            return true;
        }
        return StringUtils.getLevenshteinDistance(firstString, secondString) <= LEVENSHTEIN_THRESHOLD;
    }
 
源代码7 项目: FlareBot   文件: GuildUtils.java
/**
 * Resolves a {@link Role} of a guild from a user-supplied string. Name matching ignores case.
 * <p>
 * Lookup order: a role whose name equals {@code s} ignoring case, then a role whose id is
 * the number formed by the digits contained in {@code s}. If neither is found and a channel
 * was given, an error message is sent to it, suggesting the closest self-assignable role
 * name when one is nearer than {@code LEVENSHTEIN_DISTANCE}.
 *
 * @param s       The String to get a role from
 * @param guildId The id of the {@link Guild} to get the role from
 * @param channel The channel to send an error message to if no role is found; may be null
 * @return the matching role, or null if no role matches
 */
public static Role getRole(String s, String guildId, TextChannel channel) {
    Guild guild = Getters.getGuildById(guildId);

    // 1) Match by name, ignoring case.
    for (Role named : guild.getRoles()) {
        if (named.getName().equalsIgnoreCase(s)) {
            return named;
        }
    }

    // 2) Match by id, built from whatever digits the input contains.
    try {
        Role byId = guild.getRoleById(Long.parseLong(s.replaceAll("[^0-9]", "")));
        if (byId != null) {
            return byId;
        }
    } catch (NumberFormatException | NullPointerException ignored) {
        // No usable numeric id in the input; fall through to the error path.
    }

    if (channel == null) {
        return null;
    }
    if (!guild.getRolesByName(s, true).isEmpty()) {
        return guild.getRolesByName(s, true).get(0);
    }

    // 3) Nothing matched: look for the nearest self-assignable role name to suggest.
    String suggestion = null;
    int smallestDistance = LEVENSHTEIN_DISTANCE;
    for (Role candidate : guild.getRoles()) {
        boolean selfAssignable = FlareBotManager.instance().getGuild(guildId).getSelfAssignRoles()
                .contains(candidate.getId());
        if (!selfAssignable) {
            continue;
        }
        int candidateDistance = StringUtils.getLevenshteinDistance(candidate.getName(), s);
        if (candidateDistance < smallestDistance) {
            smallestDistance = candidateDistance;
            suggestion = candidate.getName();
        }
    }
    MessageUtils.sendErrorMessage("That role does not exist! "
            + (suggestion != null ? "Maybe you mean `" + suggestion + "`" : ""), channel);
    return null;
}
 
源代码8 项目: 10000sentences   文件: Importer.java
/**
 * Decides whether a sentence pair is usable.
 * <p>
 * A pair is rejected when a sentence is missing, when both sentences are equal, or when
 * their Levenshtein distance is below 20% of their average length. Pairs where both
 * sentences are shorter than 50 characters are accepted at that point. Longer pairs are
 * additionally rejected when their lengths are too disproportionate or when either
 * sentence fully matches {@code NUMBER_DELIMITER}.
 *
 * @param s the sentence pair to check
 * @return {@code true} if the pair passes all checks
 */
protected boolean sentenceOK(SentenceVO s) {
    String targ = s.getTargetSentence();
    String known = s.getKnownSentence();

    // A pair with a missing side is unusable; without this check a single null
    // sentence would cause a NullPointerException at length() below.
    if (targ == null || known == null) {
        return false;
    }

    if (StringUtils.equals(targ, known)) {
        return false;
    }

    int tLen = targ.length();
    int kLen = known.length();
    if (StringUtils.getLevenshteinDistance(targ, known) < 0.2 * (tLen + kLen) / 2.) {
        // Too similar.
        return false;
    }

    if (tLen < 50 && kLen < 50) {
        return true;
    }

    int shorter = Math.min(tLen, kLen);
    // An empty sentence paired with a long one is maximally disproportionate; checking it
    // first also avoids the division by zero the ratio below would otherwise hit.
    // The ratio intentionally stays an integer division, so the existing threshold
    // (length ratios below 4 pass) is unchanged.
    if (shorter == 0 || Math.max(tLen, kLen) / shorter > 3) {
        return false;
    }

    if (NUMBER_DELIMITER.matcher(targ).matches() || NUMBER_DELIMITER.matcher(known).matches()) {
        return false;
    }

    return true;
}
 
/**
 * Computes the Levenshtein distance between {@code s1} and {@code s2}, which are
 * declared outside this method.
 *
 * @return the edit distance, converted to a {@code Float}
 * @throws Exception propagated from the distance computation
 */
@Override
public Float call() throws Exception {
    final int editDistance = StringUtils.getLevenshteinDistance(s1, s2);
    return (float) editDistance;
}
 
源代码10 项目: dungeon   文件: StringDistanceMetrics.java
/**
 * Computes the Levenshtein distance between two strings after checking that both are
 * within the maximum command length.
 *
 * @param a the first string
 * @param b the second string
 * @return the Levenshtein distance between {@code a} and {@code b}
 * @throws IllegalArgumentException if either string exceeds the maximum command length
 */
static int levenshteinDistance(final String a, final String b) {
  final boolean bothWithinLimit =
      CommandLimits.isWithinMaximumCommandLength(a) && CommandLimits.isWithinMaximumCommandLength(b);
  if (!bothWithinLimit) {
    throw new IllegalArgumentException("input is too big.");
  }
  return StringUtils.getLevenshteinDistance(a, b);
}
 
源代码11 项目: dictomaton   文件: LevenshteinAutomatonTest.java
/**
 * Generate a word, create a dictionary of permutations that are created using random edit operations,
 * and check that the Levenshtein automaton for that word finds exactly the permutations within its
 * edit distance.
 *
 * @param minLength             The minimum length of the generated word.
 * @param maxLength             The maximum length of the generated word; must not be smaller than
 *                              {@code minLength}.
 * @param nPermutations         The number of permutations to generate.
 * @param nRandomEditOperations Exclusive upper bound on the number of random edit operations applied
 *                              to each permutation.
 * @param distance              Test the Levenshtein automaton with this edit distance.
 * @throws DictionaryBuilderException if building the dictionary of permutations fails
 */
private void generateAndCheckPermutations(int minLength, int maxLength, int nPermutations, int nRandomEditOperations,
                                          int distance) throws DictionaryBuilderException {
    // Draw the word length from [minLength, maxLength]. The previous expression,
    // minLength + (maxLength - minLength + 1), always evaluated to maxLength + 1 and
    // therefore ignored minLength altogether.
    int wordLength = minLength + d_rng.nextInt(maxLength - minLength + 1);
    String str = randomString(wordLength);

    TreeSet<String> all = new TreeSet<>();
    Set<String> shouldHave = new HashSet<>();

    for (int i = 0; i < nPermutations; ++i) {
        int n = d_rng.nextInt(nRandomEditOperations);

        StringBuilder permutedBuilder = new StringBuilder(str);
        for (int perm = 0; perm < n; ++perm)
            d_editOperations[d_rng.nextInt(d_editOperations.length)].apply(permutedBuilder);

        String permuted = permutedBuilder.toString();

        all.add(permuted);

        // Reference result: the permutations the automaton is expected to accept.
        if (StringUtils.getLevenshteinDistance(str, permuted) <= distance)
            shouldHave.add(permuted);
    }

    Dictionary dict = new DictionaryBuilder().addAll(all).build();
    LevenshteinAutomaton la = new LevenshteinAutomaton(str, distance);

    Assert.assertEquals(shouldHave, la.intersectionLanguage(dict));
}
 
源代码12 项目: Stargraph   文件: LevenshteinRanker.java
/**
 * Uses the Levenshtein edit distance as the string distance.
 *
 * @param s1 the first character sequence
 * @param s2 the second character sequence
 * @return the Levenshtein distance between the two sequences, as a double
 */
@Override
double computeStringDistance(CharSequence s1, CharSequence s2) {
    final int editDistance = StringUtils.getLevenshteinDistance(s1, s2);
    return editDistance;
}
 
源代码13 项目: Indra   文件: DistanceStringFilter.java
/**
 * Matches two terms when the first one is at least {@code threshold} characters long and
 * their Levenshtein distance is below {@code min}.
 *
 * @param t1 the first term; its length is checked against the threshold
 * @param t2 the second term
 * @return {@code true} if both conditions hold
 */
@Override
public boolean matches(String t1, String t2) {
    if (!(t1.length() >= this.threshold)) {
        // Too short: skip the distance computation entirely.
        return false;
    }
    return StringUtils.getLevenshteinDistance(t1, t2) < min;
}
 
源代码14 项目: CogStack-Pipeline   文件: StringTools.java
/**
 * Returns the Levenshtein distance between two strings by delegating to
 * {@code StringUtils.getLevenshteinDistance}.
 *
 * @param str1 the first string
 * @param str2 the second string
 * @return the Levenshtein distance between {@code str1} and {@code str2}
 */
public static int getLevenshteinDistance(String str1, String str2) {
    final int distance = StringUtils.getLevenshteinDistance(str1, str2);
    return distance;
}
 
源代码15 项目: lucene-geo-gazetteer   文件: GeoNameResolver.java
/**
 * Select the best matches for each location name extracted from a document,
 * choosing from among a list of lists of candidate matches. Every candidate
 * is given a weight built from: 1) whether the extracted name occurs in the
 * candidate's name as a whole word or as a part, 2) the edit distance between
 * the extracted name and the alternate names containing it, 3) the
 * candidate's position in the incoming list (earlier is better). The
 * computed weight is also stored on each candidate via {@code setWeight}.
 *
 * @param resolvedEntities
 *            final result for the input stream; receives, per extracted
 *            name, up to {@code count} candidates in descending weight order
 * @param allCandidates
 *            each location name may hit several documents, this is the
 *            collection of all hit documents per name
 * @param count
 *            maximum number of results kept for one location name
 */
private void pickBestCandidates(
		HashMap<String, List<Location>> resolvedEntities,
		HashMap<String, List<Location>> allCandidates, int count) {

	for (String extractedName : allCandidates.keySet()) {

		List<Location> cur = allCandidates.get(extractedName);
		if(cur.isEmpty())
			continue;//continue if no results found

		// Max-heap on weight, used to pull out the top elements
		PriorityQueue<Location> pq = new PriorityQueue<>(cur.size(), new Comparator<Location>() {
			@Override
			public int compare(Location o1, Location o2) {
				return Integer.compare(o2.getWeight(), o1.getWeight());
			}
		});

		// Padding with spaces turns "contains" into a whole-word test; the
		// padded extracted name is the same for every candidate.
		String extractedAsWord = String.format(" %s ", extractedName);

		for (int i = 0; i < cur.size(); ++i) {
			Location candidate = cur.get(i);
			int weight = 0;
			String resolvedName = String.format(" %s ", candidate.getName());
			if (resolvedName.contains(extractedAsWord)) {
				// Extracted name is found as an exact word in the name
				weight = WEIGHT_NAME_MATCH;
			} else if (resolvedName.contains(extractedName)) {
				// Extracted name is found only as part of the name
				weight = WEIGHT_NAME_PART_MATCH;
			}

			// Sum the edit distances to all alternate names containing the extracted name
			String[] altNames = candidate.getAlternateNames().split(",");
			float altEditDist = 0;
			for(String altName : altNames){
				if(altName.contains(extractedName)){
					altEditDist+=StringUtils.getLevenshteinDistance(extractedName, altName);
				}
			}
			//lesser the edit distance more should be the weight
			weight += getCalibratedWeight(altNames.length, altEditDist);

			//Give preference to sorted results. 0th result should have more priority
			weight += (cur.size()-i) * WEIGHT_SORT_ORDER;

			candidate.setWeight(weight);
			pq.add(candidate);
		}

		List<Location> resultList = new ArrayList<>();

		for(int i =0 ; i< count && !pq.isEmpty() ; i++){
			resultList.add(pq.poll());
		}

		resolvedEntities.put(extractedName, resultList);
	}
}
 
源代码16 项目: datacollector   文件: FuzzyMatch.java
/**
 * Converts the Levenshtein distance between two strings into a similarity score,
 * where 100 means the strings are equal.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return {@code 100} minus the truncated percentage of the longer string's length that
 *         the edit distance represents
 */
private static int calculateLevenshteinDistance(String s1, String s2) {
  final int edits = StringUtils.getLevenshteinDistance(s1, s2);
  final int longest = Math.max(s1.length(), s2.length());
  // For two empty strings this is 0.0 / 0 = NaN; the int cast below maps NaN to 0,
  // so the score comes out as 100.
  final double fractionChanged = (double) edits / longest;
  return 100 - (int) (fractionChanged * 100);
}
 
源代码17 项目: TranskribusCore   文件: IngestHTRIntoAbbyyXML.java
private static void compareVersions() {
	// Computes the distance between two fixed sample words; the result is discarded.
	StringUtils.getLevenshteinDistance("fly", "ant");
}
 
源代码18 项目: TableDisentangler   文件: CandidateIEObject.java
/**
 * Computes the Levenshtein distance between {@code NormalizedPattern} and the given
 * pattern. The given pattern should be normalized as well.
 *
 * @param pattern the pattern to compare against
 * @return the Levenshtein distance between the two patterns
 */
public int calculateLevenshtein(String pattern)
{
	final int distance = StringUtils.getLevenshteinDistance(NormalizedPattern, pattern);
	return distance;
}
 
 同类方法