下面列出了org.apache.lucene.index.BaseCompositeReader#org.apache.lucene.util.OpenBitSet 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
public FDTreeElement addGeneralization(OpenBitSet lhs, OpenBitSet rhs) {
FDTreeElement currentNode = this;
currentNode.addRhsAttributes(rhs);
boolean newElement = false;
for (int i = lhs.nextSetBit(0); i >= 0; i = lhs.nextSetBit(i + 1)) {
if (currentNode.getChildren() == null) {
currentNode.setChildren(new FDTreeElement[this.numAttributes]);
currentNode.getChildren()[i] = new FDTreeElement(this.numAttributes);
newElement = true;
}
else if (currentNode.getChildren()[i] == null) {
currentNode.getChildren()[i] = new FDTreeElement(this.numAttributes);
newElement = true;
}
currentNode = currentNode.getChildren()[i];
currentNode.addRhsAttributes(rhs);
}
if (newElement)
return currentNode;
return null;
}
public boolean skipTo(int i) throws IOException
{
if (!in.skipTo(i))
{
return false;
}
OpenBitSet deletedDocuments = getDeletedDocuments();
while (deletedDocuments.get(in.doc()))
{
if (!in.next())
{
return false;
}
}
return true;
}
private void fetchNonFdsWindowingOverRecordsProgressive(Set<OpenBitSet> negCover, int[][] compressedRecords) {
System.out.println("\tMoving window over records ...");
int numRecords = compressedRecords.length;
int currentWindowDistance = 1;
int newNonFDs = 0;
do {
newNonFDs = 0;
for (int recordId = 0; recordId < numRecords; recordId++) {
int partnerRecordId = recordId + currentWindowDistance;
if (partnerRecordId >= numRecords)
continue;
if (negCover.add(this.getViolatedFds(compressedRecords[recordId], compressedRecords[partnerRecordId])))
newNonFDs++;
}
currentWindowDistance++;
}
while (newNonFDs > this.progressiveThreshold);
}
private void fetchNonFdsWindowingOverClusters(Set<OpenBitSet> negCover, int[][] compressedRecords, List<PositionListIndex> plis) {
System.out.println("\tMoving window over small clusters ...");
for (PositionListIndex pli : plis) {
boolean selectSmallClustersOnly = pli.getClusters().size() < this.attributeThreshold; // If there are too few clusters, then the clusters are large and we have already executed sufficient comparisons between the records of these clusters
for (IntArrayList cluster : pli.getClusters()) {
if (selectSmallClustersOnly && (cluster.size() > this.windowSize)) // But if the current cluster is very small, we should still use it for comparisons (the other cluster(s) must be very large)
continue;
for (int recordIndex = 0; recordIndex < cluster.size(); recordIndex++) {
int recordId = cluster.getInt(recordIndex);
for (int partnerRecordIndex = recordIndex + 1; partnerRecordIndex < Math.min(recordIndex + this.windowSize, cluster.size()); partnerRecordIndex++) {
int partnerRecordId = cluster.getInt(partnerRecordIndex);
negCover.add(this.getViolatedFds(compressedRecords[recordId], compressedRecords[partnerRecordId]));
}
}
}
}
}
private void fetchNonFdsFromClustersTopsAndBottoms(Set<OpenBitSet> negCover, int[][] compressedRecords, List<PositionListIndex> plis) {
System.out.println("\tComparing window on clusters tops and bottoms ...");
for (PositionListIndex pli : plis) {
for (IntArrayList cluster : pli.getClusters()) {
if (cluster.size() < this.windowSize)
continue;
for (int recordIndex = 0; recordIndex < this.windowSize; recordIndex++) {
int recordId = cluster.getInt(recordIndex);
for (int partnerRecordIndex = cluster.size() - 1; partnerRecordIndex > cluster.size() - this.windowSize; partnerRecordIndex--) {
int partnerRecordId = cluster.getInt(partnerRecordIndex);
if (recordId == partnerRecordId)
continue;
negCover.add(this.getViolatedFds(compressedRecords[recordId], compressedRecords[partnerRecordId]));
}
}
}
}
}
protected int specializePositiveCover(FDTree posCoverTree, OpenBitSet lhs, int rhs) {
int newFDs = 0;
List<OpenBitSet> specLhss = posCoverTree.getFdAndGeneralizations(lhs, rhs);
for (OpenBitSet specLhs : specLhss) {
posCoverTree.removeFunctionalDependency(specLhs, rhs);
if (specLhs.cardinality() == posCoverTree.getMaxDepth())
continue;
for (int attr = this.numAttributes - 1; attr >= 0; attr--) { // TODO: Is iterating backwards a good or bad idea?
if (!lhs.get(attr) && (attr != rhs)) {
specLhs.set(attr);
if (!posCoverTree.containsFdOrGeneralization(specLhs, rhs)) {
posCoverTree.addFunctionalDependency(specLhs, rhs);
newFDs++;
}
specLhs.clear(attr);
}
}
}
return newFDs;
}
public FDTreeElement addFunctionalDependency(OpenBitSet lhs, int rhs) {
FDTreeElement currentNode = this;
currentNode.addRhsAttribute(rhs);
int lhsLength = 0;
for (int i = lhs.nextSetBit(0); i >= 0; i = lhs.nextSetBit(i + 1)) {
lhsLength++;
if (currentNode.getChildren() == null) {
currentNode.setChildren(new FDTreeElement[this.numAttributes]);
currentNode.getChildren()[i] = new FDTreeElement(this.numAttributes);
}
else if (currentNode.getChildren()[i] == null) {
currentNode.getChildren()[i] = new FDTreeElement(this.numAttributes);
}
currentNode = currentNode.getChildren()[i];
currentNode.addRhsAttribute(rhs);
}
currentNode.markFd(rhs);
this.depth = Math.max(this.depth, lhsLength);
return currentNode;
}
public void removeLhs(OpenBitSet lhs) {
LhsTrieElement[] path = new LhsTrieElement[(int)lhs.cardinality()];
int currentPathIndex = 0;
LhsTrieElement currentNode = this;
path[currentPathIndex] = currentNode;
currentPathIndex++;
for (int i = lhs.nextSetBit(0); i >= 0; i = lhs.nextSetBit(i + 1)) {
currentNode = currentNode.getChildren()[i];
path[currentPathIndex] = currentNode;
currentPathIndex++;
}
for (int i = path.length - 1; i >= 0; i --) {
path[i].removeChild(i);
if (path[i].getChildren() != null)
break;
}
}
@Test
public void testDeleteGeneralizations() {
fdtree = new FDTree(4, -1);
OpenBitSet lhs = new OpenBitSet();
lhs.set(0);
lhs.set(1);
this.fdtree.addFunctionalDependency(lhs, 3);
lhs.clear(1);
lhs.set(2);
this.fdtree.addFunctionalDependency(lhs, 3);
//lhs.set(1);
//this.fdtree.deleteGeneralizations(lhs, 3, 0);
//assertTrue(this.fdtree.isEmpty());
}
protected ClusterIdentifier buildClusterIdentifier(int recordId, int[][] invertedPlis, OpenBitSet lhs, int lhsSize) {
int[] cluster = new int[lhsSize];
int index = 0;
for (int lhsAttr = lhs.nextSetBit(0); lhsAttr >= 0; lhsAttr = lhs.nextSetBit(lhsAttr + 1)) {
int clusterId = invertedPlis[lhsAttr][recordId];
if (clusterId < 0)
return null;
cluster[index] = clusterId;
index++;
}
return new ClusterIdentifier(cluster);
}
protected void getLhsAndGeneralizations(OpenBitSet lhs, int currentLhsAttr, OpenBitSet currentLhs, List<OpenBitSet> foundLhs) {
if (this.children == null) {
foundLhs.add(currentLhs.clone());
return;
}
while (currentLhsAttr >= 0) {
int nextLhsAttr = lhs.nextSetBit(currentLhsAttr + 1);
if (this.children[currentLhsAttr] != null) {
currentLhs.set(currentLhsAttr);
this.children[currentLhsAttr].getLhsAndGeneralizations(lhs, nextLhsAttr, currentLhs, foundLhs);
currentLhs.clear(currentLhsAttr);
}
currentLhsAttr = nextLhsAttr;
}
}
public FDTreeElement addGeneralization(OpenBitSet lhs, int rhs) {
FDTreeElement currentNode = this;
currentNode.addRhsAttribute(rhs);
boolean newElement = false;
for (int i = lhs.nextSetBit(0); i >= 0; i = lhs.nextSetBit(i + 1)) {
if (currentNode.getChildren() == null) {
currentNode.setChildren(new FDTreeElement[this.numAttributes]);
currentNode.getChildren()[i] = new FDTreeElement(this.numAttributes);
newElement = true;
}
else if (currentNode.getChildren()[i] == null) {
currentNode.getChildren()[i] = new FDTreeElement(this.numAttributes);
newElement = true;
}
currentNode = currentNode.getChildren()[i];
currentNode.addRhsAttribute(rhs);
}
if (newElement)
return currentNode;
return null;
}
public void generalize() {
int maxLevel = this.numAttributes;
// Build an index level->nodes for the top-down, level-wise traversal
Int2ObjectOpenHashMap<ArrayList<ElementLhsPair>> level2elements = new Int2ObjectOpenHashMap<>(maxLevel);
for (int level = 0; level < maxLevel; level++)
level2elements.put(level, new ArrayList<ElementLhsPair>());
this.addToIndex(level2elements, 0, new OpenBitSet(this.numAttributes));
// Traverse the levels top-down and add all direct generalizations
for (int level = maxLevel - 1; level >= 0; level--) {
for (ElementLhsPair pair : level2elements.get(level)) {
// Remove isFDs, because we will mark valid FDs later on
pair.element.removeAllFds();
// Generate and add generalizations
for (int lhsAttr = pair.lhs.nextSetBit(0); lhsAttr >= 0; lhsAttr = pair.lhs.nextSetBit(lhsAttr + 1)) {
pair.lhs.clear(lhsAttr);
FDTreeElement generalization = this.addGeneralization(pair.lhs, pair.element.getRhsAttributes());
if (generalization != null)
level2elements.get(level - 1).add(new ElementLhsPair(generalization, pair.lhs.clone()));
pair.lhs.set(lhsAttr);
}
}
}
}
protected void filterGeneralizations(OpenBitSet lhs, int rhs, int currentLhsAttr, OpenBitSet currentLhs) {
if (currentLhs.equals(lhs))
return;
this.rhsFds.clear(rhs);
// Is the dependency already read and we have not yet found a generalization?
if (currentLhsAttr < 0)
return;
if (this.children != null) {
for (int nextLhsAttr = lhs.nextSetBit(currentLhsAttr); nextLhsAttr >= 0; nextLhsAttr = lhs.nextSetBit(nextLhsAttr + 1)) {
if ((this.children[nextLhsAttr] != null) && (this.children[nextLhsAttr].hasRhsAttribute(rhs))) {
currentLhs.set(nextLhsAttr);
this.children[nextLhsAttr].filterGeneralizations(lhs, rhs, lhs.nextSetBit(nextLhsAttr + 1), currentLhs);
currentLhs.clear(nextLhsAttr);
}
}
}
}
protected boolean containsFdOrGeneralization(OpenBitSet lhs, int rhs, int currentLhsAttr) {
if (this.isFd(rhs))
return true;
// Is the dependency already read and we have not yet found a generalization?
if (currentLhsAttr < 0)
return false;
int nextLhsAttr = lhs.nextSetBit(currentLhsAttr + 1);
if ((this.children != null) && (this.children[currentLhsAttr] != null) && (this.children[currentLhsAttr].hasRhsAttribute(rhs)))
if (this.children[currentLhsAttr].containsFdOrGeneralization(lhs, rhs, nextLhsAttr))
return true;
return this.containsFdOrGeneralization(lhs, rhs, nextLhsAttr);
}
public void getLevel(int level, int currentLevel, OpenBitSet currentLhs, List<FDTreeElementLhsPair> result) {
if (level == currentLevel) {
result.add(new FDTreeElementLhsPair(this, currentLhs.clone()));
}
else {
currentLevel++;
if (this.children == null)
return;
for (int child = 0; child < this.numAttributes; child++) {
if (this.children[child] == null)
continue;
currentLhs.set(child);
this.children[child].getLevel(level, currentLevel, currentLhs, result);
currentLhs.clear(child);
}
}
}
/**
* Checks, whether the dependent attribute ends in the current tree element.
*
* @param rhs
* the i'th dependent attribute.
* @return true, if the tree element does not have any children with the
* same dependent attribute. false, otherwise.
*/
/* public boolean isLastNodeOf(int rhs) {
if (!this.hasRhsAttribute(rhs))
return false;
// Check all children for the rhs
for (int attr = 0; attr < this.maxAttributeNumber; attr++)
if ((this.children[attr] != null) && (this.children[attr].hasRhsAttribute(rhs)))
return false;
return true;
}
*/
// FUDEBS
protected void addOneSmallerGeneralizations(OpenBitSet currentLhs, int maxCurrentLhsAttribute, int rhs, FDTree tree) {
for (int lhsAttribute = currentLhs.nextSetBit(0); lhsAttribute != maxCurrentLhsAttribute; lhsAttribute = currentLhs.nextSetBit(lhsAttribute + 1)) {
currentLhs.clear(lhsAttribute);
tree.addGeneralization(currentLhs, rhs);
currentLhs.set(lhsAttribute);
}
}
public int read(int[] docs, int[] freqs) throws IOException
{
int[] innerDocs = new int[docs.length];
int[] innerFreq = new int[docs.length];
int count = in.read(innerDocs, innerFreq);
// Is the stream exhausted
if (count == 0)
{
return 0;
}
OpenBitSet deletedDocuments = getDeletedDocuments();
while (allDeleted(innerDocs, count, deletedDocuments))
{
count = in.read(innerDocs, innerFreq);
// Is the stream exhausted
if (count == 0)
{
return 0;
}
}
// Add non deleted
int insertPosition = 0;
for (int i = 0; i < count; i++)
{
if (!deletedDocuments.get(innerDocs[i]))
{
docs[insertPosition] = innerDocs[i];
freqs[insertPosition] = innerFreq[i];
insertPosition++;
}
}
return insertPosition;
}
public void filterGeneralizations() {
// Traverse the tree depth-first
// For each node, iterate all FDs
// For each FD, store the FD, then remove all this.getFdOrGeneralization(lhs, rhs) and add the FD again
OpenBitSet currentLhs = new OpenBitSet(this.numAttributes);
this.filterGeneralizations(currentLhs, this);
}
protected OpenBitSet getBitSet(Set<TableColumn> columnCombination) {
OpenBitSet set = new OpenBitSet(fds.getNumAttributes());
for(TableColumn c : columnCombination) {
set.set(attToIdx.get(c));
}
return set;
}
public void removeNonFunctionalDependency(Pair<Set<TableColumn>,Set<TableColumn>> nonFd) {
OpenBitSet lhs = getBitSet(nonFd.getFirst());
for(TableColumn c : nonFd.getSecond()) {
fds.removeFunctionalDependency(lhs, attToIdx.get(c));
}
}
public void grow(OpenBitSet lhs, FDTree fdTree) {
// Add specializations of all nodes an mark them as isFD, but if specialization exists, then it is invalid and should not be marked; only add specializations of nodes not marked as isFD!
OpenBitSet rhs = this.rhsAttributes;
OpenBitSet invalidRhs = rhs.clone();
invalidRhs.remove(this.rhsFds);
// Add specializations that are not invalid
if (invalidRhs.cardinality() > 0) {
for (int extensionAttr = 0; extensionAttr < this.numAttributes; extensionAttr++) {
if (lhs.get(extensionAttr) || rhs.get(extensionAttr))
continue;
lhs.set(extensionAttr);
fdTree.addFunctionalDependencyIfNotInvalid(lhs, invalidRhs);
lhs.clear(extensionAttr);
}
}
// Traverse children and let them add their specializations
if (this.children != null) {
for (int childAttr = 0; childAttr < this.numAttributes; childAttr++) {
FDTreeElement element = this.children[childAttr];
if (element != null) {
lhs.set(childAttr);
element.grow(lhs, fdTree);
lhs.clear(childAttr);
}
}
}
}
protected SuperScorer(Scorer scorer, OpenBitSet bitSet, String originalQueryStr, ScoreType scoreType) {
super(scorer.getWeight());
this.scorer = scorer;
this.bitSet = bitSet;
this.originalQueryStr = originalQueryStr;
this.scoreType = scoreType;
}
public List<FDTreeElementLhsPair> getLevel(int level) {
List<FDTreeElementLhsPair> result = new ArrayList<>();
OpenBitSet currentLhs = new OpenBitSet();
int currentLevel = 0;
this.getLevel(level, currentLevel, currentLhs, result);
return result;
}
private void fetchNonFdsSPIDER(Set<OpenBitSet> negCover, int[][] compressedRecords, List<PositionListIndex> plis) {
System.out.println("\tSPIDERing over clusters ...");
for (PositionListIndex pli : plis) {
for (IntArrayList cluster : pli.getClusters()) {
int pivotRecord = cluster.getInt(0);
IntArrayList[] clusters = new IntArrayList[this.numAttributes];
for (int i = 0; i < this.numAttributes; i++)
if (compressedRecords[pivotRecord][i] >= 0) // Maybe the record has no duplicate value in some attributes
clusters[i] = plis.get(i).getClusters().get(compressedRecords[pivotRecord][i]);
this.spider(clusters, pivotRecord, negCover);
}
}
}
public boolean runNext(Set<OpenBitSet> negCover, int[][] compressedRecords) {
this.windowDistance++;
int numNewNonFds = 0;
int numComparisons = 0;
int previousNegCoverSize = negCover.size();
Iterator<IntArrayList> clusterIterator = this.clusters.iterator();
while (clusterIterator.hasNext()) {
IntArrayList cluster = clusterIterator.next();
if (cluster.size() <= this.windowDistance) {
clusterIterator.remove();
continue;
}
for (int recordIndex = 0; recordIndex < (cluster.size() - this.windowDistance); recordIndex++) {
int recordId = cluster.getInt(recordIndex);
int partnerRecordId = cluster.getInt(recordIndex + this.windowDistance);
negCover.add(this.getViolatedFds(compressedRecords[recordId], compressedRecords[partnerRecordId]));
numComparisons++;
}
}
numNewNonFds = negCover.size() - previousNegCoverSize;
this.numNewNonFds.add(numNewNonFds);
this.numComparisons.add(numComparisons);
if (numComparisons == 0)
return false;
return true;
}
private static int getStartingPosition(OpenBitSet docsInRowSpanToFetch, int start) {
int docStartingPosition = docsInRowSpanToFetch.nextSetBit(0);
int offset = 0;
while (offset < start) {
docStartingPosition = docsInRowSpanToFetch.nextSetBit(docStartingPosition + 1);
offset++;
}
return docStartingPosition;
}
private static OpenBitSet getDocsToFetch(AtomicReader atomicReader, Selector selector, int primeDocRowId,
int numberOfDocsInRow, Bits liveDocs, Filter filter, AtomicInteger totalRecords) throws IOException {
Set<String> alreadyProcessed = new HashSet<String>();
OpenBitSet bits = new OpenBitSet(numberOfDocsInRow);
OpenBitSet mask = null;
if (filter != null) {
DocIdSet docIdSet = filter.getDocIdSet(atomicReader.getContext(), liveDocs);
mask = getMask(docIdSet, primeDocRowId, numberOfDocsInRow);
}
Set<String> columnFamiliesToFetch = selector.getColumnFamiliesToFetch();
boolean fetchAll = true;
if (columnFamiliesToFetch != null) {
fetchAll = false;
applyFamilies(alreadyProcessed, bits, columnFamiliesToFetch, atomicReader, primeDocRowId, numberOfDocsInRow,
liveDocs);
}
Map<String, Set<String>> columnsToFetch = selector.getColumnsToFetch();
if (columnsToFetch != null) {
fetchAll = false;
applyColumns(alreadyProcessed, bits, columnsToFetch, atomicReader, primeDocRowId, numberOfDocsInRow, liveDocs);
}
if (fetchAll) {
bits.set(0, numberOfDocsInRow);
}
if (mask != null) {
bits.intersect(mask);
}
totalRecords.set((int) bits.cardinality());
return bits;
}
private OpenBitSet getViolatedFds(int[] t1, int[] t2) {
OpenBitSet equalAttrs = new OpenBitSet(t1.length);
for (int i = 0; i < t1.length; i++)
if (this.valueComparator.isEqual(t1[i], t2[i]))
equalAttrs.set(i);
return equalAttrs;
}
/**
* Find the least general functional dependencies violated by t1 and t2 and update the negative cover accordingly.
* Note: t1 and t2 must have the same length.
*/
protected void addViolatedFdsToCover(int[] t1, int[] t2, FDTree negCoverTree) {
OpenBitSet equalAttrs = new OpenBitSet(t1.length);
for (int i = 0; i < t1.length; i++)
if (this.valueComparator.isEqual(t1[i], t2[i]))
equalAttrs.set(i);
OpenBitSet diffAttrs = new OpenBitSet(t1.length);
diffAttrs.set(0, this.numAttributes);
diffAttrs.andNot(equalAttrs);
negCoverTree.addFunctionalDependency(equalAttrs, diffAttrs);
}