com.google.common.collect.PeekingIterator#next ( )源码实例Demo

下面列出了com.google.common.collect.PeekingIterator#next ( ) 实例代码，或者点击链接到github查看源代码，也可以在右侧发表评论。

源代码1 项目： lsmtree 文件： MergingIterator.java

/**
 * Produces the next merged entry.
 *
 * <p>The iterator polled from the heap supplies the entry to return. Every other iterator
 * whose head compares equal to that entry's key is advanced past its head, so the key is
 * handed out only once. Iterators that still have elements are put back on the heap.
 */
@Override
protected Generation.Entry<K, V> computeNext() {
    if (heap.isEmpty()) {
        return endOfData();
    }

    final PeekingIterator<EntryAndGenerationId<K,V>> winner = heap.poll();
    final EntryAndGenerationId<K,V> result = winner.next();
    if (winner.hasNext()) {
        temp.add(winner);
    }

    // Step over the heads of all remaining iterators that carry the same key.
    while (!heap.isEmpty()) {
        final int order = keyComparator.compare(result.entry.getKey(), heap.peek().peek().entry.getKey());
        if (order != 0) {
            break;
        }
        final PeekingIterator<EntryAndGenerationId<K, V>> shadowed = heap.poll();
        shadowed.next();
        if (shadowed.hasNext()) {
            temp.add(shadowed);
        }
    }

    // Re-insert the advanced iterators only now, so they are ordered by their new heads.
    heap.addAll(temp);
    temp.clear();
    return result.entry;
}

源代码2 项目： google-java-format 文件： JavaInputAstVisitor.java

/**
 * Visits each statement in order, forcing a break before every one and asking to preserve
 * blank lines before every statement except the first.
 */
protected void visitStatements(List<? extends StatementTree> statements) {
  PeekingIterator<StatementTree> remaining = Iterators.peekingIterator(statements.iterator());
  dropEmptyDeclarations();
  for (boolean isFirst = true; remaining.hasNext(); isFirst = false) {
    StatementTree statement = remaining.next();
    builder.forcedBreak();
    if (!isFirst) {
      builder.blankLineWanted(BlankLineWanted.PRESERVE);
    }
    markForPartialFormat();
    // variableFragments receives the iterator itself, so it may consume further statements
    // (presumably the sibling fragments of one multi-variable declaration; confirm in helper).
    List<VariableTree> declared = variableFragments(remaining, statement);
    if (declared.isEmpty()) {
      scan(statement, null);
      continue;
    }
    visitVariables(
        declared,
        DeclarationKind.NONE,
        canLocalHaveHorizontalAnnotations(declared.get(0).getModifiers()));
  }
}

源代码3 项目： javaide 文件： JavaInputAstVisitor.java

/**
 * Walks the statement list, emitting a forced break before each statement and requesting
 * that blank lines be preserved before all statements but the first.
 */
private void visitStatements(List<? extends StatementTree> statements) {
    PeekingIterator<StatementTree> cursor =
            Iterators.<StatementTree>peekingIterator(statements.iterator());
    boolean seenStatement = false;
    dropEmptyDeclarations();
    while (cursor.hasNext()) {
        StatementTree current = cursor.next();
        builder.forcedBreak();
        if (seenStatement) {
            builder.blankLineWanted(BlankLineWanted.PRESERVE);
        }
        markForPartialFormat();
        seenStatement = true;
        // The helper is handed the iterator and may therefore advance it further.
        List<VariableTree> variables = variableFragments(cursor, current);
        if (variables.isEmpty()) {
            scan(current, null);
        } else {
            visitVariables(
                    variables,
                    DeclarationKind.NONE,
                    canLocalHaveHorizontalAnnotations(variables.get(0).getModifiers()));
        }
    }
}

源代码4 项目： emodb 文件： TimeLimitedIteratorTest.java

/**
 * Wraps an endless counter in a time-limited iterator created with a 10 ms duration and
 * checks that the values arrive in sequence, that iteration stops between 10 ms and 100 ms,
 * and that the underlying iterator resumes exactly after the last value handed out.
 */
@Test
public void testExpirationTime() {
    final long startedAt = System.currentTimeMillis();
    final Iterator<Long> source = countForever();
    final PeekingIterator<Long> bounded = TimeLimitedIterator.create(source, Duration.ofMillis(10), 0);
    long last = -1;
    while (bounded.hasNext()) {
        final long value = bounded.next();
        assertEquals(value, last + 1);
        last = value;
    }
    final long elapsed = System.currentTimeMillis() - startedAt;

    assertTrue(elapsed >= 10);
    // Generous upper bound so that slow machines do not cause spurious failures.
    assertTrue(elapsed < 100);
    // The wrapper must not have consumed anything beyond what it returned.
    assertEquals(source.next(), (last + 1));
}

源代码5 项目： xtext-core 文件： IndentationAwareCompletionPrefixProvider.java

/**
 * Creates a peeking iterator over the leaf nodes of {@code root} in reversed tree order and
 * advances it until {@code candidate} has been consumed (or the leaves run out).
 *
 * <p>While advancing, zero-length leaves are tracked in {@code sameGrammarElement}: a leaf
 * whose grammar element equals that of the first zero-length leaf seen is appended, any other
 * zero-length leaf removes the last tracked entry.
 *
 * @return the iterator, positioned after the candidate if it was found
 */
private PeekingIterator<ILeafNode> createReversedLeafIterator(INode root, INode candidate, LinkedList<ILeafNode> sameGrammarElement) {
	PeekingIterator<ILeafNode> leaves = Iterators.peekingIterator(
			Iterators.filter(root.getAsTreeIterable().reverse().iterator(), ILeafNode.class));
	EObject referenceElement = null;
	while (leaves.hasNext()) {
		ILeafNode leaf = leaves.next();
		if (candidate.equals(leaf)) {
			// the candidate itself has been read; the caller continues from here
			return leaves;
		}
		if (leaf.getTotalLength() != 0) {
			continue;
		}
		EObject leafElement = tryGetGrammarElementAsRule(leaf);
		if (referenceElement == null) {
			referenceElement = leafElement;
		}
		if (leafElement.equals(referenceElement)) {
			sameGrammarElement.add(leaf);
		} else {
			sameGrammarElement.removeLast();
		}
	}
	return leaves;
}

源代码6 项目： nomulus 文件： RestoreCommitLogsAction.java

/**
 * Restores a single transaction to Datastore.
 *
 * <p>The first object taken from the iterator must be the {@link CommitLogManifest}; it lists
 * the keys to delete. The {@link CommitLogMutation} objects that directly follow it describe
 * the entities to save. The deletions are issued asynchronously, the saves are rebuilt from
 * their proto bytes, and the manifest and mutations themselves are written back as well so
 * that the commit log system is restored together with the data.
 *
 * @return the manifest, for use in restoring the {@link CommitLogBucket}.
 */
private CommitLogManifest restoreOneTransaction(PeekingIterator<ImmutableObject> commitLogs) {
  final CommitLogManifest manifest = (CommitLogManifest) commitLogs.next();
  Result<?> pendingDelete = deleteAsync(manifest.getDeletions());
  List<Entity> toSave = Lists.newArrayList(ofy().save().toEntity(manifest));
  while (commitLogs.hasNext()) {
    // Stop at the first object that is not a mutation; it belongs to the next transaction.
    if (!(commitLogs.peek() instanceof CommitLogMutation)) {
      break;
    }
    CommitLogMutation mutation = (CommitLogMutation) commitLogs.next();
    toSave.add(ofy().save().toEntity(mutation));
    toSave.add(EntityTranslator.createFromPbBytes(mutation.getEntityProtoBytes()));
  }
  saveRaw(toSave);
  try {
    pendingDelete.now();
  } catch (Exception e) {
    // The first asynchronous delete failed: issue it again through the retrier.
    retrier.callWithRetry(
        () -> deleteAsync(manifest.getDeletions()).now(), RuntimeException.class);
  }
  return manifest;
}

源代码7 项目： presto 文件： LocalProperties.java

/**
 * Returns an immutable copy of {@code properties} without the run of
 * {@link ConstantProperty} entries at its start; later constants are kept.
 */
public static <T> List<LocalProperty<T>> stripLeadingConstants(List<? extends LocalProperty<T>> properties)
{
    PeekingIterator<? extends LocalProperty<T>> remaining = peekingIterator(properties.iterator());
    while (remaining.hasNext()) {
        if (!(remaining.peek() instanceof ConstantProperty)) {
            break;
        }
        remaining.next();
    }
    return ImmutableList.copyOf(remaining);
}

源代码8 项目： arcusplatform 文件： CassandraVideoV2Dao.java

/**
 * Picks the next record to expose as {@code currentId} from the heads of all iterators.
 *
 * <p>Heads that repeat the previously returned record are discarded first. Among the
 * remaining heads the one selected by {@code compare} wins; when two heads compare equal
 * on recording id, a favorite record is preferred over a non-favorite one. If no iterator
 * has a usable head, {@code currentId} becomes {@code null}.
 *
 * <p>NOTE(review): {@code compare} is defined elsewhere; the existing comments imply that a
 * positive result means the peeked recording id is the "larger" one. Confirm its ordering.
 */
private void advance() {
	// Seed with the minimum time UUID, not favorite, as the value to beat.
	VideoRecordingSize latestTs = new VideoRecordingSize(IrisUUID.minTimeUUID(), false);
	Iterator<VideoRecordingSize> nextIt = null; 
	// find the largest value
	for(PeekingIterator<VideoRecordingSize> it: iterators) {
		if(!it.hasNext()) {
			continue;
		}
		
		if(currentId != null && (currentId.equals(it.peek()) || (currentId.getRecordingId().equals(it.peek().getRecordingId()) && !it.peek().isFavorite()))) {
			// throw away duplicate entry: the head either equals currentId, or has the same
			// recordingId as currentId and is not a favorite
			it.next();
			if(!it.hasNext()) {
				continue;
			}
		}
		int comp = compare(latestTs.getRecordingId(), it.peek().getRecordingId());
		if(comp > 0){
			latestTs = it.peek(); // take this head: per the original note its recordingId is larger
			nextIt = it;
		}else if(comp == 0) {
			if(latestTs.isFavorite() || !it.peek().isFavorite()) {
				// same recordingId and this head adds nothing over the current pick: consume it
				it.next(); //skip
			}else {
				latestTs = it.peek(); // take this head: it is a favorite and the current pick is not
				nextIt = it;
			}
		}
	}
	if(nextIt == null) {
		currentId = null;
	}
	else {
		// consume the winning head so the next call moves on
		currentId = nextIt.next();
	}
}

源代码9 项目： arcusplatform 文件： CassandraVideoV2Dao.java

/**
 * Moves {@code currentId} to the next id that is present in every iterator (an
 * intersection), or leaves it {@code null} once any iterator is exhausted.
 *
 * <p>The outer loop repeats until one full pass over {@code iterators} matched the same
 * {@code nextId} in each of them. Whenever an iterator is found to be positioned beyond the
 * candidate (per {@code compare}), its head becomes the new candidate and the pass restarts.
 *
 * <p>NOTE(review): {@code compare} is defined elsewhere; the code relies on a positive result
 * meaning "this iterator still has to be advanced to reach nextId". Confirm its ordering.
 */
private void advance() {
	// Cleared up front so that every early return below reports "no more elements".
	currentId = null;
	
	UUID nextId = null;
	int matches = 0;
	while(matches < iterators.size()) {
		// start a new pass: count how many iterators agree on nextId
		matches = 0;
		for(PeekingIterator<UUID> it: iterators) {
			while(nextId != null && it.hasNext() && compare(nextId, it.peek()) > 0) {
				// fast forward to where the current id is
				it.next();
			}
			if(!it.hasNext()) {
				// since its an intersection if any iterator is done, the whole thing is done
				return;
			}
			else if(nextId == null || it.peek().equals(nextId)) {
				// advance the iterator if it matches the current id
				// (with no candidate yet, the first head seen becomes the candidate)
				nextId = it.next();
				matches++;
			}
			else if(nextId != null && compare(nextId, it.peek()) < 0) {
				// if this iterator is farther along then the others, reset nextId and start the loop over
				// (the head is only peeked, so it is matched and consumed on the next pass)
				nextId = it.peek();
				break;
			}
		}
	}
	currentId = nextId;
}

源代码10 项目： arcusplatform 文件： VideoUtil.java

/**
 * Selects the next value for {@code currentId} among the heads of all iterators.
 *
 * <p>A head equal to the previously returned id is dropped as a duplicate. Of the remaining
 * heads, the last one for which {@code compare(best, head) > 0} wins and is consumed; if
 * there is none, {@code currentId} is set to {@code null}.
 */
private void advance() {
	UUID best = IrisUUID.minTimeUUID();
	Iterator<UUID> source = null;
	for(PeekingIterator<UUID> candidate: iterators) {
		// drop a head that merely repeats the id handed out last time
		if(candidate.hasNext() && currentId != null && currentId.equals(candidate.peek())) {
			candidate.next();
		}
		if(!candidate.hasNext()) {
			continue;
		}
		if(compare(best, candidate.peek()) > 0) {
			best = candidate.peek();
			source = candidate;
		}
	}
	currentId = source == null ? null : source.next();
}

源代码11 项目： xtext-core 文件： IndentationAwareCompletionPrefixProvider.java

/**
 * Collects {@code candidate} followed by every directly following leaf from {@code iterator}
 * that has the same total offset. The collected leaves are consumed from the iterator; the
 * first leaf with a different offset is left in place.
 */
private LinkedList<ILeafNode> collectLeafsWithSameOffset(ILeafNode candidate, PeekingIterator<ILeafNode> iterator) {
	final int expectedOffset = candidate.getTotalOffset();
	LinkedList<ILeafNode> result = new LinkedList<ILeafNode>();
	result.add(candidate);
	while (iterator.hasNext() && iterator.peek().getTotalOffset() == expectedOffset) {
		result.add(iterator.next());
	}
	return result;
}

源代码12 项目： SciGraph 文件： EntityProcessorImpl.java

/**
 * Partitions a list of annotations into groups of intersecting annotations.
 *
 * <p>The list passed in is sorted in place (reverse natural order). Within one group, an
 * annotation is skipped when another annotation with the same token was already added after
 * the group's first element.
 *
 * @param annotationList
 *          Annotations; reordered by this call
 * @param longestOnly
 *          If entities shorter than the group's head should be removed from each group
 * @return annotation groups
 */
static List<EntityAnnotationGroup> getAnnotationGroups(List<EntityAnnotation> annotationList,
    boolean longestOnly) {
  Collections.sort(annotationList, Collections.reverseOrder());
  List<EntityAnnotationGroup> result = new ArrayList<>();
  PeekingIterator<EntityAnnotation> remaining = Iterators.peekingIterator(annotationList.iterator());
  while (remaining.hasNext()) {
    EntityAnnotationGroup current = new EntityAnnotationGroup();
    current.add(remaining.next());
    Set<Entity> seenEntities = new HashSet<>();
    while (remaining.hasNext() && current.intersects(remaining.peek())) {
      EntityAnnotation overlapping = remaining.next();
      // Set.add returns false for an entity that was already recorded; such annotations
      // are consumed but not added to the group.
      if (seenEntities.add(overlapping.getToken())) {
        current.add(overlapping);
      }
    }

    if (longestOnly) {
      // Keep only the entries that are at least as long as the group's head.
      int longest = current.peek().length();
      for (Iterator<EntityAnnotation> members = current.iterator(); members.hasNext();) {
        if (members.next().length() < longest) {
          members.remove();
        }
      }
    }

    result.add(current);
  }

  return result;
}

源代码13 项目： incubator-gobblin 文件： LoopingDatasetFinderSource.java

/**
 * Consumes elements from the iterator while they compare lower than {@code reference}.
 * If the first element that is not lower compares equal to the reference, it is consumed too.
 *
 * @return the consumed element equal to the reference, or null if there is none (including
 *         when {@code reference} is null, in which case the iterator is left untouched).
 */
@Nullable
private <T extends URNIdentified> T advanceUntilLargerThan(PeekingIterator<T> it, String reference) {
  if (reference == null) {
    return null;
  }

  while (it.hasNext()) {
    int order = lexicographicalComparator.compare(it.peek(), reference);
    if (order > 0) {
      return null;
    }
    if (order == 0) {
      return it.next();
    }
    it.next();
  }
  return null;
}

源代码14 项目： accumulo-examples 文件： FileDataQuery.java

/**
 * Points the shared chunk stream at the entries stored under {@code hash}.
 *
 * <p>The scanner is restricted to the row {@code hash}. Every leading entry whose column
 * family is not {@link FileDataIngest#CHUNK_CF} is recorded in {@code lastRefs} and consumed;
 * the iterator, positioned at the first chunk entry (or exhausted), becomes the new source of
 * the chunk stream.
 *
 * @param hash the row to read
 * @return the chunk input stream held by this object, reset to the new source
 * @throws IOException if the chunk stream rejects the new source
 */
public ChunkInputStream getData(String hash) throws IOException {
  scanner.setRange(new Range(hash));
  scanner.setBatchSize(1);
  lastRefs.clear();
  PeekingIterator<Entry<Key,Value>> pi = Iterators.peekingIterator(scanner.iterator());
  // hasNext() must be re-checked on every step: a row that has entries but none in the chunk
  // column family would otherwise run peek() past the end and throw NoSuchElementException.
  while (pi.hasNext() && !pi.peek().getKey().getColumnFamily().equals(FileDataIngest.CHUNK_CF)) {
    lastRefs.add(pi.next());
  }
  cis.clear();
  cis.setSource(pi);
  return cis;
}

源代码15 项目： presto 文件： TableMetadataSystemTable.java

/**
 * Builds the result pages, one row per table returned by the DAO.
 *
 * <p>Schema and table name are taken from the fixed values of {@code tupleDomain} (if any)
 * and passed to both DAO queries. Table rows and column rows are then walked in lockstep by
 * table id, which relies on both results being ordered by table id (presumably guaranteed by
 * the DAO queries; verify there). Each output row holds, in this order: schema name, table
 * name, temporal column, ordering columns, distribution name, bucket count, bucketing columns
 * and the organized flag.
 *
 * @throws PrestoException if a table declares a temporal column id that none of its columns has
 */
private static List<Page> buildPages(MetadataDao dao, ConnectorTableMetadata tableMetadata, TupleDomain<Integer> tupleDomain)
{
    Map<Integer, NullableValue> fixedValues = extractFixedValues(tupleDomain).orElse(ImmutableMap.of());
    String schemaName = getStringValue(fixedValues.get(getColumnIndex(tableMetadata, SCHEMA_NAME)));
    String tableName = getStringValue(fixedValues.get(getColumnIndex(tableMetadata, TABLE_NAME)));

    PageListBuilder pages = new PageListBuilder(tableMetadata.getColumns().stream()
            .map(ColumnMetadata::getType)
            .collect(toList()));

    List<TableMetadataRow> tables = dao.getTableMetadataRows(schemaName, tableName);
    PeekingIterator<ColumnMetadataRow> columns = peekingIterator(dao.getColumnMetadataRows(schemaName, tableName).iterator());

    for (TableMetadataRow table : tables) {
        // Discard column rows that belong to tables with a smaller id.
        while (columns.hasNext()) {
            if (columns.peek().getTableId() >= table.getTableId()) {
                break;
            }
            columns.next();
        }

        // Gather the column-derived values of this table.
        OptionalLong temporalColumnId = table.getTemporalColumnId();
        String temporalColumnName = null;
        SortedMap<Integer, String> orderingColumns = new TreeMap<>();
        SortedMap<Integer, String> bucketingColumns = new TreeMap<>();
        while (columns.hasNext() && columns.peek().getTableId() == table.getTableId()) {
            ColumnMetadataRow column = columns.next();
            if (temporalColumnId.isPresent() && column.getColumnId() == temporalColumnId.getAsLong()) {
                temporalColumnName = column.getColumnName();
            }
            column.getSortOrdinalPosition()
                    .ifPresent(position -> orderingColumns.put(position, column.getColumnName()));
            column.getBucketOrdinalPosition()
                    .ifPresent(position -> bucketingColumns.put(position, column.getColumnName()));
        }

        pages.beginRow();

        // schema_name, table_name
        VARCHAR.writeSlice(pages.nextBlockBuilder(), utf8Slice(table.getSchemaName()));
        VARCHAR.writeSlice(pages.nextBlockBuilder(), utf8Slice(table.getTableName()));

        // temporal_column
        if (!temporalColumnId.isPresent()) {
            pages.nextBlockBuilder().appendNull();
        }
        else if (temporalColumnName == null) {
            throw new PrestoException(RAPTOR_CORRUPT_METADATA, format("Table ID %s has corrupt metadata (invalid temporal column ID)", table.getTableId()));
        }
        else {
            VARCHAR.writeSlice(pages.nextBlockBuilder(), utf8Slice(temporalColumnName));
        }

        // ordering_columns
        writeArray(pages.nextBlockBuilder(), orderingColumns.values());

        // distribution_name
        Optional<String> distributionName = table.getDistributionName();
        if (!distributionName.isPresent()) {
            pages.nextBlockBuilder().appendNull();
        }
        else {
            VARCHAR.writeSlice(pages.nextBlockBuilder(), utf8Slice(distributionName.get()));
        }

        // bucket_count
        OptionalInt bucketCount = table.getBucketCount();
        if (!bucketCount.isPresent()) {
            pages.nextBlockBuilder().appendNull();
        }
        else {
            BIGINT.writeLong(pages.nextBlockBuilder(), bucketCount.getAsInt());
        }

        // bucketing_columns
        writeArray(pages.nextBlockBuilder(), bucketingColumns.values());

        // organized
        BOOLEAN.writeBoolean(pages.nextBlockBuilder(), table.isOrganized());
    }

    return pages.build();
}

源代码16 项目： science-result-extractor 文件： TaggingTokenClusteror.java

/**
 * Groups the labeled token containers supplied by {@code taggingTokenSynchronizer} into
 * clusters.
 *
 * <p>A new cluster is started when a container is the first one, is flagged as a beginning,
 * carries a different label object than the current cluster, lies more than 600 units away
 * from the current cluster's first token, or follows a table caption that ended with
 * {@code ". \n\n"}.
 *
 * @return the clusters in encounter order, or an immutable empty list when there is no input
 */
public List<TaggingTokenCluster> cluster() {
        
        List<TaggingTokenCluster> result = new ArrayList<>();

        PeekingIterator<LabeledTokensContainer> it = Iterators.peekingIterator(taggingTokenSynchronizer);
        // nothing to cluster: no containers at all, or the first one is null
        if (!it.hasNext() || (it.peek() == null)) {
            return Collections.emptyList();
        }

        // a boolean is introduced to indicate the start of the sequence in the case the label
        // has no beginning indicator (e.g. I-)
        boolean begin = true;
        // placeholder cluster; it is replaced (and only then added to result) on the first iteration
        TaggingTokenCluster curCluster = new TaggingTokenCluster(it.peek().getTaggingLabel());
        BoundingBox curBox=null;
 
        
        
        while (it.hasNext()) {
            LabeledTokensContainer cont = it.next();
            // position of the first layout token of the incoming container
            BoundingBox b = BoundingBox.fromLayoutToken(cont.getLayoutTokens().get(0));
            if(!curCluster.concatTokens().isEmpty()){
                // compare against the first token of the current cluster; a distance above 600
                // starts a new cluster
                curBox = BoundingBox.fromLayoutToken(curCluster.concatTokens().get(0));
                if(b.distanceTo(curBox)>600){
                    curCluster = new TaggingTokenCluster(cont.getTaggingLabel());
                    result.add(curCluster);
                }
            }
            
            // NOTE(review): labels are compared by reference (!=), not equals().
            // NOTE(review): if the distance check above also fired for this container, the
            // cluster it added stays in result without any tokens - confirm this is intended.
            if (begin || cont.isBeginning() || cont.getTaggingLabel() != curCluster.getTaggingLabel()) {
                curCluster = new TaggingTokenCluster(cont.getTaggingLabel());
                result.add(curCluster);
            }
            
            // for tables, separate caption and content
            // (curCluster is assigned a non-null value before the loop, so this check always passes)
            if(curCluster!=null){
                String tableStr = LayoutTokensUtil.normalizeText(curCluster.concatTokens());
                // cluster text looks like a table caption: "Table <n>" followed by ':', '.' or " <capital>"
                if(tableStr.matches(".*?(Table|TABLE) \\d+(:|\\.| [A-Z]).*?")){
//                if(tableStr.matches(".*?(Table|TABLE|Figure|FIGURE) \\d+(:|\\.).*?")){
                    // the caption's last container is a period followed by a blank line:
                    // the incoming container starts a new cluster
                    if(toText(curCluster.getLastContainer().getLayoutTokens()).equalsIgnoreCase(". \n\n")){ 
                        curCluster = new TaggingTokenCluster(cont.getTaggingLabel());
                        result.add(curCluster);
                    }
                }
            }
                    
                    
            curCluster.addLabeledTokensContainer(cont);
            if (begin)
                begin = false;
        }

        return result;
    }

源代码17 项目： java-n-IDE-for-Android 文件： JavaInputAstVisitor.java

/**
 * Add a list of declarations.
 *
 * <p>An empty list produces at most an empty pair of braces. Otherwise each declaration is
 * emitted after a forced break; a blank line is requested before a declaration when it, or
 * the one before it, is not a plain variable (or is a variable with Javadoc).
 *
 * @param bodyDeclarations the declarations to emit
 * @param braces whether to surround the declarations with braces
 * @param first0 whether blank lines before the first emitted declaration should be preserved
 */
void addBodyDeclarations(
        List<? extends Tree> bodyDeclarations, BracesOrNot braces, FirstDeclarationsOrNot first0) {
    if (bodyDeclarations.isEmpty()) {
        if (!braces.isYes()) {
            return;
        }
        builder.space();
        tokenBreakTrailingComment("{", plusTwo);
        builder.blankLineWanted(BlankLineWanted.NO);
        builder.open(ZERO);
        token("}", plusTwo);
        builder.close();
        return;
    }

    if (braces.isYes()) {
        builder.space();
        tokenBreakTrailingComment("{", plusTwo);
        builder.open(ZERO);
    }
    builder.open(plusTwo);

    boolean preserveBlankLine = first0.isYes();
    boolean previousWantedBlankLine = false;
    PeekingIterator<Tree> members = Iterators.<Tree>peekingIterator(bodyDeclarations.iterator());
    while (members.hasNext()) {
        Tree member = members.next();
        dropEmptyDeclarations();
        builder.forcedBreak();
        boolean isVariable = member.getKind() == VARIABLE;
        boolean wantsBlankLine = !isVariable || hasJavaDoc(member);
        if (preserveBlankLine) {
            builder.blankLineWanted(PRESERVE);
        } else if (wantsBlankLine || previousWantedBlankLine) {
            builder.blankLineWanted(YES);
        }
        markForPartialFormat();

        if (isVariable) {
            // The helper is handed the iterator and may consume further declarations.
            visitVariables(
                    variableFragments(members, member),
                    DeclarationKind.FIELD,
                    fieldAnnotationDirection(((VariableTree) member).getModifiers()));
        } else {
            scan(member, null);
        }
        preserveBlankLine = false;
        previousWantedBlankLine = wantsBlankLine;
    }

    dropEmptyDeclarations();
    builder.forcedBreak();
    builder.close();
    builder.forcedBreak();
    markForPartialFormat();
    if (braces.isYes()) {
        builder.blankLineWanted(BlankLineWanted.NO);
        token("}", plusTwo);
        builder.close();
    }
}

源代码18 项目： javaide 文件： JavaInputAstVisitor.java

/**
 * Add a list of declarations.
 *
 * <p>With no declarations, only an empty brace pair is written (and only if braces are
 * requested). With declarations, each one follows a forced break, and a blank line is asked
 * for whenever the current or the previous declaration is something other than a variable
 * without Javadoc.
 *
 * @param bodyDeclarations the declarations to write
 * @param braces whether the declarations are wrapped in braces
 * @param first0 whether existing blank lines before the first written declaration are preserved
 */
void addBodyDeclarations(
        List<? extends Tree> bodyDeclarations, BracesOrNot braces, FirstDeclarationsOrNot first0) {
    final boolean nothingToEmit = bodyDeclarations.isEmpty();
    if (nothingToEmit && braces.isYes()) {
        builder.space();
        tokenBreakTrailingComment("{", plusTwo);
        builder.blankLineWanted(BlankLineWanted.NO);
        builder.open(ZERO);
        token("}", plusTwo);
        builder.close();
    }
    if (nothingToEmit) {
        return;
    }

    if (braces.isYes()) {
        builder.space();
        tokenBreakTrailingComment("{", plusTwo);
        builder.open(ZERO);
    }
    builder.open(plusTwo);

    boolean leading = first0.isYes();
    boolean blankLineBeforePrevious = false;
    PeekingIterator<Tree> cursor = Iterators.<Tree>peekingIterator(bodyDeclarations.iterator());
    while (cursor.hasNext()) {
        Tree declaration = cursor.next();
        dropEmptyDeclarations();
        builder.forcedBreak();
        boolean field = declaration.getKind() == VARIABLE;
        boolean blankLineBeforeThis = !field || hasJavaDoc(declaration);
        if (leading) {
            builder.blankLineWanted(PRESERVE);
        } else if (blankLineBeforeThis || blankLineBeforePrevious) {
            builder.blankLineWanted(YES);
        }
        markForPartialFormat();

        if (!field) {
            scan(declaration, null);
        } else {
            // variableFragments gets the iterator and may advance it past further declarations.
            visitVariables(
                    variableFragments(cursor, declaration),
                    DeclarationKind.FIELD,
                    fieldAnnotationDirection(((VariableTree) declaration).getModifiers()));
        }
        leading = false;
        blankLineBeforePrevious = blankLineBeforeThis;
    }

    dropEmptyDeclarations();
    builder.forcedBreak();
    builder.close();
    builder.forcedBreak();
    markForPartialFormat();
    if (braces.isYes()) {
        builder.blankLineWanted(BlankLineWanted.NO);
        token("}", plusTwo);
        builder.close();
    }
}

源代码19 项目： qpid-broker-j 文件： ServerAssembler.java

/**
 * Dispatches a batch of frames in order.
 *
 * <p>A frame whose channel has a session is handled, together with all directly following
 * frames of the same channel, inside a single privileged action running under that session's
 * access control context. Frames without a session are handled directly. If processing ends
 * abnormally, the bodies of all frames not yet taken from the batch are disposed.
 */
public void received(final List<ServerFrame> frames)
{
    if (frames.isEmpty())
    {
        return;
    }

    final PeekingIterator<ServerFrame> pending = Iterators.peekingIterator(frames.iterator());
    boolean completed = false;
    try
    {
        while (pending.hasNext())
        {
            final ServerFrame head = pending.next();
            final int headChannel = head.getChannel();

            final ServerSession session = _connection.getSession(headChannel);
            if (session == null)
            {
                received(head);
                continue;
            }

            final AccessControlContext context = session.getAccessControllerContext();
            AccessController.doPrivileged((PrivilegedAction<Void>) () ->
            {
                received(head);
                // keep going while the next frame belongs to the same channel
                while (pending.hasNext() && headChannel == pending.peek().getChannel())
                {
                    received(pending.next());
                }
                return null;
            }, context);
        }
        completed = true;
    }
    finally
    {
        if (!completed)
        {
            // abnormal exit: release the buffers of the frames that were never processed
            while (pending.hasNext())
            {
                final QpidByteBuffer unprocessedBody = pending.next().getBody();
                if (unprocessedBody != null)
                {
                    unprocessedBody.dispose();
                }
            }
        }
    }
}

源代码20 项目： incubator-gobblin 文件： CsvFileDownloader.java

/**
 * Provides a row iterator over a CSV file, backed by OpenCSV's CSVReader.
 *
 * <p>Leading rows can be skipped by regex: rows are dropped from the top for as long as their
 * first field matches the configured pattern. This is useful when the file starts with a
 * variable number of comment rows. Afterwards one more row is dropped if the extractor is
 * configured to skip the first record.
 *
 * {@inheritDoc}
 * @see org.apache.gobblin.source.extractor.filebased.FileDownloader#downloadFile(java.lang.String)
 */
@SuppressWarnings("unchecked")
@Override
public Iterator<String[]> downloadFile(String file) throws IOException {

  log.info("Beginning to download file: " + file);
  final State state = fileBasedExtractor.workUnitState;

  CSVReader reader;
  try {
    if (!state.contains(DELIMITER)) {
      // No delimiter configured: use the CSVReader constructor without one.
      reader = this.fileBasedExtractor.getCloser().register(
          new CSVReader(new InputStreamReader(
                            this.fileBasedExtractor.getFsHelper().getFileStream(file),
                            ConfigurationKeys.DEFAULT_CHARSET_ENCODING)));
    } else {
      String delimiterStr = state.getProp(DELIMITER).trim();
      Preconditions.checkArgument(delimiterStr.length() == 1, "Delimiter should be a character.");

      char delimiter = delimiterStr.charAt(0);
      log.info("Using " + delimiter + " as a delimiter.");

      reader = this.fileBasedExtractor.getCloser().register(
          new CSVReader(new InputStreamReader(
                            this.fileBasedExtractor.getFsHelper().getFileStream(file),
                            ConfigurationKeys.DEFAULT_CHARSET_ENCODING), delimiter));
    }
  } catch (FileBasedHelperException e) {
    throw new IOException(e);
  }

  PeekingIterator<String[]> rows = Iterators.peekingIterator(reader.iterator());

  if (state.contains(SKIP_TOP_ROWS_REGEX)) {
    String regex = state.getProp(SKIP_TOP_ROWS_REGEX);
    log.info("Trying to skip with regex: " + regex);
    while (rows.hasNext()) {
      String[] head = rows.peek();
      // Stop at the first row that is empty or whose first field does not match.
      if (head.length == 0 || !head[0].matches(regex)) {
        break;
      }
      rows.next();
    }
  }

  if (this.fileBasedExtractor.isShouldSkipFirstRecord() && rows.hasNext()) {
    log.info("Skipping first record");
    rows.next();
  }
  return rows;
}

方法所在类

com.google.common.collect.PeekingIterator