org.apache.lucene.codecs.Codec Source Code Examples (Demo)

The following examples show how to use org.apache.lucene.codecs.Codec. You can also follow the project links to view the full source on GitHub.

Example 1 (Project: lucene-solr, File: CommitsImpl.java)

@Override
public Optional<Codec> getSegmentCodec(long commitGen, String name) throws LukeException {
  try {
    SegmentInfos infos = findSegmentInfos(commitGen);
    if (infos == null) {
      return Optional.empty();
    }

    return infos.asList().stream()
        .filter(seg -> seg.info.name.equals(name))
        .findAny()
        .map(seg -> seg.info.getCodec());
  } catch (IOException e) {
    throw new LukeException(String.format(Locale.ENGLISH, "Failed to load segment infos for commit generation %d", commitGen), e);
  }
}
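A possible call site for this method, sketched with an assumed commit generation and segment name:

// Usage sketch (hypothetical arguments): resolve the codec name for segment "_0"
// of commit generation 1 and print it if the segment exists.
commits.getSegmentCodec(1, "_0")
    .map(Codec::getName)
    .ifPresent(name -> System.out.println("codec: " + name));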
 
Example 2 (Project: lucene-solr, File: TestSuggestField.java)

static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
  IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  Codec filterCodec = new Lucene86Codec() {
    CompletionPostingsFormat.FSTLoadMode fstLoadMode =
        RandomPicks.randomFrom(random(), CompletionPostingsFormat.FSTLoadMode.values());
    PostingsFormat postingsFormat = new Completion84PostingsFormat(fstLoadMode);

    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      if (suggestFields.contains(field)) {
        return postingsFormat;
      }
      return super.getPostingsFormatForField(field);
    }
  };
  iwc.setCodec(filterCodec);
  return iwc;
}
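A minimal consumption sketch for this config; the MockAnalyzer, the directory helpers, and the field name "suggest_field" are assumptions drawn from the test framework:

// Usage sketch: build a writer with the suggest-field codec and index one suggestion.
Directory dir = newDirectory();
IndexWriterConfig iwc = iwcWithSuggestField(new MockAnalyzer(random()), Collections.singleton("suggest_field"));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
writer.addDocument(Collections.singletonList(new SuggestField("suggest_field", "apple", 4)));
writer.close();
dir.close();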
 

/** Indexes all fields/terms at the specified
 *  IndexOptions, and fully tests at that IndexOptions. */
public void testFull(Codec codec, Path path, IndexOptions options, boolean withPayloads) throws Exception {
  Directory dir = LuceneTestCase.newFSDirectory(path);

  // TODO test thread safety of buildIndex too
  FieldsProducer fieldsProducer = buildIndex(codec, dir, options, withPayloads, true);

  testFields(fieldsProducer);

  IndexOptions[] allOptions = IndexOptions.values();
  int maxIndexOption = Arrays.asList(allOptions).indexOf(options);

  for (int i = 0; i <= maxIndexOption; i++) {
    testTerms(fieldsProducer, EnumSet.allOf(Option.class), allOptions[i], options, true);
    if (withPayloads) {
      // If we indexed w/ payloads, also test enums w/o accessing payloads:
      testTerms(fieldsProducer, EnumSet.complementOf(EnumSet.of(Option.PAYLOADS)), allOptions[i], options, true);
    }
  }

  fieldsProducer.close();
  dir.close();
}
 

/** Test that field info attributes coming back are not mutable */
public void testImmutableAttributes() throws Exception {
  Directory dir = newDirectory();
  Codec codec = getCodec();
  SegmentInfo segmentInfo = newSegmentInfo(dir, "_123");
  FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null));
  FieldInfo fi = builder.getOrAdd("field");
  fi.setIndexOptions(TextField.TYPE_STORED.indexOptions());
  addAttributes(fi);
  fi.putAttribute("foo", "bar");
  fi.putAttribute("bar", "baz");
  FieldInfos infos = builder.finish();
  codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT);
  FieldInfos infos2 = codec.fieldInfosFormat().read(dir, segmentInfo, "", IOContext.DEFAULT);
  assertEquals(1, infos2.size());
  assertNotNull(infos2.fieldInfo("field"));
  Map<String,String> attributes = infos2.fieldInfo("field").attributes();
  // shouldn't be able to modify attributes
  expectThrows(UnsupportedOperationException.class, () -> {
    attributes.put("bogus", "bogus");
  });

  dir.close();
}
 

public void testReader() throws IOException {
  TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg.info, fieldInfos, newIOContext(random()));
  for (int j = 0; j < 5; j++) {
    Terms vector = reader.get(j).terms(testFields[0]);
    assertNotNull(vector);
    assertEquals(testTerms.length, vector.size());
    TermsEnum termsEnum = vector.iterator();
    for (int i = 0; i < testTerms.length; i++) {
      final BytesRef text = termsEnum.next();
      assertNotNull(text);
      String term = text.utf8ToString();
      //System.out.println("Term: " + term);
      assertEquals(testTerms[i], term);
    }
    assertNull(termsEnum.next());
  }
  reader.close();
}
 

/** Test attributes map */
public void testAttributes() throws Exception {
  Directory dir = newDirectory();
  Codec codec = getCodec();
  byte id[] = StringHelper.randomId();
  Map<String,String> attributes = new HashMap<>();
  attributes.put("key1", "value1");
  attributes.put("key2", "value2");
  SegmentInfo info = new SegmentInfo(dir, getVersions()[0], getVersions()[0], "_123", 1, false, codec, 
                                     Collections.emptyMap(), id, attributes, null);
  info.setFiles(Collections.<String>emptySet());
  codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
  SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
  assertEquals(attributes, info2.getAttributes());
  
  // attributes map should be immutable
  expectThrows(UnsupportedOperationException.class, () -> {
    info2.getAttributes().put("bogus", "bogus");
  });

  dir.close();
}
 

/**
 * Check codec restrictions.
 * 
 * @throws AssumptionViolatedException if the class does not work with a given codec.
 */
private void checkCodecRestrictions(Codec codec) {
  assumeFalse("Class not allowed to use codec: " + codec.getName() + ".",
      shouldAvoidCodec(codec.getName()));

  if (codec instanceof RandomCodec && !avoidCodecs.isEmpty()) {
    for (String name : ((RandomCodec)codec).formatNames) {
      assumeFalse("Class not allowed to use postings format: " + name + ".",
          shouldAvoidCodec(name));
    }
  }

  PostingsFormat pf = codec.postingsFormat();
  assumeFalse("Class not allowed to use postings format: " + pf.getName() + ".",
      shouldAvoidCodec(pf.getName()));

  assumeFalse("Class not allowed to use postings format: " + LuceneTestCase.TEST_POSTINGSFORMAT + ".", 
      shouldAvoidCodec(LuceneTestCase.TEST_POSTINGSFORMAT));
}
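The avoid list that shouldAvoidCodec(...) consults is normally fed by the test framework's @SuppressCodecs annotation; a minimal sketch (the class name is hypothetical):

// Sketch: opt a test class out of specific codecs; these names end up in the
// checks performed by checkCodecRestrictions above.
@LuceneTestCase.SuppressCodecs({"SimpleText", "Direct"})
public class MyCodecSensitiveTest extends LuceneTestCase {
}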
 
Example 8 (Project: lucene-solr, File: TestSegmentInfos.java)

public void testVersionsOneSegment() throws IOException {
  BaseDirectoryWrapper dir = newDirectory();
  dir.setCheckIndexOnClose(false);
  byte id[] = StringHelper.randomId();
  Codec codec = Codec.getDefault();

  SegmentInfos sis = new SegmentInfos(Version.LATEST.major);
  SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, Codec.getDefault(),
                                     Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
  info.setFiles(Collections.<String>emptySet());
  codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
  SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId());

  sis.add(commitInfo);
  sis.commit(dir);
  sis = SegmentInfos.readLatestCommit(dir);
  assertEquals(Version.LUCENE_9_0_0, sis.getMinSegmentLuceneVersion());
  assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
  dir.close();
}
 
Example 9 (Project: lucene-solr, File: SolrResourceLoader.java)

/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after {@link #addToClassLoader(List)}
 * and before using this ResourceLoader.
 */
synchronized void reloadLuceneSPI() {
  // TODO improve to use a static Set<URL> to check when we need to
  if (!needToReloadLuceneSPI) {
    return;
  }
  needToReloadLuceneSPI = false; // reset
  log.debug("Reloading Lucene SPI");

  // Codecs:
  PostingsFormat.reloadPostingsFormats(this.classLoader);
  DocValuesFormat.reloadDocValuesFormats(this.classLoader);
  Codec.reloadCodecs(this.classLoader);
  // Analysis:
  CharFilterFactory.reloadCharFilters(this.classLoader);
  TokenFilterFactory.reloadTokenFilters(this.classLoader);
  TokenizerFactory.reloadTokenizers(this.classLoader);
}
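Codecs are discovered through the JDK service-provider mechanism (entries under META-INF/services/org.apache.lucene.codecs.Codec). A sketch of looking up an implementation contributed by the new classloader after the reload; the codec name is hypothetical:

// Resolves only after reloadLuceneSPI() has re-scanned the classloader.
Codec custom = Codec.forName("MyCustomCodec");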
 
Example 10 (Project: lucene-solr, File: TestPendingDeletes.java)

public void testDeleteDoc() throws IOException {
  Directory dir = new ByteBuffersDirectory();
  SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 10, false, Codec.getDefault(),
      Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
  SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId());
  PendingDeletes deletes = newPendingDeletes(commitInfo);
  assertNull(deletes.getLiveDocs());
  int docToDelete = TestUtil.nextInt(random(), 0, 7);
  assertTrue(deletes.delete(docToDelete));
  assertNotNull(deletes.getLiveDocs());
  assertEquals(1, deletes.numPendingDeletes());

  Bits liveDocs = deletes.getLiveDocs();
  assertFalse(liveDocs.get(docToDelete));
  assertFalse(deletes.delete(docToDelete)); // delete again

  assertTrue(liveDocs.get(8));
  assertTrue(deletes.delete(8));
  assertTrue(liveDocs.get(8)); // we have a snapshot
  assertEquals(2, deletes.numPendingDeletes());

  assertTrue(liveDocs.get(9));
  assertTrue(deletes.delete(9));
  assertTrue(liveDocs.get(9));

  // now make sure new live docs see the deletions
  liveDocs = deletes.getLiveDocs();
  assertFalse(liveDocs.get(9));
  assertFalse(liveDocs.get(8));
  assertFalse(liveDocs.get(docToDelete));
  assertEquals(3, deletes.numPendingDeletes());
  dir.close();
}
 
Example 11 (Project: lucene-solr, File: CommitsImplTest.java)

@Test
public void testSegmentCodec_invalid_name() {
  CommitsImpl commits = new CommitsImpl(reader, indexDir.toString());
  Optional<Codec> codec = commits.getSegmentCodec(1, "xxx");
  assertFalse(codec.isPresent());
}
 
Example 12 (Project: lucene-solr, File: TestDoc.java)

private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, String merged, boolean useCompoundFile)
  throws Exception {
  IOContext context = newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1)));
  SegmentReader r1 = new SegmentReader(si1, Version.LATEST.major, context);
  SegmentReader r2 = new SegmentReader(si2, Version.LATEST.major, context);

  final Codec codec = Codec.getDefault();
  TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
  final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, null, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);

  SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2),
                                           si, InfoStream.getDefault(), trackingDir,
                                           new FieldInfos.FieldNumbers(null), context);

  MergeState mergeState = merger.merge();
  r1.close();
  r2.close();
  si.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));
    
  if (useCompoundFile) {
    Collection<String> filesToDelete = si.files();
    codec.compoundFormat().write(dir, si, context);
    si.setUseCompoundFile(true);
    for(String name : filesToDelete) {
      si1.info.dir.deleteFile(name);
    }
  }

  return new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L, StringHelper.randomId());
}
 
Example 13 (Project: lucene-solr, File: CrankyCodec.java)

/** 
 * Wrap the provided codec with crankiness.
 * Try passing Asserting for the most fun.
 */
public CrankyCodec(Codec delegate, Random random) {
  // we impersonate the passed-in codec, so we don't need to be in SPI,
  // and so we don't change file formats
  super(delegate.getName(), delegate);
  this.random = random;
}
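A usage sketch under the Lucene test framework, with an arbitrary seed:

// Wrap the default codec so file-format operations throw at random,
// exercising exception paths during indexing.
Random random = new Random(42);
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random));
iwc.setCodec(new CrankyCodec(TestUtil.getDefaultCodec(), random));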
 

/** Test field infos read/write with random fields, with different values. */
public void testRandom() throws Exception {
  Directory dir = newDirectory();
  Codec codec = getCodec();
  SegmentInfo segmentInfo = newSegmentInfo(dir, "_123");
  
  // generate a bunch of fields
  int numFields = atLeast(2000);
  Set<String> fieldNames = new HashSet<>();
  for (int i = 0; i < numFields; i++) {
    fieldNames.add(TestUtil.randomUnicodeString(random()));
  }
  FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null));
  for (String field : fieldNames) {
    IndexableFieldType fieldType = randomFieldType(random());
    FieldInfo fi = builder.getOrAdd(field);
    IndexOptions indexOptions = fieldType.indexOptions();
    if (indexOptions != IndexOptions.NONE) {
      fi.setIndexOptions(indexOptions);
      if (fieldType.omitNorms()) {      
        fi.setOmitsNorms();
      }
    }
    fi.setDocValuesType(fieldType.docValuesType());
    if (fieldType.indexOptions() != IndexOptions.NONE && fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
      if (random().nextBoolean()) {
        fi.setStorePayloads();
      }
    }
    addAttributes(fi);
  }
  FieldInfos infos = builder.finish();
  codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT);
  FieldInfos infos2 = codec.fieldInfosFormat().read(dir, segmentInfo, "", IOContext.DEFAULT);
  assertEquals(infos, infos2);
  dir.close();
}
 
Example 15 (Project: lucene-solr, File: TestCodecSupport.java)

public void testDocValuesFormats() {
  // NOTE: Direct (and Disk) DocValues formats were removed, so we use "Asserting" 
  // as a way to vet that the configuration actually matters.
  Codec codec = h.getCore().getCodec();
  Map<String, SchemaField> fields = h.getCore().getLatestSchema().getFields();
  SchemaField schemaField = fields.get("string_disk_f");
  PerFieldDocValuesFormat format = (PerFieldDocValuesFormat) codec.docValuesFormat();
  assertEquals(TestUtil.getDefaultDocValuesFormat().getName(), format.getDocValuesFormatForField(schemaField.getName()).getName());
  schemaField = fields.get("string_direct_f");
  assertEquals("Asserting", format.getDocValuesFormatForField(schemaField.getName()).getName());
  schemaField = fields.get("string_f");
  assertEquals(TestUtil.getDefaultDocValuesFormat().getName(),
      format.getDocValuesFormatForField(schemaField.getName()).getName());
}
 

@Override
protected Codec getCodec() {
  return new FilterCodec("Lucene84", Codec.forName("Lucene84")) {
    @Override
    public SegmentInfoFormat segmentInfoFormat() {
      return new Lucene70RWSegmentInfoFormat();
    }
  };
}
 
Example 17 (Project: crate, File: PluginsService.java)

/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after the new classloader has been created to
 * register the services for use.
 */
static void reloadLuceneSPI(ClassLoader loader) {
    // do NOT change the order of these method calls!

    // Codecs:
    PostingsFormat.reloadPostingsFormats(loader);
    DocValuesFormat.reloadDocValuesFormats(loader);
    Codec.reloadCodecs(loader);
    // Analysis:
    CharFilterFactory.reloadCharFilters(loader);
    TokenFilterFactory.reloadTokenFilters(loader);
    TokenizerFactory.reloadTokenizers(loader);
}
 
Example 18 (Project: lucene-solr, File: TestCodecSupport.java)

public void testPostingsFormats() {
  Codec codec = h.getCore().getCodec();
  Map<String, SchemaField> fields = h.getCore().getLatestSchema().getFields();
  SchemaField schemaField = fields.get("string_direct_f");
  PerFieldPostingsFormat format = (PerFieldPostingsFormat) codec.postingsFormat();
  assertEquals("Direct", format.getPostingsFormatForField(schemaField.getName()).getName());
  schemaField = fields.get("string_standard_f");
  assertEquals(TestUtil.getDefaultPostingsFormat().getName(), format.getPostingsFormatForField(schemaField.getName()).getName());
  schemaField = fields.get("string_f");
  assertEquals(TestUtil.getDefaultPostingsFormat().getName(), format.getPostingsFormatForField(schemaField.getName()).getName());
}
 

/**
 * After suite cleanup (always invoked).
 */
@Override
protected void after() throws Exception {
  Codec.setDefault(savedCodec);
  InfoStream.setDefault(savedInfoStream);
  if (savedLocale != null) Locale.setDefault(savedLocale);
  if (savedTimeZone != null) TimeZone.setDefault(savedTimeZone);
}
 
Example 20 (Project: lucene-solr, File: TestUtil.java)

public static String getDocValuesFormat(Codec codec, String field) {
  DocValuesFormat f = codec.docValuesFormat();
  if (f instanceof PerFieldDocValuesFormat) {
    return ((PerFieldDocValuesFormat) f).getDocValuesFormatForField(field).getName();
  } else {
    return f.getName();
  }
}
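A one-line usage sketch; the field name is an assumption:

// Returns the per-field format name when the codec is per-field aware,
// otherwise the name of the codec-wide format.
String dvfName = TestUtil.getDocValuesFormat(Codec.getDefault(), "my_field");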
 

@Override
protected Codec getCodec() {
  if (TEST_NIGHTLY) {
    return CompressingCodec.randomInstance(random());
  } else {
    return CompressingCodec.reasonableInstance(random());
  }
}
 
Example 22 (Project: lucene-solr, File: SegmentInfo.java)

/** Can only be called once. */
public void setCodec(Codec codec) {
  assert this.codec == null;
  if (codec == null) {
    throw new IllegalArgumentException("codec must be non-null");
  }
  this.codec = codec;
}
 
Example 23 (Project: lucene-solr, File: SegmentInfos.java)

private static Codec readCodec(DataInput input) throws IOException {
  final String name = input.readString();
  try {
    return Codec.forName(name);
  } catch (IllegalArgumentException e) {
    // maybe it's an old default codec that moved
    if (name.startsWith("Lucene")) {
      throw new IllegalArgumentException("Could not load codec '" + name + "'.  Did you forget to add lucene-backward-codecs.jar?", e);
    }
    throw e;
  }
}
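readCodec delegates to the SPI lookup behind Codec.forName; a sketch of that lookup and of listing what is registered on the classpath:

Codec codec = Codec.forName("Lucene84");     // throws IllegalArgumentException if the name is unknown
Set<String> names = Codec.availableCodecs(); // all codec names visible via SPI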
 
Example 24 (Project: lucene-solr, File: IndexWriterConfig.java)

/**
 * Set the {@link Codec}.
 * 
 * <p>
 * Only takes effect when IndexWriter is first created.
 */
public IndexWriterConfig setCodec(Codec codec) {
  if (codec == null) {
    throw new IllegalArgumentException("codec must not be null");
  }
  this.codec = codec;
  return this;
}
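A minimal sketch of wiring a codec into a new writer; the index path and analyzer are assumptions:

// The codec must be set before the IndexWriter is constructed; changing the
// config afterwards does not affect a live writer.
IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer())
    .setCodec(Codec.forName("Lucene84"));
try (IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get("/tmp/index")), iwc)) {
  // add documents here
}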
 

public void testSameCodecDifferentInstance() throws Exception {
  Codec codec = new AssertingCodec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      if ("id".equals(field)) {
        return new DirectPostingsFormat();
      } else if ("date".equals(field)) {
        return new DirectPostingsFormat();
      } else {
        return super.getPostingsFormatForField(field);
      }
    }
  };
  doTestMixedPostings(codec);
}
 
Example 26 (Project: inception, File: MtasDocumentIndex.java)

private synchronized IndexWriter getIndexWriter() throws IOException
{
    if (_indexWriter == null) {
        log.debug("Opening index for project [{}]({})", project.getName(), project.getId());

        OPEN_INDEXES.put(project.getId(), this);
        
        // Initialize and populate the hash maps for the layers and features
        features = schemaService.listAnnotationFeature(project).stream()
                .filter(feat -> feat.getLayer().isEnabled())
                .filter(feat -> feat.isEnabled())
                .collect(Collectors.toList());
        
        // Add the project id to the configuration
        JSONObject jsonParserConfiguration = new JSONObject();
        jsonParserConfiguration.put(PARAM_PROJECT_ID, project.getId());
        
        // Tokenizer parameters
        Map<String, String> tokenizerArguments = new HashMap<>();
        tokenizerArguments.put(ARGUMENT_PARSER, MtasUimaParser.class.getName());
        tokenizerArguments.put(ARGUMENT_PARSER_ARGS, jsonParserConfiguration.toString());
        
        // Build analyzer
        Analyzer mtasAnalyzer = CustomAnalyzer.builder()
                .withTokenizer(MtasTokenizerFactory.class, tokenizerArguments)
                .build();
        
        Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
        analyzerPerField.put(FIELD_CONTENT, mtasAnalyzer);
        
        PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(),
                analyzerPerField);
        
        // Build IndexWriter
        FileUtils.forceMkdir(getIndexDir());
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        config.setCodec(Codec.forName(MTAS_CODEC_NAME));
        IndexWriter indexWriter = new IndexWriter(FSDirectory.open(getIndexDir().toPath()),
                config);
        
        // Initialize the index
        indexWriter.commit();
        
        // After the index has been initialized, assign the _indexWriter - this is also used
        // by isOpen() to check if the index writer is available.
        _indexWriter = indexWriter;
    }
    
    return _indexWriter;
}
 
Example 27 (Project: lucene-solr, File: TestSegmentInfos.java)

/** Test toString method */
public void testToString() throws Throwable {
  SegmentInfo si;
  final Directory dir = newDirectory();
  Codec codec = Codec.getDefault();

  // diagnostics map
  Map<String, String> diagnostics = Map.of("key1", "value1", "key2", "value2");

  // attributes map
  Map<String,String> attributes = Map.of("akey1", "value1", "akey2", "value2");

  // no diagnostics, no attributes
  si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "TEST", 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), Sort.INDEXORDER);
  assertEquals("TEST(" + Version.LATEST.toString() + ")" +
      ":C10000" +
      ":[indexSort=<doc>]", si.toString());

  // diagnostics present, no attributes
  si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "TEST", 10000, false, codec, diagnostics, StringHelper.randomId(), new HashMap<>(), Sort.INDEXORDER);
  assertEquals("TEST(" + Version.LATEST.toString() + ")" +
      ":C10000" +
      ":[indexSort=<doc>]" +
      ":[diagnostics=" + diagnostics + "]", si.toString());

  // no diagnostics, attributes present
  si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "TEST", 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), attributes, Sort.INDEXORDER);
  assertEquals("TEST(" + Version.LATEST.toString() + ")" +
      ":C10000" +
      ":[indexSort=<doc>]" +
      ":[attributes=" + attributes + "]", si.toString());

  // diagnostics present, attributes present
  si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "TEST", 10000, false, codec, diagnostics, StringHelper.randomId(), attributes, Sort.INDEXORDER);
  assertEquals("TEST(" + Version.LATEST.toString() + ")" +
      ":C10000" +
      ":[indexSort=<doc>]" +
      ":[diagnostics=" + diagnostics + "]" +
      ":[attributes=" + attributes + "]", si.toString());

  dir.close();
}
 

@Override
protected Codec getCodec() {
  return TestUtil.getDefaultCodec();
}
 
Example 29 (Project: lucene-solr, File: Test2BPoints.java)

private static Codec getCodec() {
  return Codec.forName("Lucene84");
}
 

@Override
protected Codec getCodec() {
  return codec;
}