Source Code Examples of org.apache.hadoop.mapred.Partitioner#org.apache.hadoop.mapred.lib.HashPartitioner

Listed below is example code for org.apache.hadoop.mapred.Partitioner#org.apache.hadoop.mapred.lib.HashPartitioner, collected from several open-source projects.
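
For context, HashPartitioner is the default Partitioner of the old mapred API: it spreads map output keys across reducers by hashing the key. A minimal sketch of its behavior (an illustration, not the library source itself):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Partitioner;

// Sketch of what HashPartitioner does: partition by the key's hashCode,
// masked to a non-negative int, modulo the number of reduce tasks.
public class HashPartitionerSketch<K, V> implements Partitioner<K, V> {
  public int getPartition(K key, V value, int numReduceTasks) {
    return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
  }
  public void configure(JobConf job) { /* nothing to configure */ }
}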

Example 1  Project: hadoop   File: SortValidator.java
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get(JobContext.MAP_INPUT_FILE));
      String inputFile = inputURI.getPath();
      // part file is of the form part-r-xxxxx
      partition = Integer.valueOf(inputFile.substring(
        inputFile.lastIndexOf("part") + 7)).intValue();
      noSortReducers = job.getInt(SORT_REDUCES, -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
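
Here HashPartitioner is paired with the partition number parsed from the input file name, presumably so each output key can later be checked against the partition its file name claims (via partitioner.getPartition). With the part-r-xxxxx naming, lastIndexOf("part") + 7 skips the "part-r-" prefix; a minimal sketch with a hypothetical path:

// Hypothetical path illustrating the substring arithmetic used above.
String inputFile = "/sort-output/part-r-00003";
int partition = Integer.valueOf(
    inputFile.substring(inputFile.lastIndexOf("part") + 7)).intValue(); // -> 3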
 
Example 2  Project: big-c   File: SortValidator.java
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get(JobContext.MAP_INPUT_FILE));
      String inputFile = inputURI.getPath();
      // part file is of the form part-r-xxxxx
      partition = Integer.valueOf(inputFile.substring(
        inputFile.lastIndexOf("part") + 7)).intValue();
      noSortReducers = job.getInt(SORT_REDUCES, -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
 
Example 3  Project: anthelion   File: LoopReader.java
/**
 * Prints loopset for a single url.  The loopset information will show any
 * outlink url that eventually forms a link cycle.
 * 
 * @param webGraphDb The WebGraph to check for loops
 * @param url The url to check.
 * 
 * @throws IOException If an error occurs while printing loopset information.
 */
public void dumpUrl(Path webGraphDb, String url)
  throws IOException {

  // open the readers
  fs = FileSystem.get(getConf());
  loopReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
    Loops.LOOPS_DIR), getConf());

  // get the loopset for a given url, if any
  Text key = new Text(url);
  LoopSet loop = new LoopSet();
  MapFileOutputFormat.getEntry(loopReaders,
    new HashPartitioner<Text, LoopSet>(), key, loop);

  // print out each loop url in the set
  System.out.println(url + ":");
  for (String loopUrl : loop.getLoopSet()) {
    System.out.println("  " + loopUrl);
  }

  // close the readers
  FSUtils.closeReaders(loopReaders);
}
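
MapFileOutputFormat.getEntry uses the supplied HashPartitioner to decide which part file should contain the key, and consults only that MapFile.Reader. The lookup amounts roughly to the following sketch (GetEntrySketch and lookup are illustrative names, not library code):

import java.io.IOException;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.Partitioner;

public class GetEntrySketch {
  // Map the key to one part file via the partitioner, then look it up there.
  static <K extends WritableComparable, V extends Writable> Writable lookup(
      MapFile.Reader[] readers, Partitioner<K, V> partitioner, K key, V value)
      throws IOException {
    int part = partitioner.getPartition(key, value, readers.length);
    return readers[part].get(key, value); // null if the key is not present
  }
}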
 
Example 4  Project: anthelion   File: NodeReader.java
/**
 * Prints the content of the Node represented by the url to system out.
 * 
 * @param webGraphDb The webgraph from which to get the node.
 * @param url The url of the node.
 * 
 * @throws IOException If an error occurs while getting the node.
 */
public void dumpUrl(Path webGraphDb, String url)
  throws IOException {

  fs = FileSystem.get(getConf());
  nodeReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
    WebGraph.NODE_DIR), getConf());

  // open the readers, get the node, print out the info, and close the readers
  Text key = new Text(url);
  Node node = new Node();
  MapFileOutputFormat.getEntry(nodeReaders,
    new HashPartitioner<Text, Node>(), key, node);
  System.out.println(url + ":");
  System.out.println("  inlink score: " + node.getInlinkScore());
  System.out.println("  outlink score: " + node.getOutlinkScore());
  System.out.println("  num inlinks: " + node.getNumInlinks());
  System.out.println("  num outlinks: " + node.getNumOutlinks());
  FSUtils.closeReaders(nodeReaders);
}
 
Example 5  Project: RDFS   File: SortValidator.java
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get("map.input.file"));
      String inputFile = inputURI.getPath();
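      // part files here use the older part-xxxxx naming, so +5 skips the "part-" prefix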
      partition = Integer.valueOf(
                                  inputFile.substring(inputFile.lastIndexOf("part")+5)
                                  ).intValue();
      noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
 
Example 6  Project: nutch-htmlunit   File: LoopReader.java
/**
 * Prints loopset for a single url.  The loopset information will show any
 * outlink url that eventually forms a link cycle.
 * 
 * @param webGraphDb The WebGraph to check for loops
 * @param url The url to check.
 * 
 * @throws IOException If an error occurs while printing loopset information.
 */
public void dumpUrl(Path webGraphDb, String url)
  throws IOException {

  // open the readers
  fs = FileSystem.get(getConf());
  loopReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
    Loops.LOOPS_DIR), getConf());

  // get the loopset for a given url, if any
  Text key = new Text(url);
  LoopSet loop = new LoopSet();
  MapFileOutputFormat.getEntry(loopReaders,
    new HashPartitioner<Text, LoopSet>(), key, loop);

  // print out each loop url in the set
  System.out.println(url + ":");
  for (String loopUrl : loop.getLoopSet()) {
    System.out.println("  " + loopUrl);
  }

  // close the readers
  FSUtils.closeReaders(loopReaders);
}
 
Example 7  Project: nutch-htmlunit   File: NodeReader.java
/**
 * Prints the content of the Node represented by the url to system out.
 * 
 * @param webGraphDb The webgraph from which to get the node.
 * @param url The url of the node.
 * 
 * @throws IOException If an error occurs while getting the node.
 */
public void dumpUrl(Path webGraphDb, String url)
  throws IOException {

  fs = FileSystem.get(getConf());
  nodeReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
    WebGraph.NODE_DIR), getConf());

  // open the readers, get the node, print out the info, and close the readers
  Text key = new Text(url);
  Node node = new Node();
  MapFileOutputFormat.getEntry(nodeReaders,
    new HashPartitioner<Text, Node>(), key, node);
  System.out.println(url + ":");
  System.out.println("  inlink score: " + node.getInlinkScore());
  System.out.println("  outlink score: " + node.getOutlinkScore());
  System.out.println("  num inlinks: " + node.getNumInlinks());
  System.out.println("  num outlinks: " + node.getNumOutlinks());
  FSUtils.closeReaders(nodeReaders);
}
 
Example 8  Project: hadoop-gpu   File: SortValidator.java
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);
  
  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();
    
    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get("map.input.file"));
      String inputFile = inputURI.getPath();
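      // part files here use the older part-xxxxx naming, so +5 skips the "part-" prefix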
      partition = Integer.valueOf(
                                  inputFile.substring(inputFile.lastIndexOf("part")+5)
                                  ).intValue();
      noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
 
Example 9  Project: anthelion   File: CrawlDbReader.java
public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException {
  Text key = new Text(url);
  CrawlDatum val = new CrawlDatum();
  openReaders(crawlDb, config);
  CrawlDatum res = (CrawlDatum)MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, CrawlDatum>(), key, val);
  return res;
}
 
Example 10  Project: anthelion   File: LinkDumper.java
public static void main(String[] args)
  throws Exception {
  
  if (args == null || args.length < 2) {
    System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
    return;
  }

  // open the readers for the linkdump directory
  Configuration conf = NutchConfiguration.create();
  FileSystem fs = FileSystem.get(conf);
  Path webGraphDb = new Path(args[0]);
  String url = args[1];
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
    webGraphDb, DUMP_DIR), conf);

  // get the link nodes for the url
  Text key = new Text(url);
  LinkNodes nodes = new LinkNodes();
  MapFileOutputFormat.getEntry(readers,
    new HashPartitioner<Text, LinkNodes>(), key, nodes);

  // print out the link nodes
  LinkNode[] linkNodesAr = nodes.getLinks();
  System.out.println(url + ":");
  for (LinkNode node : linkNodesAr) {
    System.out.println("  " + node.getUrl() + " - "
      + node.getNode().toString());
  }

  // close the readers
  FSUtils.closeReaders(readers);
}
 
Example 11  Project: nutch-htmlunit   File: CrawlDbReader.java
public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException {
  Text key = new Text(url);
  CrawlDatum val = new CrawlDatum();
  openReaders(crawlDb, config);
  CrawlDatum res = (CrawlDatum)MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, CrawlDatum>(), key, val);
  return res;
}
 
Example 12  Project: nutch-htmlunit   File: LinkDumper.java
public static void main(String[] args)
  throws Exception {
  
  if (args == null || args.length < 2) {
    System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
    return;
  }

  // open the readers for the linkdump directory
  Configuration conf = NutchConfiguration.create();
  FileSystem fs = FileSystem.get(conf);
  Path webGraphDb = new Path(args[0]);
  String url = args[1];
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
    webGraphDb, DUMP_DIR), conf);

  // get the link nodes for the url
  Text key = new Text(url);
  LinkNodes nodes = new LinkNodes();
  MapFileOutputFormat.getEntry(readers,
    new HashPartitioner<Text, LinkNodes>(), key, nodes);

  // print out the link nodes
  LinkNode[] linkNodesAr = nodes.getLinks();
  System.out.println(url + ":");
  for (LinkNode node : linkNodesAr) {
    System.out.println("  " + node.getUrl() + " - "
      + node.getNode().toString());
  }

  // close the readers
  FSUtils.closeReaders(readers);
}
 
Example 13  Project: hadoop   File: Submitter.java
/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass(Submitter.PARTITIONER, 
                       HashPartitioner.class,
                       Partitioner.class);
}
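
HashPartitioner is only the fallback returned when the job has not registered its own partitioner. A hypothetical custom Partitioner for the old mapred API, routing all URLs of the same host to one reducer, could look like this sketch:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Partitioner;

// Hypothetical partitioner: keys sharing the same host prefix (text before
// the first '/') land in the same reduce partition.
public class HostPrefixPartitioner implements Partitioner<Text, Text> {
  public int getPartition(Text key, Text value, int numReduceTasks) {
    String url = key.toString();
    int slash = url.indexOf('/');
    String host = (slash >= 0) ? url.substring(0, slash) : url;
    return (host.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
  }
  public void configure(JobConf job) { /* nothing to configure */ }
}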
 
Example 14  Project: big-c   File: Submitter.java
/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass(Submitter.PARTITIONER, 
                       HashPartitioner.class,
                       Partitioner.class);
}
 
Example 15  Project: RDFS   File: Submitter.java
/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass("hadoop.pipes.partitioner", 
                       HashPartitioner.class,
                       Partitioner.class);
}
 
Example 16  Project: hadoop-gpu   File: Submitter.java
/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass("hadoop.pipes.partitioner", 
                       HashPartitioner.class,
                       Partitioner.class);
}
 
Example 17  Project: hadoop   File: JobConf.java
/**
 * Get the {@link Partitioner} used to partition {@link Mapper}-outputs 
 * to be sent to the {@link Reducer}s.
 * 
 * @return the {@link Partitioner} used to partition map-outputs.
 */
public Class<? extends Partitioner> getPartitionerClass() {
  return getClass("mapred.partitioner.class",
                  HashPartitioner.class, Partitioner.class);
}
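
The setter counterpart is JobConf.setPartitionerClass, which writes the same mapred.partitioner.class key read above; if it is never called, getPartitionerClass() falls back to HashPartitioner. A minimal usage sketch (MyJob is a hypothetical driver class):

// Inside a hypothetical job driver (old mapred API):
JobConf job = new JobConf(MyJob.class);
job.setNumReduceTasks(4);
job.setPartitionerClass(HashPartitioner.class);  // explicit, same as the default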
 
Example 18  Project: big-c   File: JobConf.java
/**
 * Get the {@link Partitioner} used to partition {@link Mapper}-outputs 
 * to be sent to the {@link Reducer}s.
 * 
 * @return the {@link Partitioner} used to partition map-outputs.
 */
public Class<? extends Partitioner> getPartitionerClass() {
  return getClass("mapred.partitioner.class",
                  HashPartitioner.class, Partitioner.class);
}
 
Example 19  Project: RDFS   File: JobConf.java
/**
 * Get the {@link Partitioner} used to partition {@link Mapper}-outputs
 * to be sent to the {@link Reducer}s.
 *
 * @return the {@link Partitioner} used to partition map-outputs.
 */
public Class<? extends Partitioner> getPartitionerClass() {
  return getClass("mapred.partitioner.class",
                  HashPartitioner.class, Partitioner.class);
}
 
Example 20  Project: hadoop-gpu   File: JobConf.java
/**
 * Get the {@link Partitioner} used to partition {@link Mapper}-outputs 
 * to be sent to the {@link Reducer}s.
 * 
 * @return the {@link Partitioner} used to partition map-outputs.
 */
public Class<? extends Partitioner> getPartitionerClass() {
  return getClass("mapred.partitioner.class",
                  HashPartitioner.class, Partitioner.class);
}