The following are example usages of org.apache.hadoop.hbase.util.RegionSplitter#SplitAlgorithm(). Follow the link to view the full source on GitHub, or leave a comment in the panel on the right.
@Test
public void testSplit300_1000() throws Exception {
  // 300 regions over a 0-999 decimal keyspace: expect 299 boundaries,
  // each zero-padded to 4 digits, spaced roughly every 1000/300 keys.
  final int regionCount = 300;
  final int keyCardinality = 1000;
  RegionSplitter.SplitAlgorithm algo = new DecimalStringSplit(keyCardinality);
  byte[][] boundaries = algo.split(regionCount);
  assertEquals(regionCount - 1, boundaries.length);
  String fmt = "%04d";
  assertEquals(String.format(fmt, 3), Bytes.toString(boundaries[0]));
  assertEquals(String.format(fmt, 6), Bytes.toString(boundaries[1]));
  assertEquals(String.format(fmt, 10), Bytes.toString(boundaries[2]));
  assertEquals(String.format(fmt, 996), Bytes.toString(boundaries[regionCount - 2]));
}
@Test
public void testSplit3_10() throws Exception {
  // Three regions over a 0-9 decimal keyspace: boundaries at 03 and 06.
  final int regionCount = 3;
  final int keyCardinality = 10;
  RegionSplitter.SplitAlgorithm algo = new DecimalStringSplit(keyCardinality);
  byte[][] boundaries = algo.split(regionCount);
  assertEquals(regionCount - 1, boundaries.length);
  assertEquals(String.format("%02d", 3), Bytes.toString(boundaries[0]));
  assertEquals(String.format("%02d", 6), Bytes.toString(boundaries[regionCount - 2]));
}
@Test
public void testSplit10_10() throws Exception {
  // One region per key value: boundaries are exactly 01 through 09.
  final int regionCount = 10;
  final int keyCardinality = 10;
  RegionSplitter.SplitAlgorithm algo = new DecimalStringSplit(keyCardinality);
  byte[][] boundaries = algo.split(regionCount);
  assertEquals(regionCount - 1, boundaries.length);
  assertEquals(String.format("%02d", 1), Bytes.toString(boundaries[0]));
  assertEquals(String.format("%02d", 9), Bytes.toString(boundaries[regionCount - 2]));
}
@Test
public void testSplit10_10() throws Exception {
  // Region count equal to cardinality: every key becomes a boundary, 01..09.
  final int regions = 10;
  RegionSplitter.SplitAlgorithm algorithm = new DecimalStringSplit(10);
  byte[][] splitKeys = algorithm.split(regions);
  assertEquals(regions - 1, splitKeys.length);
  // %02d with 1 and 9 yields the padded keys below.
  assertEquals("01", Bytes.toString(splitKeys[0]));
  assertEquals("09", Bytes.toString(splitKeys[regions - 2]));
}
@Test
public void testSplit10_10() throws Exception {
  final int numRegions = 10;
  // Cardinality equal to the region count gives consecutive single-step boundaries.
  RegionSplitter.SplitAlgorithm splitter = new DecimalStringSplit(10);
  byte[][] points = splitter.split(numRegions);
  assertEquals(numRegions - 1, points.length);
  String pattern = "%02d";
  assertEquals(String.format(pattern, 1), Bytes.toString(points[0]));
  assertEquals(String.format(pattern, 9), Bytes.toString(points[numRegions - 2]));
}
@Test
public void testSplit3_10() throws Exception {
  final int regions = 3;
  // Splitting 10 keys across 3 regions places the cuts at each third of the keyspace.
  RegionSplitter.SplitAlgorithm splitter = new DecimalStringSplit(10);
  byte[][] splitPoints = splitter.split(regions);
  assertEquals(regions - 1, splitPoints.length);
  // %02d of 3 and 6 yields the padded keys below.
  assertEquals("03", Bytes.toString(splitPoints[0]));
  assertEquals("06", Bytes.toString(splitPoints[regions - 2]));
}
/**
 * Instantiates the {@link RegionSplitter.SplitAlgorithm} named by the {@code SPLIT_ALGO}
 * configuration key, via its public or declared no-argument constructor.
 *
 * @param conf configuration holding the algorithm class name under {@code SPLIT_ALGO}
 * @return a new split-algorithm instance, or {@code null} when the key is unset
 * @throws IOException if the class cannot be found, has no no-arg constructor, or its
 *     construction fails
 */
public static RegionSplitter.SplitAlgorithm getSplitAlgo(Configuration conf) throws IOException {
  String splitAlgoClassName = conf.get(SPLIT_ALGO);
  if (splitAlgoClassName == null) {
    return null;
  }
  try {
    return Class.forName(splitAlgoClassName).asSubclass(RegionSplitter.SplitAlgorithm.class)
        .getDeclaredConstructor().newInstance();
  } catch (ReflectiveOperationException e) {
    // ReflectiveOperationException is the common supertype of the five reflective
    // failures previously listed individually. The old message claimed the class was
    // "not found" even when it existed but could not be instantiated; say what we know.
    throw new IOException(
        "SplitAlgo class " + splitAlgoClassName + " could not be loaded or instantiated", e);
  }
}
@Test
public void testSplit10_10() throws Exception {
  final int regionTotal = 10;
  final int keySpace = 10;
  // DecimalStringSplit pads keys to two digits for a cardinality of 10.
  RegionSplitter.SplitAlgorithm split = new DecimalStringSplit(keySpace);
  byte[][] cuts = split.split(regionTotal);
  assertEquals(regionTotal - 1, cuts.length);
  assertEquals(String.format("%02d", 1), Bytes.toString(cuts[0]));
  assertEquals(String.format("%02d", 9), Bytes.toString(cuts[regionTotal - 2]));
}
/** Builds a {@link RegionSplitter.UniformSplit}; the cardinality argument is not used. */
@Override
public RegionSplitter.SplitAlgorithm createSplitter(int cardinality) {
  final RegionSplitter.SplitAlgorithm uniform = new RegionSplitter.UniformSplit();
  return uniform;
}
/** Builds a {@link RegionSplitter.HexStringSplit}; the cardinality argument is not used. */
@Override
public RegionSplitter.SplitAlgorithm createSplitter(int cardinality) {
  final RegionSplitter.SplitAlgorithm hex = new RegionSplitter.HexStringSplit();
  return hex;
}
/** Supplies a byte-uniform split algorithm; {@code cardinality} is ignored here. */
@Override
public RegionSplitter.SplitAlgorithm createSplitter(int cardinality) {
  final RegionSplitter.SplitAlgorithm algo = new RegionSplitter.UniformSplit();
  return algo;
}
/**
 * Computes split boundaries as a list by delegating to the algorithm obtained
 * from {@code createSplitter(cardinality)}.
 *
 * @param numRegions desired number of regions
 * @param cardinality hint passed through to the splitter factory
 * @return mutable list of the boundary keys, in the splitter's order
 */
public List<byte[]> split(int numRegions, int cardinality) {
  RegionSplitter.SplitAlgorithm algorithm = createSplitter(cardinality);
  List<byte[]> boundaries = new ArrayList<>();
  for (byte[] splitPoint : algorithm.split(numRegions)) {
    boundaries.add(splitPoint);
  }
  return boundaries;
}
/** Builds a {@link DecimalStringSplit} sized to the given key cardinality. */
@Override
public RegionSplitter.SplitAlgorithm createSplitter(int cardinality) {
  final RegionSplitter.SplitAlgorithm decimal = new DecimalStringSplit(cardinality);
  return decimal;
}
/** Always returns a fresh {@link RegionSplitter.UniformSplit}; ignores {@code cardinality}. */
@Override
public RegionSplitter.SplitAlgorithm createSplitter(int cardinality) {
  final RegionSplitter.SplitAlgorithm result = new RegionSplitter.UniformSplit();
  return result;
}
/** Supplies a hex-string split algorithm; {@code cardinality} is ignored here. */
@Override
public RegionSplitter.SplitAlgorithm createSplitter(int cardinality) {
  final RegionSplitter.SplitAlgorithm algo = new RegionSplitter.HexStringSplit();
  return algo;
}
/** Always returns a fresh {@link RegionSplitter.HexStringSplit}; ignores {@code cardinality}. */
@Override
public RegionSplitter.SplitAlgorithm createSplitter(int cardinality) {
  final RegionSplitter.SplitAlgorithm result = new RegionSplitter.HexStringSplit();
  return result;
}
/** Creates a decimal-string splitter parameterized by the requested cardinality. */
@Override
public RegionSplitter.SplitAlgorithm createSplitter(int cardinality) {
  final RegionSplitter.SplitAlgorithm splitter = new DecimalStringSplit(cardinality);
  return splitter;
}
/**
 * Sets up the job for reading from a table snapshot. It bypasses hbase servers
 * and reads directly from snapshot files.
 *
 * @param snapshotName The name of the snapshot (of a table) to read from.
 * @param scan The scan instance with the columns, time range etc.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is
 * carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 * job classes via the distributed cache (tmpjars).
 *
 * @param tmpRestoreDir a temporary directory to copy the snapshot files into. Current user should
 * have write permissions to this directory, and this should not be a subdirectory of rootdir.
 * After the job is finished, restore directory can be deleted.
 * @param splitAlgo algorithm to split
 * @param numSplitsPerRegion how many input splits to generate per one region
 * @throws IOException When setting up the details fails.
 * @see TableSnapshotInputFormat
 */
public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
Class<? extends TableMapper> mapper,
Class<?> outputKeyClass,
Class<?> outputValueClass, Job job,
boolean addDependencyJars, Path tmpRestoreDir,
RegionSplitter.SplitAlgorithm splitAlgo,
int numSplitsPerRegion)
throws IOException {
// Point the job's input at the snapshot, with per-region splitting controlled
// by splitAlgo / numSplitsPerRegion.
TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir, splitAlgo,
numSplitsPerRegion);
// Standard mapper wiring; the snapshot name stands in for the table name and the
// input format is forced to TableSnapshotInputFormat (last argument).
initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);
// NOTE(review): presumably adjusts block-cache settings for client-side snapshot
// reads — confirm against resetCacheConfig's definition.
resetCacheConfig(job.getConfiguration());
}
/**
 * Configures {@code job} to read the named snapshot through TableSnapshotInputFormat,
 * restoring it under {@code restoreDir} and generating {@code numSplitsPerRegion}
 * input splits per region via {@code splitAlgo}.
 *
 * @param job the job to configure
 * @param snapshotName the name of the snapshot to read from
 * @param restoreDir temporary directory the snapshot is restored into; the current user
 *     needs write permission, it must not live under rootdir, and it can be deleted once
 *     the job completes
 * @param splitAlgo split algorithm used to subdivide each region
 * @param numSplitsPerRegion number of input splits to generate per region
 * @throws IOException if an error occurs
 */
public static void setInput(Job job, String snapshotName, Path restoreDir,
    RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException {
  TableSnapshotInputFormatImpl.setInput(
      job.getConfiguration(), snapshotName, restoreDir, splitAlgo, numSplitsPerRegion);
}
/**
 * Configures the given mapred {@code JobConf} to read the named snapshot through
 * TableSnapshotInputFormat, restoring it under {@code restoreDir} and generating
 * {@code numSplitsPerRegion} input splits per region via {@code splitAlgo}.
 *
 * @param job the job to configure
 * @param snapshotName the name of the snapshot to read from
 * @param restoreDir temporary directory the snapshot is restored into; the current user
 *     needs write permission, it must not live under rootdir, and it can be deleted once
 *     the job completes
 * @param splitAlgo split algorithm used to subdivide each region
 * @param numSplitsPerRegion number of input splits to generate per region
 * @throws IOException if an error occurs
 */
public static void setInput(JobConf job, String snapshotName, Path restoreDir,
    RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException {
  TableSnapshotInputFormatImpl.setInput(
      job, snapshotName, restoreDir, splitAlgo, numSplitsPerRegion);
}