下面列出了怎么用org.apache.hadoop.hbase.filter.RegexStringComparator的API类实例代码及写法,或者点击链接到github查看源代码。
/**
 * Reads and prints every row whose cell values match the regex "PQ2A.*".
 *
 * @param projectId the Google Cloud project hosting the Bigtable instance
 * @param instanceId the Bigtable instance to connect to
 * @param tableId the table to scan
 */
public static void readFilter(String projectId, String instanceId, String tableId) {
  // Connections are expensive; real callers should create one, reuse it across requests,
  // and close it when done. try-with-resources guarantees cleanup in this sample.
  try (Connection connection = BigtableConfiguration.connect(projectId, instanceId)) {
    Table table = connection.getTable(TableName.valueOf(tableId));
    RegexStringComparator comparator = new RegexStringComparator("PQ2A.*");
    ValueFilter valueFilter = new ValueFilter(CompareOp.EQUAL, comparator);
    Scan scan = new Scan();
    scan.setFilter(valueFilter);
    ResultScanner scanner = table.getScanner(scan);
    for (Result row : scanner) {
      printRow(row);
    }
  } catch (IOException e) {
    System.out.println(
        "Unable to initialize service client, as a network error occurred: \n" + e.toString());
  }
}
/**
 * Configures this input format from the job: connects to HBase, initializes the example
 * table, and sets the input columns plus an optional row-key filter.
 */
@Override
public void configure(JobConf job) {
  try {
    Connection conn = ConnectionFactory.createConnection(job);
    Table table = conn.getTable(TableName.valueOf("exampleDeprecatedTable"));
    // mandatory
    initializeTable(conn, table.getName());
    // mandatory: restrict input to the two example columns
    setInputColumns(new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
    // optional: keep only rows whose key matches "aa.*"
    setRowFilter(new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")));
  } catch (IOException exception) {
    throw new RuntimeException("Failed to configure for job.", exception);
  }
}
/**
 * Configures this input format from the job: connects to HBase, initializes the example
 * table, and installs a Scan restricted to the example column families with a row-key
 * regex filter.
 */
@Override
public void configure(JobConf job) {
  try {
    Connection connection = ConnectionFactory.createConnection(job);
    // FIX: removed the redundant nested parentheses around the table-name literal.
    Table exampleTable = connection.getTable(TableName.valueOf("exampleDeprecatedTable"));
    // mandatory
    initializeTable(connection, exampleTable.getName());
    byte[][] inputColumns = new byte[][] { Bytes.toBytes("columnA"),
        Bytes.toBytes("columnB") };
    // optional: scan only the example column families
    Scan scan = new Scan();
    for (byte[] family : inputColumns) {
      scan.addFamily(family);
    }
    Filter exampleFilter =
        new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*"));
    scan.setFilter(exampleFilter);
    setScan(scan);
  } catch (IOException exception) {
    throw new RuntimeException("Failed to configure for job.", exception);
  }
}
/**
 * Configures this input format from the job configuration: initializes the example table
 * and installs a Scan over the example column families, filtered by a row-key regex.
 */
@Override
public void configure(JobConf job) {
  try {
    Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
    // mandatory
    initializeTable(conn, TableName.valueOf("exampleJobConfigurableTable"));
    // optional: limit the scan to the two example families, rows matching "aa.*"
    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes("columnA"));
    scan.addFamily(Bytes.toBytes("columnB"));
    scan.setFilter(new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")));
    setScan(scan);
  } catch (IOException exception) {
    throw new RuntimeException("Failed to initialize.", exception);
  }
}
/**
 * Initializes this input format from the job context: connects via the job's configuration,
 * initializes the example table, and installs a Scan over the example column families with
 * a row-key regex filter.
 *
 * @throws IOException if the connection cannot be created
 */
@Override
protected void initialize(JobContext job) throws IOException {
  Connection connection =
      ConnectionFactory.createConnection(HBaseConfiguration.create(job.getConfiguration()));
  // mandatory
  initializeTable(connection, TableName.valueOf("exampleTable"));
  // optional: limit the scan to the two example families, rows matching "aa.*"
  Scan scan = new Scan();
  scan.addFamily(Bytes.toBytes("columnA"));
  scan.addFamily(Bytes.toBytes("columnB"));
  scan.setFilter(new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")));
  setScan(scan);
}
/**
 * Builds a serializable model of the given comparator: records its simple type name and,
 * depending on the type, a value payload and (for bit comparators) the bitwise operator.
 */
public ByteArrayComparableModel(
    ByteArrayComparable comparator) {
  String simpleName = comparator.getClass().getSimpleName();
  // Throws IllegalArgumentException for comparator classes not in the enum.
  ComparatorType comparatorType = ComparatorType.valueOf(simpleName);
  this.type = simpleName;
  switch (comparatorType) {
    case BinaryComparator:
    case BinaryPrefixComparator:
      // Binary payloads are Base64-encoded so the model survives text serialization.
      this.value = Bytes.toString(Base64.getEncoder().encode(comparator.getValue()));
      break;
    case BitComparator:
      this.value = Bytes.toString(Base64.getEncoder().encode(comparator.getValue()));
      this.op = ((BitComparator) comparator).getOperator().toString();
      break;
    case NullComparator:
      // Carries no payload at all.
      break;
    case RegexStringComparator:
    case SubstringComparator:
      // String payloads are stored verbatim.
      this.value = Bytes.toString(comparator.getValue());
      break;
    default:
      throw new RuntimeException("unhandled filter type: " + comparatorType);
  }
}
/** Tests reading all rows using a filter. */
@Test
public void testReadingWithFilter() throws Exception {
  final String tableId = tmpTable.getName();
  createAndWriteData(tableId, 1001);
  // Keep rows whose key contains "17" anywhere.
  Filter rowFilter =
      new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*17.*"));
  runReadTestLength(
      HBaseIO.read().withConfiguration(conf).withTableId(tableId).withFilter(rowFilter),
      false,
      20);
}
/** Same filtered read as testReadingWithFilter, exercised through the SDF-based path. */
@Test
public void testReadingWithFilterSDF() throws Exception {
  final String tableId = tmpTable.getName();
  createAndWriteData(tableId, 1001);
  // Keep rows whose key contains "17" anywhere.
  Filter rowFilter =
      new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*17.*"));
  runReadTestLength(
      HBaseIO.read().withConfiguration(conf).withTableId(tableId).withFilter(rowFilter),
      true,
      20);
}
/**
 * Opens a row-key-regex scanner over the underlying HBase table for the configured search
 * tags, key range, and qualifiers. Subsequent calls are silent no-ops once open.
 *
 * TODO: if the required field is null for a row, that row is not fetched, which can skew
 * counting. A variant of read that returns all columns of a column family is needed for
 * strict row counting.
 *
 * @throws IOException if the table cannot be loaded or the scanner cannot be created
 */
public void open() throws IOException {
  if (isOpen)
    return; // silently return
  try {
    tbl = EagleConfigFactory.load().getHTable(schema.getTable());
  } catch (RuntimeException ex) {
    // Surface configuration/connection problems to callers as a checked IOException.
    throw new IOException(ex);
  }
  String rowkeyRegex = buildRegex2(searchTags);
  RegexStringComparator regexStringComparator = new RegexStringComparator(
      rowkeyRegex);
  // Row keys hold raw bytes; ISO-8859-1 maps bytes 1:1 to chars so the regex can match them.
  regexStringComparator.setCharset(Charset.forName("ISO-8859-1"));
  RowFilter filter = new RowFilter(CompareOp.EQUAL, regexStringComparator);
  FilterList filterList = new FilterList();
  filterList.addFilter(filter);
  Scan s1 = new Scan();
  // reverse timestamp, startRow is stopKey, and stopRow is startKey
  s1.setStartRow(stopKey);
  s1.setStopRow(startKey);
  s1.setFilter(filterList);
  // TODO the # of cached rows should be minimum of (pagesize and 100)
  s1.setCaching(100);
  // TODO not optimized for all applications
  s1.setCacheBlocks(true);
  // FIX: convert the column family name with an explicit charset instead of the platform
  // default (getBytes() with no argument), so behavior no longer depends on the JVM's
  // default encoding; also hoisted out of the loop since it is loop-invariant.
  byte[] family = schema.getColumnFamily().getBytes(Charset.forName("UTF-8"));
  // scan specified columnfamily and qualifiers
  for (byte[] qualifier : qualifiers) {
    s1.addColumn(family, qualifier);
  }
  rs = tbl.getScanner(s1);
  isOpen = true;
}
/**
 * Opens a row-key-regex scanner over the underlying HBase table for the configured search
 * tags, key range, and qualifiers. Subsequent calls are silent no-ops once open.
 *
 * TODO: if the required field is null for a row, that row is not fetched, which can skew
 * counting. A variant of read that returns all columns of a column family is needed for
 * strict row counting.
 *
 * @throws IOException if the table cannot be loaded or the scanner cannot be created
 */
@Override
public void open() throws IOException {
  if (isOpen) {
    return; // silently return
  }
  try {
    tbl = EagleConfigFactory.load().getHTable(schema.getTable());
  } catch (RuntimeException ex) {
    // Surface configuration/connection problems to callers as a checked IOException.
    throw new IOException(ex);
  }
  String rowkeyRegex = buildRegex2(searchTags);
  RegexStringComparator regexStringComparator = new RegexStringComparator(rowkeyRegex);
  // Row keys hold raw bytes; ISO-8859-1 maps bytes 1:1 to chars so the regex can match them.
  regexStringComparator.setCharset(Charset.forName("ISO-8859-1"));
  RowFilter filter = new RowFilter(CompareOp.EQUAL, regexStringComparator);
  FilterList filterList = new FilterList();
  filterList.addFilter(filter);
  Scan s1 = new Scan();
  // reverse timestamp, startRow is stopKey, and stopRow is startKey
  s1.setStartRow(stopKey);
  s1.setStopRow(startKey);
  s1.setFilter(filterList);
  // TODO the # of cached rows should be minimum of (pagesize and 100)
  s1.setCaching(100);
  // TODO not optimized for all applications
  s1.setCacheBlocks(true);
  // FIX: convert the column family name with an explicit charset instead of the platform
  // default (getBytes() with no argument), so behavior no longer depends on the JVM's
  // default encoding; also hoisted out of the loop since it is loop-invariant.
  byte[] family = schema.getColumnFamily().getBytes(Charset.forName("UTF-8"));
  // scan specified columnfamily and qualifiers
  for (byte[] qualifier : qualifiers) {
    s1.addColumn(family, qualifier);
  }
  rs = tbl.getScanner(s1);
  isOpen = true;
}
/** Scans the table keeping only cells whose column family matches the regex "stats_.*$". */
public static void filterLimitColFamilyRegex(
    String projectId, String instanceId, String tableId) {
  RegexStringComparator comparator = new RegexStringComparator("stats_.*$");
  Filter filter = new FamilyFilter(CompareOp.EQUAL, comparator);
  Scan scan = new Scan();
  scan.setFilter(filter);
  readWithFilter(projectId, instanceId, tableId, scan);
}
/** Scans the table keeping only cells whose column qualifier matches "connected_.*$". */
public static void filterLimitColQualifierRegex(
    String projectId, String instanceId, String tableId) {
  RegexStringComparator comparator = new RegexStringComparator("connected_.*$");
  Filter filter = new QualifierFilter(CompareOp.EQUAL, comparator);
  Scan scan = new Scan();
  scan.setFilter(filter);
  readWithFilter(projectId, instanceId, tableId, scan);
}
/** Scans the table keeping only cells whose value matches the regex "PQ2A.*$". */
public static void filterLimitValueRegex(String projectId, String instanceId, String tableId) {
  RegexStringComparator comparator = new RegexStringComparator("PQ2A.*$");
  Filter filter = new ValueFilter(CompareOp.EQUAL, comparator);
  Scan scan = new Scan();
  scan.setFilter(filter);
  readWithFilter(projectId, instanceId, tableId, scan);
}
/**
 * Deletes every ACL-table permission qualifier that refers to the given column of
 * {@code tableName}. Optionally closes the supplied table when finished.
 */
private static void removeTablePermissions(TableName tableName, byte[] column, Table table,
    boolean closeTable) throws IOException {
  Scan scan = new Scan();
  scan.addFamily(ACL_LIST_FAMILY);
  String columnName = Bytes.toString(column);
  // Match qualifiers containing the column name between delimiters, or ending with it.
  scan.setFilter(new QualifierFilter(CompareOperator.EQUAL, new RegexStringComparator(
      String.format("(%s%s%s)|(%s%s)$",
          ACL_KEY_DELIMITER, columnName, ACL_KEY_DELIMITER,
          ACL_KEY_DELIMITER, columnName))));
  Set<byte[]> qualifierSet = new TreeSet<>(Bytes.BYTES_COMPARATOR);
  try {
    // Collect all matching qualifiers first; the scanner is fully consumed before use.
    try (ResultScanner scanner = table.getScanner(scan)) {
      for (Result res : scanner) {
        qualifierSet.addAll(res.getFamilyMap(ACL_LIST_FAMILY).navigableKeySet());
      }
    }
    if (!qualifierSet.isEmpty()) {
      Delete d = new Delete(tableName.getName());
      for (byte[] qualifier : qualifierSet) {
        d.addColumns(ACL_LIST_FAMILY, qualifier);
      }
      table.delete(d);
    }
  } finally {
    if (closeTable) {
      table.close();
    }
  }
}
/** Verifies that a regex QualifierFilter returns exactly the columns col1..col5, in order. */
@Test
public void testFilters() throws Exception {
  final TableName tableName = name.getTableName();
  try (Table ht = TEST_UTIL.createTable(tableName, FAMILY)) {
    byte[][] rowKeys = makeN(ROW, 10);
    // Qualifiers col0..col9, each carrying the same decorated suffix.
    byte[][] qualifiers = new byte[10][];
    for (int i = 0; i < 10; i++) {
      qualifiers[i] = Bytes.toBytes("col" + i + "-<d2v1>-<d3v2>");
    }
    for (int i = 0; i < 10; i++) {
      Put put = new Put(rowKeys[i]);
      put.setDurability(Durability.SKIP_WAL);
      put.addColumn(FAMILY, qualifiers[i], VALUE);
      ht.put(put);
    }
    Scan scan = new Scan();
    scan.addFamily(FAMILY);
    scan.setFilter(
        new QualifierFilter(CompareOperator.EQUAL, new RegexStringComparator("col[1-5]")));
    try (ResultScanner scanner = ht.getScanner(scan)) {
      // Expect exactly rows 1..5, one cell each, in ascending order.
      int expectedIndex = 1;
      for (Result result : scanner) {
        assertEquals(1, result.size());
        assertTrue(Bytes.equals(CellUtil.cloneRow(result.rawCells()[0]), rowKeys[expectedIndex]));
        assertTrue(Bytes.equals(CellUtil.cloneQualifier(result.rawCells()[0]),
            qualifiers[expectedIndex]));
        expectedIndex++;
      }
      assertEquals(6, expectedIndex);
    }
  }
}
/**
 * Connects to HBase using the job configuration and prepares this input format to read the
 * given table: initializes the table, sets the example input columns, and installs an
 * optional row-key filter.
 *
 * @throws IOException if the connection cannot be created
 */
protected void initialize(JobConf job, String table) throws IOException {
  Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
  // mandatory
  initializeTable(conn, TableName.valueOf(table));
  // mandatory: restrict input to the two example columns
  setInputColumns(new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
  // optional: keep only rows whose key matches "aa.*"
  setRowFilter(new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")));
}
/** Reads rows whose keys match ".*#20190501$", returning every cell version. */
public static void filterLimitRowRegex(String projectId, String instanceId, String tableId) {
  RegexStringComparator comparator = new RegexStringComparator(".*#20190501$");
  Filter filter = new RowFilter(CompareOp.EQUAL, comparator);
  Scan scan = new Scan();
  scan.setFilter(filter);
  scan.setMaxVersions();
  readWithFilter(projectId, instanceId, tableId, scan);
}
/** Same regex-qualifier filtering as testFilters, but verified over a reversed scan. */
@Test
public void testFiltersWithReverseScan() throws Exception {
  final TableName tableName = name.getTableName();
  try (Table ht = TEST_UTIL.createTable(tableName, FAMILY)) {
    byte[][] rowKeys = makeN(ROW, 10);
    // Qualifiers col0..col9, each carrying the same decorated suffix.
    byte[][] qualifiers = new byte[10][];
    for (int i = 0; i < 10; i++) {
      qualifiers[i] = Bytes.toBytes("col" + i + "-<d2v1>-<d3v2>");
    }
    for (int i = 0; i < 10; i++) {
      Put put = new Put(rowKeys[i]);
      put.addColumn(FAMILY, qualifiers[i], VALUE);
      ht.put(put);
    }
    Scan scan = new Scan();
    scan.setReversed(true);
    scan.addFamily(FAMILY);
    scan.setFilter(
        new QualifierFilter(CompareOperator.EQUAL, new RegexStringComparator("col[1-5]")));
    try (ResultScanner scanner = ht.getScanner(scan)) {
      // Reversed scan yields col5 first and col1 last.
      int expectedIndex = 5;
      for (Result result : scanner) {
        assertEquals(1, result.size());
        Cell c = result.rawCells()[0];
        assertTrue(Bytes.equals(c.getRowArray(), c.getRowOffset(), c.getRowLength(),
            rowKeys[expectedIndex], 0, rowKeys[expectedIndex].length));
        assertTrue(Bytes.equals(c.getQualifierArray(), c.getQualifierOffset(),
            c.getQualifierLength(), qualifiers[expectedIndex], 0,
            qualifiers[expectedIndex].length));
        expectedIndex--;
      }
      assertEquals(0, expectedIndex);
    }
  }
}
/**
 * Converts the given {@link QuotaFilter} into a serializable {@link FilterList} for scanning
 * the quota table. A user filter may be combined with a namespace or table filter; standalone
 * table, namespace, and region-server filters each become a single row-key regex filter.
 */
public static Filter makeFilter(final QuotaFilter filter) {
  // Top level: every added filter must pass (AND semantics).
  FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
  if (StringUtils.isNotEmpty(filter.getUserFilter())) {
    // Alternatives for the user filter: any one sub-list may pass (OR semantics).
    FilterList userFilters = new FilterList(FilterList.Operator.MUST_PASS_ONE);
    boolean hasFilter = false;
    if (StringUtils.isNotEmpty(filter.getNamespaceFilter())) {
      // User + namespace: the row key must match the user regex AND the qualifier must
      // match the per-user-namespace settings regex. The trailing 0 is the regex flags
      // argument (no flags).
      FilterList nsFilters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
      nsFilters.addFilter(new RowFilter(CompareOperator.EQUAL,
          new RegexStringComparator(getUserRowKeyRegex(filter.getUserFilter()), 0)));
      nsFilters.addFilter(new QualifierFilter(CompareOperator.EQUAL,
          new RegexStringComparator(
              getSettingsQualifierRegexForUserNamespace(filter.getNamespaceFilter()), 0)));
      userFilters.addFilter(nsFilters);
      hasFilter = true;
    }
    if (StringUtils.isNotEmpty(filter.getTableFilter())) {
      // User + table: analogous to the namespace case above.
      FilterList tableFilters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
      tableFilters.addFilter(new RowFilter(CompareOperator.EQUAL,
          new RegexStringComparator(getUserRowKeyRegex(filter.getUserFilter()), 0)));
      tableFilters.addFilter(new QualifierFilter(CompareOperator.EQUAL,
          new RegexStringComparator(
              getSettingsQualifierRegexForUserTable(filter.getTableFilter()), 0)));
      userFilters.addFilter(tableFilters);
      hasFilter = true;
    }
    if (!hasFilter) {
      // User filter alone: match only the user portion of the row key.
      userFilters.addFilter(new RowFilter(CompareOperator.EQUAL,
          new RegexStringComparator(getUserRowKeyRegex(filter.getUserFilter()), 0)));
    }
    filterList.addFilter(userFilters);
  } else if (StringUtils.isNotEmpty(filter.getTableFilter())) {
    filterList.addFilter(new RowFilter(CompareOperator.EQUAL,
        new RegexStringComparator(getTableRowKeyRegex(filter.getTableFilter()), 0)));
  } else if (StringUtils.isNotEmpty(filter.getNamespaceFilter())) {
    filterList.addFilter(new RowFilter(CompareOperator.EQUAL,
        new RegexStringComparator(getNamespaceRowKeyRegex(filter.getNamespaceFilter()), 0)));
  } else if (StringUtils.isNotEmpty(filter.getRegionServerFilter())) {
    filterList.addFilter(new RowFilter(CompareOperator.EQUAL, new RegexStringComparator(
        getRegionServerRowKeyRegex(filter.getRegionServerFilter()), 0)));
  }
  return filterList;
}
/**
 * Builds this loader's {@code scan} from the configured options: optional row-key bounds
 * (gt/lt/gte/lte), an optional row-key regex, optional timestamp constraints, and finally
 * the column / column-prefix projection filters.
 *
 * @throws IOException if the scan's time range cannot be set
 */
private void initScan() throws IOException{
  scan = new Scan();
  // Caller-supplied scanner tuning.
  scan.setCacheBlocks(cacheBlocks_);
  scan.setCaching(caching_);
  // Set filters, if any.
  if (configuredOptions_.hasOption("gt")) {
    gt_ = Bytes.toBytesBinary(Utils.slashisize(configuredOptions_.getOptionValue("gt")));
    // setStartRow is inclusive; the GREATER row filter presumably keeps the "gt" bound
    // exclusive — confirm against addRowFilter's implementation.
    addRowFilter(CompareOp.GREATER, gt_);
    scan.setStartRow(gt_);
  }
  if (configuredOptions_.hasOption("lt")) {
    lt_ = Bytes.toBytesBinary(Utils.slashisize(configuredOptions_.getOptionValue("lt")));
    addRowFilter(CompareOp.LESS, lt_);
    scan.setStopRow(lt_);
  }
  if (configuredOptions_.hasOption("gte")) {
    // Inclusive lower bound needs no extra filter: setStartRow is already inclusive.
    gte_ = Bytes.toBytesBinary(Utils.slashisize(configuredOptions_.getOptionValue("gte")));
    scan.setStartRow(gte_);
  }
  if (configuredOptions_.hasOption("lte")) {
    lte_ = Bytes.toBytesBinary(Utils.slashisize(configuredOptions_.getOptionValue("lte")));
    // Stop row is exclusive, so bump the inclusive bound to the next possible key.
    byte[] lt = increment(lte_);
    if (LOG.isDebugEnabled()) {
      LOG.debug(String.format("Incrementing lte value of %s from bytes %s to %s to set stop row",
          Bytes.toString(lte_), toString(lte_), toString(lt)));
    }
    if (lt != null) {
      // NOTE(review): recomputes increment(lte_) even though lt already holds that value.
      scan.setStopRow(increment(lte_));
    }
    // The WhileMatchFilter will short-circuit the scan after we no longer match. The
    // setStopRow call will limit the number of regions we need to scan
    addFilter(new WhileMatchFilter(new RowFilter(CompareOp.LESS_OR_EQUAL, new BinaryComparator(lte_))));
  }
  if (configuredOptions_.hasOption("regex")) {
    regex_ = Utils.slashisize(configuredOptions_.getOptionValue("regex"));
    addFilter(new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regex_)));
  }
  if (configuredOptions_.hasOption("minTimestamp") || configuredOptions_.hasOption("maxTimestamp")){
    scan.setTimeRange(minTimestamp_, maxTimestamp_);
  }
  if (configuredOptions_.hasOption("timestamp")){
    scan.setTimeStamp(timestamp_);
  }
  // if the group of columnInfos for this family doesn't contain a prefix, we don't need
  // to set any filters, we can just call addColumn or addFamily. See javadocs below.
  boolean columnPrefixExists = false;
  for (ColumnInfo columnInfo : columnInfo_) {
    if (columnInfo.getColumnPrefix() != null) {
      columnPrefixExists = true;
      break;
    }
  }
  if (!columnPrefixExists) {
    addFiltersWithoutColumnPrefix(columnInfo_);
  }
  else {
    addFiltersWithColumnPrefix(columnInfo_);
  }
}
/**
 * Returns the {@link Flow} runs' stats, summed up per flow. If the {@code version} parameter
 * is non-null, the returned results will be restricted to those matching this app version.
 *
 * <p>
 * <strong>Note:</strong> this retrieval method will omit the configuration data from all of
 * the returned jobs.
 * </p>
 *
 * @param cluster the cluster where the jobs were run
 * @param user the user running the jobs
 * @param appId the application identifier for the jobs
 * @param version if non-null, only flows matching this application version will be returned
 * @param startTime the start time for the flows to be looked at
 * @param endTime the end time for the flows to be looked at
 * @param limit the maximum number of flows to return
 * @param startRow if non-null, the raw row key to resume scanning from (overrides the
 *          computed start bound, e.g. for paging)
 * @return up to {@code limit} flows matching the given constraints
 * @throws IOException if the underlying scan fails
 */
public List<Flow> getFlowTimeSeriesStats(String cluster, String user,
    String appId, String version, long startTime, long endTime, int limit,
    byte[] startRow) throws IOException {
  // app portion of row key
  byte[] rowPrefix = Bytes.toBytes((cluster + Constants.SEP + user
      + Constants.SEP + appId + Constants.SEP));
  byte[] scanStartRow;
  if (startRow != null) {
    // An explicit paging start row wins over any computed bound.
    scanStartRow = startRow;
  } else {
    if (endTime != 0) {
      // use end time in start row, if present
      // NOTE(review): run ids appear to be reverse-encoded timestamps (endTime bounds the
      // scan START, startTime bounds the STOP below) — confirm against FlowKey.encodeRunId.
      long endRunId = FlowKey.encodeRunId(endTime);
      scanStartRow =
          Bytes.add(rowPrefix, Bytes.toBytes(endRunId), Constants.SEP_BYTES);
    } else {
      scanStartRow = rowPrefix;
    }
  }
  // TODO: use RunMatchFilter to limit scan on the server side
  Scan scan = new Scan();
  scan.setStartRow(scanStartRow);
  FilterList filters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
  if (startTime != 0) {
    // if limited by start time, early out as soon as we hit it
    long startRunId = FlowKey.encodeRunId(startTime);
    // zero byte at the end makes the startRunId inclusive
    byte[] scanEndRow = Bytes.add(rowPrefix, Bytes.toBytes(startRunId),
        Constants.ZERO_SINGLE_BYTE);
    scan.setStopRow(scanEndRow);
  } else {
    // require that all rows match the app prefix we're looking for
    filters.addFilter(new WhileMatchFilter(new PrefixFilter(rowPrefix)));
  }
  // if version is passed, restrict the rows returned to that version
  if (version != null && version.length() > 0) {
    filters.addFilter(new SingleColumnValueFilter(Constants.INFO_FAM_BYTES,
        Constants.VERSION_COLUMN_BYTES, CompareFilter.CompareOp.EQUAL,
        Bytes.toBytes(version)));
  }
  // filter out all config columns except the queue name
  filters.addFilter(new QualifierFilter(CompareFilter.CompareOp.NOT_EQUAL,
      new RegexStringComparator(
          "^c\\!((?!" + Constants.HRAVEN_QUEUE + ").)*$")));
  scan.setFilter(filters);
  LOG.info("scan : \n " + scan.toJSON() + " \n");
  return createFromResults(scan, false, limit);
}