下面列出了怎么用com.amazonaws.services.s3.model.InputSerialization的API类实例代码及写法,或者点击链接到github查看源代码。
/**
 * Creates an S3 Select request that runs a SQL expression against an
 * uncompressed CSV object and returns the result as CSV.
 *
 * @param bucket the name of the bucket holding the object
 * @param key    the key of the object to query
 * @param query  the SQL expression to evaluate
 * @return the populated {@link SelectObjectContentRequest}
 */
private static SelectObjectContentRequest generateBaseCSVRequest(String bucket, String key, String query) {
    // Input side: CSV with default settings, no compression.
    InputSerialization csvIn = new InputSerialization();
    csvIn.setCsv(new CSVInput());
    csvIn.setCompressionType(CompressionType.NONE);

    // Output side: CSV with default settings.
    OutputSerialization csvOut = new OutputSerialization();
    csvOut.setCsv(new CSVOutput());

    SelectObjectContentRequest selectRequest = new SelectObjectContentRequest();
    selectRequest.setBucketName(bucket);
    selectRequest.setKey(key);
    selectRequest.setExpression(query);
    selectRequest.setExpressionType(ExpressionType.SQL);
    selectRequest.setInputSerialization(csvIn);
    selectRequest.setOutputSerialization(csvOut);
    return selectRequest;
}
/**
 * Assembles a SQL-based S3 Select request whose input is plain
 * (uncompressed) CSV and whose output is CSV.
 *
 * @param bucket the bucket that contains the target object
 * @param key    the object key to select from
 * @param query  the SQL expression to run
 * @return a {@link SelectObjectContentRequest} ready to be submitted
 */
private static SelectObjectContentRequest generateBaseCSVRequest(String bucket, String key, String query) {
    SelectObjectContentRequest result = new SelectObjectContentRequest();

    // Target object and expression.
    result.setBucketName(bucket);
    result.setKey(key);
    result.setExpressionType(ExpressionType.SQL);
    result.setExpression(query);

    // How the stored object should be read.
    InputSerialization source = new InputSerialization();
    source.setCompressionType(CompressionType.NONE);
    source.setCsv(new CSVInput());
    result.setInputSerialization(source);

    // How the selected records should be written back.
    OutputSerialization sink = new OutputSerialization();
    sink.setCsv(new CSVOutput());
    result.setOutputSerialization(sink);

    return result;
}
/**
 * Checks the input serialization produced for the default request context:
 * a CSV input with ',' / '"' / '"' / '\n' as field delimiter, quote, quote
 * escape and record delimiter, no JSON or Parquet input, and compression
 * type "NONE".
 */
@Test
public void testGetInputSerializationDefaults() {
    RequestContext requestContext = getDefaultRequestContext();
    S3SelectAccessor accessor = new S3SelectAccessor();
    InputSerialization serialization = accessor.getInputSerialization(requestContext);

    assertNotNull(serialization);
    assertNotNull(serialization.getCsv());

    // Options that are not set on the default context stay null.
    assertNull(serialization.getCsv().getAllowQuotedRecordDelimiter());
    assertNull(serialization.getCsv().getComments());
    assertNull(serialization.getCsv().getFileHeaderInfo());

    // Single-character CSV settings.
    assertEquals(',', serialization.getCsv().getFieldDelimiter().charValue());
    assertEquals('"', serialization.getCsv().getQuoteCharacter().charValue());
    assertEquals('"', serialization.getCsv().getQuoteEscapeCharacter().charValue());
    assertEquals('\n', serialization.getCsv().getRecordDelimiter().charValue());

    // Only the CSV input is populated.
    assertNull(serialization.getJson());
    assertNull(serialization.getParquet());
    assertEquals("NONE", serialization.getCompressionType());
}
/**
 * Builds an S3 Select request for the object at {@code path}, using CSV for
 * both input and output. Field delimiter, quote and escape characters are
 * taken from {@code schema}; the record delimiter comes from
 * {@code lineDelimiter}.
 *
 * @param schema table properties supplying the CSV settings
 * @param query  the SQL expression to evaluate
 * @param path   location of the object to query
 * @return the populated {@link SelectObjectContentRequest}
 */
@Override
public SelectObjectContentRequest buildSelectObjectRequest(Properties schema, String query, Path path)
{
    SelectObjectContentRequest request = new SelectObjectContentRequest();
    request.setBucketName(PrestoS3FileSystem.extractBucketName(path.toUri()));
    request.setKey(PrestoS3FileSystem.keyFromPath(path));
    request.setExpression(query);
    request.setExpressionType(ExpressionType.SQL);

    // CSV settings shared by the input and output descriptions.
    String delimiter = getFieldDelimiter(schema);
    String quote = schema.getProperty(QUOTE_CHAR, null);
    String escape = schema.getProperty(ESCAPE_CHAR, null);

    // Input: how the stored object is parsed.
    CSVInput csvInput = new CSVInput();
    csvInput.setRecordDelimiter(lineDelimiter);
    csvInput.setFieldDelimiter(delimiter);
    csvInput.setComments(COMMENTS_CHAR_STR);
    csvInput.setQuoteCharacter(quote);
    csvInput.setQuoteEscapeCharacter(escape);

    InputSerialization input = new InputSerialization();
    input.setCompressionType(getCompressionType(path));
    input.setCsv(csvInput);
    request.setInputSerialization(input);

    // Output: how the selected records are written back.
    CSVOutput csvOutput = new CSVOutput();
    csvOutput.setRecordDelimiter(lineDelimiter);
    csvOutput.setFieldDelimiter(delimiter);
    csvOutput.setQuoteCharacter(quote);
    csvOutput.setQuoteEscapeCharacter(escape);

    OutputSerialization output = new OutputSerialization();
    output.setCsv(csvOutput);
    request.setOutputSerialization(output);

    return request;
}
/**
 * Generates the {@link SelectObjectContentRequest} object from
 * the request context.
 *
 * <p>The "use position to identify column" flag handed to the query builder
 * is set when the input serialization is CSV and the FILE_HEADER_INFO option
 * is blank or differs (ignoring case) from FILE_HEADER_INFO_USE.
 *
 * <p>If the select query cannot be built, the failure is logged together
 * with its cause and the request is still returned, with a {@code null}
 * expression.
 *
 * @param context the request context
 * @return a {@link SelectObjectContentRequest}
 */
SelectObjectContentRequest generateBaseCSVRequest(RequestContext context) {
    InputSerialization inputSerialization = getInputSerialization(context);
    String fileHeaderInfo = context.getOption(FILE_HEADER_INFO);
    boolean usePositionToIdentifyColumn = inputSerialization.getCsv() != null &&
            (StringUtils.isBlank(fileHeaderInfo) ||
                    !StringUtils.equalsIgnoreCase(FILE_HEADER_INFO_USE, fileHeaderInfo));

    String query = null;
    try {
        S3SelectQueryBuilder queryBuilder = new S3SelectQueryBuilder(context, usePositionToIdentifyColumn);
        query = queryBuilder.buildSelectQuery();
    } catch (SQLException e) {
        // Pass the exception as the trailing argument so the cause and stack
        // trace are recorded instead of being silently dropped.
        LOG.error("Unable to build select query for filter string {}", context.getFilterString(), e);
    }
    LOG.trace("Select query: {}", query);

    SelectObjectContentRequest request = new SelectObjectContentRequest();
    request.setBucketName(name.getHost());
    // The object key is the path without its leading slash.
    request.setKey(StringUtils.removeStart(name.getPath(), "/"));
    request.setExpression(query);
    request.setExpressionType(ExpressionType.SQL);

    LOG.debug("With bucket name '{}'", request.getBucketName());
    LOG.debug("With key '{}'", request.getKey());
    LOG.debug("With expression query '{}'", query);

    request.setInputSerialization(inputSerialization);
    OutputSerialization outputSerialization = getOutputSerialization(context);
    request.setOutputSerialization(outputSerialization);
    return request;
}
/**
 * Builds a {@link com.amazonaws.services.s3.model.InputSerialization}
 * from the values carried by the request context.
 *
 * <p>The format inferred from the context selects the Parquet, JSON or
 * (by default) CSV input. The COMPRESSION_TYPE option selects GZIP or BZIP2
 * (case-insensitive); any other value results in no compression.
 *
 * @param context the request context
 * @return a {@link InputSerialization} object
 */
InputSerialization getInputSerialization(RequestContext context) {
    InputSerialization result = new InputSerialization();

    // The format name is inferred from the profile (i.e. s3:parquet
    // would return parquet for the format)
    String formatName = context.inferFormatName();
    String requestedCompression = context.getOption(COMPRESSION_TYPE);

    LOG.debug("With format {}", formatName);
    if (StringUtils.equalsIgnoreCase(formatName, "parquet")) {
        result.setParquet(new ParquetInput());
    } else if (StringUtils.equalsIgnoreCase(formatName, "json")) {
        result.setJson(getJSONInput(context));
    } else {
        // Anything else is read as CSV.
        result.setCsv(getCSVInput(context));
    }

    LOG.debug("With compression type {}", requestedCompression);
    CompressionType resolvedCompression = CompressionType.NONE;
    if (StringUtils.equalsIgnoreCase(requestedCompression, "gzip")) {
        resolvedCompression = CompressionType.GZIP;
    } else if (StringUtils.equalsIgnoreCase(requestedCompression, "bzip2")) {
        resolvedCompression = CompressionType.BZIP2;
    }
    result.setCompressionType(resolvedCompression);

    return result;
}
/**
 * Setting the "COMPRESSION_CODEC" option to "GZIP" must yield a "GZIP"
 * compression type on the input serialization.
 */
@Test
public void testCompressionTypeGZIP() {
    RequestContext requestContext = getDefaultRequestContext();
    requestContext.addOption("COMPRESSION_CODEC", "GZIP");

    S3SelectAccessor accessor = new S3SelectAccessor();
    InputSerialization serialization = accessor.getInputSerialization(requestContext);

    assertEquals("GZIP", serialization.getCompressionType());
}
/**
 * Setting the "COMPRESSION_CODEC" option to "BZIP2" must yield a "BZIP2"
 * compression type on the input serialization.
 */
@Test
public void testCompressionTypeBZIP2() {
    RequestContext requestContext = getDefaultRequestContext();
    requestContext.addOption("COMPRESSION_CODEC", "BZIP2");

    S3SelectAccessor accessor = new S3SelectAccessor();
    InputSerialization serialization = accessor.getInputSerialization(requestContext);

    assertEquals("BZIP2", serialization.getCompressionType());
}
/**
 * With the "parquet" format, only the Parquet input must be populated.
 */
@Test
public void testParquetInputSerialization() {
    RequestContext requestContext = getRequestContext("s3:parquet");
    requestContext.setFormat("parquet");

    S3SelectAccessor accessor = new S3SelectAccessor();
    InputSerialization serialization = accessor.getInputSerialization(requestContext);

    assertNotNull(serialization.getParquet());
    assertNull(serialization.getJson());
    assertNull(serialization.getCsv());
}
/**
 * With the "json" format, only the JSON input must be populated.
 */
@Test
public void testJSONInputSerialization() {
    RequestContext requestContext = getRequestContext("s3:json");
    requestContext.setFormat("json");

    S3SelectAccessor accessor = new S3SelectAccessor();
    InputSerialization serialization = accessor.getInputSerialization(requestContext);

    assertNotNull(serialization.getJson());
    assertNull(serialization.getCsv());
    assertNull(serialization.getParquet());
}
/**
 * With the "json" format and the "JSON-TYPE" option set to "document",
 * the JSON input must report the type "document".
 */
@Test
public void testJSONInputSerializationWithDocumentJsonType() {
    RequestContext requestContext = getRequestContext("s3:json");
    requestContext.setFormat("json");
    requestContext.addOption("JSON-TYPE", "document");

    S3SelectAccessor accessor = new S3SelectAccessor();
    InputSerialization serialization = accessor.getInputSerialization(requestContext);

    // Only the JSON input is populated.
    assertNotNull(serialization.getJson());
    assertNull(serialization.getCsv());
    assertNull(serialization.getParquet());
    assertEquals("document", serialization.getJson().getType());
}
/**
 * With the "json" format and the "JSON-TYPE" option set to "lines",
 * the JSON input must report the type "lines".
 */
@Test
public void testJSONInputSerializationWithLinesJsonType() {
    RequestContext requestContext = getRequestContext("s3:json");
    requestContext.setFormat("json");
    requestContext.addOption("JSON-TYPE", "lines");

    S3SelectAccessor accessor = new S3SelectAccessor();
    InputSerialization serialization = accessor.getInputSerialization(requestContext);

    // Only the JSON input is populated.
    assertNotNull(serialization.getJson());
    assertNull(serialization.getCsv());
    assertNull(serialization.getParquet());
    assertEquals("lines", serialization.getJson().getType());
}
/**
 * A "|" delimiter configured on the context's Greenplum CSV settings must
 * become the CSV field delimiter.
 */
@Test
public void testPipeDelimiter() {
    RequestContext requestContext = getDefaultRequestContext();
    requestContext.getGreenplumCSV().withDelimiter("|");

    S3SelectAccessor accessor = new S3SelectAccessor();
    InputSerialization serialization = accessor.getInputSerialization(requestContext);

    assertEquals('|', serialization.getCsv().getFieldDelimiter().charValue());
}
/**
 * The "FILE_HEADER" option value "IGNORE" must be passed through as the
 * CSV file header info.
 */
@Test
public void testFileHeaderInfoIsIgnore() {
    RequestContext requestContext = getDefaultRequestContext();
    requestContext.addOption("FILE_HEADER", "IGNORE");

    S3SelectAccessor accessor = new S3SelectAccessor();
    InputSerialization serialization = accessor.getInputSerialization(requestContext);

    assertEquals("IGNORE", serialization.getCsv().getFileHeaderInfo());
}
/**
 * The "FILE_HEADER" option value "USE" must be passed through as the
 * CSV file header info.
 */
@Test
public void testFileHeaderInfoIsUse() {
    RequestContext requestContext = getDefaultRequestContext();
    requestContext.addOption("FILE_HEADER", "USE");

    S3SelectAccessor accessor = new S3SelectAccessor();
    InputSerialization serialization = accessor.getInputSerialization(requestContext);

    assertEquals("USE", serialization.getCsv().getFileHeaderInfo());
}
/**
 * The "FILE_HEADER" option value "NONE" must be passed through as the
 * CSV file header info.
 */
@Test
public void testFileHeaderInfoIsNone() {
    RequestContext requestContext = getDefaultRequestContext();
    requestContext.addOption("FILE_HEADER", "NONE");

    S3SelectAccessor accessor = new S3SelectAccessor();
    InputSerialization serialization = accessor.getInputSerialization(requestContext);

    assertEquals("NONE", serialization.getCsv().getFileHeaderInfo());
}
/**
 * The "ESCAPE" option set to a double quote must become the CSV quote
 * escape character.
 */
@Test
public void testQuoteEscapeCharacter() {
    RequestContext requestContext = getDefaultRequestContext();
    requestContext.addOption("ESCAPE", "\"");

    S3SelectAccessor accessor = new S3SelectAccessor();
    InputSerialization serialization = accessor.getInputSerialization(requestContext);

    assertEquals('\"', serialization.getCsv().getQuoteEscapeCharacter().charValue());
}
/**
 * The "NEWLINE" option set to a line feed must become the CSV record
 * delimiter.
 */
@Test
public void testRecordDelimiter() {
    RequestContext requestContext = getDefaultRequestContext();
    requestContext.addOption("NEWLINE", "\n");

    S3SelectAccessor accessor = new S3SelectAccessor();
    InputSerialization serialization = accessor.getInputSerialization(requestContext);

    assertEquals('\n', serialization.getCsv().getRecordDelimiter().charValue());
}
/**
 * The "QUOTE" option set to a double quote must become the CSV quote
 * character.
 */
@Test
public void testQuoteCharacter() {
    RequestContext requestContext = getDefaultRequestContext();
    requestContext.addOption("QUOTE", "\"");

    S3SelectAccessor accessor = new S3SelectAccessor();
    InputSerialization serialization = accessor.getInputSerialization(requestContext);

    assertEquals('"', serialization.getCsv().getQuoteCharacter().charValue());
}