public class TextLine extends Scheme<Configuration,RecordReader,OutputCollector,Object[],Object[]>
A Scheme for plain text files. Files are broken into lines.
Either a line-feed or a carriage-return is used to signal the end of a line.
By default, this scheme returns a Tuple
with two fields, "offset" and "line".
Many of the constructors take both "sourceFields" and "sinkFields". sourceFields denote the field names
to be used instead of the names "offset" and "line". sinkFields is a selector and is by default Fields.ALL
.
Any available field names can be given if only a subset of the incoming fields should be used.
If a Fields
instance is passed on the constructor as sourceFields having only one field, the return tuples
will simply be the "line" value using the given field name.
Note that TextLine will concatenate all the Tuple values for the selected fields with a TAB delimiter before
writing out the line.
Note that sink compression is TextLine.Compress.DISABLE by default. If null is passed to the constructor
for the compression value, it will remain disabled.
If any of the input files end with ".zip", an error will be thrown.
By default, all text is encoded/decoded as UTF-8. This can be changed via the charsetName constructor argument.

Modifier and Type | Class and Description |
---|---|
static class |
TextLine.Compress |
Modifier and Type | Field and Description |
---|---|
static String |
DEFAULT_CHARSET |
static Fields |
DEFAULT_SOURCE_FIELDS
Field DEFAULT_SOURCE_FIELDS
|
Constructor and Description |
---|
TextLine()
Creates a new TextLine instance that sources "offset" and "line" fields, and sinks all incoming fields, where
"offset" is the byte offset in the input file.
|
TextLine(Fields sourceFields)
Creates a new TextLine instance.
|
TextLine(Fields sourceFields,
Fields sinkFields)
Creates a new TextLine instance.
|
TextLine(Fields sourceFields,
Fields sinkFields,
int numSinkParts)
Creates a new TextLine instance.
|
TextLine(Fields sourceFields,
Fields sinkFields,
String charsetName)
Creates a new TextLine instance.
|
TextLine(Fields sourceFields,
Fields sinkFields,
TextLine.Compress sinkCompression)
Constructor TextLine creates a new TextLine instance.
|
TextLine(Fields sourceFields,
Fields sinkFields,
TextLine.Compress sinkCompression,
int numSinkParts)
Constructor TextLine creates a new TextLine instance.
|
TextLine(Fields sourceFields,
Fields sinkFields,
TextLine.Compress sinkCompression,
int numSinkParts,
String charsetName)
Constructor TextLine creates a new TextLine instance.
|
TextLine(Fields sourceFields,
Fields sinkFields,
TextLine.Compress sinkCompression,
String charsetName)
Constructor TextLine creates a new TextLine instance.
|
TextLine(Fields sourceFields,
int numSinkParts)
Creates a new TextLine instance.
|
TextLine(Fields sourceFields,
String charsetName)
Creates a new TextLine instance.
|
TextLine(int numSinkParts)
Creates a new TextLine instance that sources "offset" and "line" fields, and sinks all incoming fields, where
"offset" is the byte offset in the input file.
|
TextLine(TextLine.Compress sinkCompression)
Creates a new TextLine instance that sources "offset" and "line" fields, and sinks all incoming fields, where
"offset" is the byte offset in the input file.
|
Modifier and Type | Method and Description |
---|---|
String |
getCharsetName() |
TextLine.Compress |
getSinkCompression()
Method getSinkCompression returns the sinkCompression of this TextLine object.
|
protected String |
makeEncodedString(Object[] context) |
void |
presentSinkFields(FlowProcess<? extends Configuration> flowProcess,
Tap tap,
Fields fields) |
void |
presentSourceFields(FlowProcess<? extends Configuration> flowProcess,
Tap tap,
Fields fields) |
protected void |
setCharsetName(String charsetName) |
void |
setSinkCompression(TextLine.Compress sinkCompression)
Method setSinkCompression sets the sinkCompression of this TextLine object.
|
void |
sink(FlowProcess<? extends Configuration> flowProcess,
SinkCall<Object[],OutputCollector> sinkCall) |
void |
sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
Tap<Configuration,RecordReader,OutputCollector> tap,
Configuration conf) |
void |
sinkPrepare(FlowProcess<? extends Configuration> flowProcess,
SinkCall<Object[],OutputCollector> sinkCall) |
boolean |
source(FlowProcess<? extends Configuration> flowProcess,
SourceCall<Object[],RecordReader> sourceCall) |
void |
sourceCleanup(FlowProcess<? extends Configuration> flowProcess,
SourceCall<Object[],RecordReader> sourceCall) |
void |
sourceConfInit(FlowProcess<? extends Configuration> flowProcess,
Tap<Configuration,RecordReader,OutputCollector> tap,
Configuration conf) |
protected void |
sourceHandleInput(SourceCall<Object[],RecordReader> sourceCall) |
void |
sourcePrepare(FlowProcess<? extends Configuration> flowProcess,
SourceCall<Object[],RecordReader> sourceCall) |
protected void |
verify(Fields sourceFields) |
equals, getNumSinkParts, getSinkFields, getSourceFields, getTrace, hashCode, isSink, isSource, isSymmetrical, presentSinkFieldsInternal, presentSourceFieldsInternal, retrieveSinkFields, retrieveSourceFields, setNumSinkParts, setSinkFields, setSourceFields, sinkCleanup, toString
public static final String DEFAULT_CHARSET
public static final Fields DEFAULT_SOURCE_FIELDS
public TextLine()
@ConstructorProperties(value="numSinkParts") public TextLine(int numSinkParts)
numSinkParts
- of type int
@ConstructorProperties(value="sinkCompression") public TextLine(TextLine.Compress sinkCompression)
sinkCompression
- of type Compress
@ConstructorProperties(value={"sourceFields","sinkFields"}) public TextLine(Fields sourceFields, Fields sinkFields)
sourceFields
- the source fields for this scheme
sinkFields
- the sink fields for this scheme
@ConstructorProperties(value={"sourceFields","sinkFields","charsetName"}) public TextLine(Fields sourceFields, Fields sinkFields, String charsetName)
sourceFields
- the source fields for this scheme
sinkFields
- the sink fields for this scheme
charsetName
- of type String
@ConstructorProperties(value={"sourceFields","sinkFields","numSinkParts"}) public TextLine(Fields sourceFields, Fields sinkFields, int numSinkParts)
sourceFields
- the source fields for this scheme
sinkFields
- the sink fields for this scheme
numSinkParts
- of type int
@ConstructorProperties(value={"sourceFields","sinkFields","sinkCompression"}) public TextLine(Fields sourceFields, Fields sinkFields, TextLine.Compress sinkCompression)
sourceFields
- of type Fields
sinkFields
- of type Fields
sinkCompression
- of type Compress
@ConstructorProperties(value={"sourceFields","sinkFields","sinkCompression","charsetName"}) public TextLine(Fields sourceFields, Fields sinkFields, TextLine.Compress sinkCompression, String charsetName)
sourceFields
- of type Fields
sinkFields
- of type Fields
sinkCompression
- of type Compress
charsetName
- of type String
@ConstructorProperties(value={"sourceFields","sinkFields","sinkCompression","numSinkParts"}) public TextLine(Fields sourceFields, Fields sinkFields, TextLine.Compress sinkCompression, int numSinkParts)
sourceFields
- of type Fields
sinkFields
- of type Fields
sinkCompression
- of type Compress
numSinkParts
- of type int
@ConstructorProperties(value={"sourceFields","sinkFields","sinkCompression","numSinkParts","charsetName"}) public TextLine(Fields sourceFields, Fields sinkFields, TextLine.Compress sinkCompression, int numSinkParts, String charsetName)
sourceFields
- of type Fields
sinkFields
- of type Fields
sinkCompression
- of type Compress
numSinkParts
- of type int
charsetName
- of type String
@ConstructorProperties(value="sourceFields") public TextLine(Fields sourceFields)
sourceFields
- the source fields for this scheme
@ConstructorProperties(value={"sourceFields","charsetName"}) public TextLine(Fields sourceFields, String charsetName)
sourceFields
- the source fields for this scheme
charsetName
- of type String
@ConstructorProperties(value={"sourceFields","numSinkParts"}) public TextLine(Fields sourceFields, int numSinkParts)
sourceFields
- the source fields for this scheme
numSinkParts
- of type int
protected void setCharsetName(String charsetName)
public String getCharsetName()
public TextLine.Compress getSinkCompression()
public void setSinkCompression(TextLine.Compress sinkCompression)
sinkCompression
- the sinkCompression of this TextLine object.
public void sourceConfInit(FlowProcess<? extends Configuration> flowProcess, Tap<Configuration,RecordReader,OutputCollector> tap, Configuration conf)
sourceConfInit
in class Scheme<Configuration,RecordReader,OutputCollector,Object[],Object[]>
public void presentSourceFields(FlowProcess<? extends Configuration> flowProcess, Tap tap, Fields fields)
presentSourceFields
in class Scheme<Configuration,RecordReader,OutputCollector,Object[],Object[]>
public void presentSinkFields(FlowProcess<? extends Configuration> flowProcess, Tap tap, Fields fields)
presentSinkFields
in class Scheme<Configuration,RecordReader,OutputCollector,Object[],Object[]>
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess, Tap<Configuration,RecordReader,OutputCollector> tap, Configuration conf)
sinkConfInit
in class Scheme<Configuration,RecordReader,OutputCollector,Object[],Object[]>
public void sourcePrepare(FlowProcess<? extends Configuration> flowProcess, SourceCall<Object[],RecordReader> sourceCall)
sourcePrepare
in class Scheme<Configuration,RecordReader,OutputCollector,Object[],Object[]>
public boolean source(FlowProcess<? extends Configuration> flowProcess, SourceCall<Object[],RecordReader> sourceCall) throws IOException
source
in class Scheme<Configuration,RecordReader,OutputCollector,Object[],Object[]>
IOException
protected void sourceHandleInput(SourceCall<Object[],RecordReader> sourceCall)
protected String makeEncodedString(Object[] context)
public void sourceCleanup(FlowProcess<? extends Configuration> flowProcess, SourceCall<Object[],RecordReader> sourceCall)
sourceCleanup
in class Scheme<Configuration,RecordReader,OutputCollector,Object[],Object[]>
public void sinkPrepare(FlowProcess<? extends Configuration> flowProcess, SinkCall<Object[],OutputCollector> sinkCall) throws IOException
sinkPrepare
in class Scheme<Configuration,RecordReader,OutputCollector,Object[],Object[]>
IOException
public void sink(FlowProcess<? extends Configuration> flowProcess, SinkCall<Object[],OutputCollector> sinkCall) throws IOException
sink
in class Scheme<Configuration,RecordReader,OutputCollector,Object[],Object[]>
IOException
Copyright © 2007-2015 Concurrent, Inc. All Rights Reserved.