cascading.tap.partition
Class BasePartitionTap<Config,Input,Output>

java.lang.Object
  extended by cascading.tap.Tap<Config,Input,Output>
      extended by cascading.tap.partition.BasePartitionTap<Config,Input,Output>
All Implemented Interfaces:
FlowElement, Traceable, Serializable

public abstract class BasePartitionTap<Config,Input,Output>
extends Tap<Config,Input,Output>

See Also:
Serialized Form

Nested Class Summary
static class BasePartitionTap.Counters
           
 class BasePartitionTap.PartitionCollector
           
static class BasePartitionTap.PartitionScheme<Config,Input,Output>
           
 
Field Summary
protected  boolean keepParentOnDelete
          Field keepParentOnDelete
protected static int OPEN_WRITES_THRESHOLD_DEFAULT
          Field OPEN_WRITES_THRESHOLD_DEFAULT
protected  int openWritesThreshold
          Field openWritesThreshold
protected  Tap parent
          Field parent
protected  Partition partition
          Field partition
 
Constructor Summary
protected BasePartitionTap(Tap parent, Partition partition, int openWritesThreshold)
           
protected BasePartitionTap(Tap parent, Partition partition, SinkMode sinkMode)
           
protected BasePartitionTap(Tap parent, Partition partition, SinkMode sinkMode, boolean keepParentOnDelete, int openWritesThreshold)
           
 
Method Summary
 boolean commitResource(Config conf)
          Method commitResource allows the underlying resource to be notified when all write processing is successful so that any additional cleanup or processing may be completed.
 boolean createResource(Config conf)
          Method createResource creates the underlying resource.
protected abstract  TupleEntrySchemeCollector createTupleEntrySchemeCollector(FlowProcess<Config> flowProcess, Tap parent, String path, long sequence)
           
protected abstract  TupleEntrySchemeIterator createTupleEntrySchemeIterator(FlowProcess<Config> flowProcess, Tap parent, String path, Input input)
           
 boolean deleteResource(Config conf)
          Method deleteResource deletes the resource represented by this instance.
 boolean equals(Object object)
           
 String[] getChildPartitionIdentifiers(FlowProcess<Config> flowProcess, boolean fullyQualified)
          Method getChildPartitionIdentifiers returns an array of all identifiers for all available partitions.
protected abstract  String getCurrentIdentifier(FlowProcess<Config> flowProcess)
           
 String getIdentifier()
          Method getIdentifier returns a String representing the resource this Tap instance represents.
 long getModifiedTime(Config conf)
          Method getModifiedTime returns the date this resource was last modified.
 int getOpenWritesThreshold()
          Method getOpenWritesThreshold returns the openWritesThreshold of this PartitionTap object.
 Tap getParent()
          Method getParent returns the parent Tap of this PartitionTap object.
 Partition getPartition()
          Method getPartition returns the Partition instance used by this PartitionTap.
 int hashCode()
           
 TupleEntryIterator openForRead(FlowProcess<Config> flowProcess, Input input)
          Method openForRead opens the resource represented by this Tap instance for reading.
 TupleEntryCollector openForWrite(FlowProcess<Config> flowProcess, Output output)
          Method openForWrite opens the resource represented by this Tap instance for writing.
 boolean prepareResourceForRead(Config conf)
          Method prepareResourceForRead allows the underlying resource to be notified when reading will begin.
 boolean prepareResourceForWrite(Config conf)
          Method prepareResourceForWrite allows the underlying resource to be notified when writing will begin.
 boolean resourceExists(Config conf)
          Method resourceExists returns true if the path represented by this instance exists.
 boolean rollbackResource(Config conf)
          Method rollbackResource allows the underlying resource to be notified when any write processing has failed or was stopped so that any cleanup may be started.
 String toString()
           
 
Methods inherited from class cascading.tap.Tap
createResource, deleteResource, flowConfInit, getConfigDef, getFullIdentifier, getFullIdentifier, getModifiedTime, getScheme, getSinkFields, getSinkMode, getSourceFields, getStepConfigDef, getTrace, hasConfigDef, hasStepConfigDef, id, isEquivalentTo, isKeep, isReplace, isSink, isSource, isTemporary, isUpdate, openForRead, openForWrite, outgoingScopeFor, presentSinkFields, presentSourceFields, resolveIncomingOperationArgumentFields, resolveIncomingOperationPassThroughFields, resourceExists, retrieveSinkFields, retrieveSourceFields, setScheme, sinkConfInit, sourceConfInit, taps
 
Methods inherited from class java.lang.Object
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
 

Field Detail

OPEN_WRITES_THRESHOLD_DEFAULT

protected static final int OPEN_WRITES_THRESHOLD_DEFAULT
Field OPEN_WRITES_THRESHOLD_DEFAULT

See Also:
Constant Field Values

parent

protected Tap parent
Field parent


partition

protected Partition partition
Field partition


keepParentOnDelete

protected boolean keepParentOnDelete
Field keepParentOnDelete


openWritesThreshold

protected int openWritesThreshold
Field openWritesThreshold

Constructor Detail

BasePartitionTap

protected BasePartitionTap(Tap parent,
                           Partition partition,
                           int openWritesThreshold)

BasePartitionTap

protected BasePartitionTap(Tap parent,
                           Partition partition,
                           SinkMode sinkMode)

BasePartitionTap

protected BasePartitionTap(Tap parent,
                           Partition partition,
                           SinkMode sinkMode,
                           boolean keepParentOnDelete,
                           int openWritesThreshold)
Method Detail

createTupleEntrySchemeCollector

protected abstract TupleEntrySchemeCollector createTupleEntrySchemeCollector(FlowProcess<Config> flowProcess,
                                                                             Tap parent,
                                                                             String path,
                                                                             long sequence)
                                                                      throws IOException
Throws:
IOException

createTupleEntrySchemeIterator

protected abstract TupleEntrySchemeIterator createTupleEntrySchemeIterator(FlowProcess<Config> flowProcess,
                                                                           Tap parent,
                                                                           String path,
                                                                           Input input)
                                                                    throws IOException
Throws:
IOException

getParent

public Tap getParent()
Method getParent returns the parent Tap of this PartitionTap object.

Returns:
the parent (type Tap) of this PartitionTap object.

getPartition

public Partition getPartition()
Method getPartition returns the Partition instance used by this PartitionTap.

Returns:
the partition instance

getChildPartitionIdentifiers

public String[] getChildPartitionIdentifiers(FlowProcess<Config> flowProcess,
                                             boolean fullyQualified)
                                      throws IOException
Method getChildPartitionIdentifiers returns an array of all identifiers for all available partitions.

This method is used internally to set all incoming paths, override to limit applicable partitions.

Note the returned array may be large.

Parameters:
flowProcess - of type FlowProcess
fullyQualified - of type boolean
Returns:
a String[] of partition identifiers
Throws:
IOException

getIdentifier

public String getIdentifier()
Description copied from class: Tap
Method getIdentifier returns a String representing the resource this Tap instance represents.

Often, if the tap accesses a filesystem, the identifier is nothing more than the path to the file or directory. In other cases it may be a URL or URI representing a connection string or remote resource.

Any two Tap instances having the same value for the identifier are considered equal.

Specified by:
getIdentifier in class Tap<Config,Input,Output>
Returns:
String

getCurrentIdentifier

protected abstract String getCurrentIdentifier(FlowProcess<Config> flowProcess)

getOpenWritesThreshold

public int getOpenWritesThreshold()
Method getOpenWritesThreshold returns the openWritesThreshold of this PartitionTap object.

Returns:
the openWritesThreshold (type int) of this PartitionTap object.

openForWrite

public TupleEntryCollector openForWrite(FlowProcess<Config> flowProcess,
                                        Output output)
                                 throws IOException
Description copied from class: Tap
Method openForWrite opens the resource represented by this Tap instance for writing.

This method is used internally and does not honor the SinkMode setting. If SinkMode is SinkMode.REPLACE, this call may fail. See Tap.openForWrite(cascading.flow.FlowProcess).

The output value may be null; if so, sub-classes must inquire with the underlying Scheme via Scheme.sinkConfInit(cascading.flow.FlowProcess, Tap, Object) to get the proper output type and instantiate it before calling super.openForWrite().

Specified by:
openForWrite in class Tap<Config,Input,Output>
Parameters:
flowProcess - of type FlowProcess
output - of type Output
Returns:
TupleEntryCollector
Throws:
IOException - when the resource cannot be opened

openForRead

public TupleEntryIterator openForRead(FlowProcess<Config> flowProcess,
                                      Input input)
                               throws IOException
Description copied from class: Tap
Method openForRead opens the resource represented by this Tap instance for reading.

The input value may be null; if so, sub-classes must inquire with the underlying Scheme via Scheme.sourceConfInit(cascading.flow.FlowProcess, Tap, Object) to get the proper input type and instantiate it before calling super.openForRead().

Note the returned iterator will return the same instance of TupleEntry on every call, thus a copy must be made of either the TupleEntry or the underlying Tuple instance if they are to be stored in a Collection.

Specified by:
openForRead in class Tap<Config,Input,Output>
Parameters:
flowProcess - of type FlowProcess
input - of type Input
Returns:
TupleEntryIterator
Throws:
IOException - when the resource cannot be opened

createResource

public boolean createResource(Config conf)
                       throws IOException
Description copied from class: Tap
Method createResource creates the underlying resource.

Specified by:
createResource in class Tap<Config,Input,Output>
Parameters:
conf - of type Config
Returns:
boolean
Throws:
IOException - when there is an error making directories

deleteResource

public boolean deleteResource(Config conf)
                       throws IOException
Description copied from class: Tap
Method deleteResource deletes the resource represented by this instance.

Specified by:
deleteResource in class Tap<Config,Input,Output>
Parameters:
conf - of type Config
Returns:
boolean
Throws:
IOException - when the resource cannot be deleted

prepareResourceForRead

public boolean prepareResourceForRead(Config conf)
                               throws IOException
Description copied from class: Tap
Method prepareResourceForRead allows the underlying resource to be notified when reading will begin.

This method will be called client side so that any remote or external resources can be initialized.

If this method returns false, an exception will be thrown halting the current Flow.

In most cases, resource initialization should happen in the Tap.openForRead(FlowProcess, Object) method.

This allows for initialization of cluster side resources, like a JDBC driver used to read data from a database, that cannot be passed client to cluster.

Overrides:
prepareResourceForRead in class Tap<Config,Input,Output>
Parameters:
conf - of type Config
Returns:
returns true if successful
Throws:
IOException

prepareResourceForWrite

public boolean prepareResourceForWrite(Config conf)
                                throws IOException
Description copied from class: Tap
Method prepareResourceForWrite allows the underlying resource to be notified when writing will begin.

This method will be called once client side so that any remote or external resources can be initialized.

If this method returns false, an exception will be thrown halting the current Flow.

In most cases, resource initialization should happen in the Tap.openForWrite(FlowProcess, Object) method.

This allows for initialization of cluster side resources, like a JDBC driver used to write data to a database, that cannot be passed client to cluster.

In the above JDBC example, overriding this method will allow for testing for the existence of and/or creating a remote table used by all individual cluster side tasks.

Overrides:
prepareResourceForWrite in class Tap<Config,Input,Output>
Parameters:
conf - of type Config
Returns:
returns true if successful
Throws:
IOException

commitResource

public boolean commitResource(Config conf)
                       throws IOException
Description copied from class: Tap
Method commitResource allows the underlying resource to be notified when all write processing is successful so that any additional cleanup or processing may be completed.

See Tap.rollbackResource(Object) to handle cleanup in the face of failures.

This method is invoked once client side and not in the cluster, if any.

If other sink Tap instances in a given Flow fail on commitResource after it has been called on this instance, rollbackResource will not be called.

Overrides:
commitResource in class Tap<Config,Input,Output>
Parameters:
conf - of type Config
Returns:
returns true if successful
Throws:
IOException

rollbackResource

public boolean rollbackResource(Config conf)
                         throws IOException
Description copied from class: Tap
Method rollbackResource allows the underlying resource to be notified when any write processing has failed or was stopped so that any cleanup may be started.

See Tap.commitResource(Object) to handle cleanup when the write has successfully completed.

This method is invoked once client side and not in the cluster, if any.

Overrides:
rollbackResource in class Tap<Config,Input,Output>
Parameters:
conf - of type Config
Returns:
returns true if successful
Throws:
IOException

resourceExists

public boolean resourceExists(Config conf)
                       throws IOException
Description copied from class: Tap
Method resourceExists returns true if the path represented by this instance exists.

Specified by:
resourceExists in class Tap<Config,Input,Output>
Parameters:
conf - of type Config
Returns:
true if the underlying resource already exists
Throws:
IOException - when the status cannot be determined

getModifiedTime

public long getModifiedTime(Config conf)
                     throws IOException
Description copied from class: Tap
Method getModifiedTime returns the date this resource was last modified.

Specified by:
getModifiedTime in class Tap<Config,Input,Output>
Parameters:
conf - of type Config
Returns:
The date this resource was last modified.
Throws:
IOException

equals

public boolean equals(Object object)
Overrides:
equals in class Tap<Config,Input,Output>

hashCode

public int hashCode()
Overrides:
hashCode in class Tap<Config,Input,Output>

toString

public String toString()
Overrides:
toString in class Tap<Config,Input,Output>


Copyright © 2007-2015 Concurrent, Inc. All Rights Reserved.