cascading.scheme.util
Class DelimitedParser

java.lang.Object
  extended by cascading.scheme.util.DelimitedParser
All Implemented Interfaces:
Serializable

public class DelimitedParser
extends Object
implements Serializable

Class DelimitedParser is a base class for parsing text delimited files.

It may be subclassed to change its behavior.

The interface FieldTypeResolver may be used to clean and prepare field names for data columns, and to infer type information from column names.

See Also:
Serialized Form

Field Summary
protected  Pattern cleanPattern
          Field cleanPattern
protected  CoercibleType[] coercibles
          Field coercibles
protected  String delimiter
          Field delimiter
protected  boolean enforceStrict
          Field enforceStrict
protected  Pattern escapePattern
          Field escapePattern
protected  FieldTypeResolver fieldTypeResolver
          Field fieldTypeResolver
protected  int numValues
          Field numValues
protected  String quote
          Field quote
protected  boolean safe
          Field safe
protected  Fields sourceFields
          Field sourceFields
protected  Pattern splitPattern
          Field splitPattern
protected  boolean strict
          Field strict
protected  Type[] types
          Field types
 
Constructor Summary
DelimitedParser(String delimiter, String quote, Class[] types)
           
DelimitedParser(String delimiter, String quote, Class[] types, boolean strict, boolean safe)
           
DelimitedParser(String delimiter, String quote, Class[] types, boolean strict, boolean safe, Fields sourceFields, Fields sinkFields)
           
DelimitedParser(String delimiter, String quote, Class[] types, boolean strict, boolean safe, Fields sourceFields, Fields sinkFields, FieldTypeResolver fieldTypeResolver)
           
DelimitedParser(String delimiter, String quote, Class[] types, boolean strict, boolean safe, FieldTypeResolver fieldTypeResolver)
           
DelimitedParser(String delimiter, String quote, FieldTypeResolver fieldTypeResolver)
           
 
Method Summary
protected  Object[] cleanFields(Object[] result)
           
protected  Object[] cleanParsedLine(Object[] split)
           
 Object[] cleanSplit(Object[] split, Pattern cleanPattern, Pattern escapePattern, String quote)
          Method cleanSplit will return a quote-free array of String values; the given split array will be updated in place.
protected  Object[] coerceParsedLine(String line, Object[] split)
           
 Pattern createCleanPatternFor(String quote)
          Method createCleanPatternFor creates a regex Pattern for removing quote characters from a String.
 Pattern createEscapePatternFor(String quote)
          Method createEscapePatternFor creates a regex Pattern for cleaning quote escapes from a String.
 String[] createSplit(String value, Pattern splitPattern, int numValues)
          Method createSplit will split the given value with the given splitPattern.
 Pattern createSplitPatternFor(String delimiter, String quote)
          Method createSplitPatternFor creates a regex Pattern for splitting a line of text into its component parts using the given delimiter and quote Strings.
 String getDelimiter()
           
 String getQuote()
           
protected  Type[] inferTypes(Object[] result)
           
 Appendable joinFirstLine(Iterable iterable, Appendable buffer)
           
 Appendable joinLine(Iterable iterable, Appendable buffer)
           
protected  Appendable joinNoQuote(Iterable tuple, Appendable buffer)
           
protected  Appendable joinWithQuote(Iterable tuple, Appendable buffer)
           
protected  Object[] onlyParseLine(String line)
           
 Fields parseFirstLine(FlowProcess flowProcess, Tap tap)
           
 Object[] parseLine(String line)
           
protected  Iterable prepareFields(Iterable fields)
           
 void reset(Fields sourceFields, Fields sinkFields)
           
 void reset(String delimiter, String quote, Type[] types, boolean strict, boolean safe, Fields sourceFields, Fields sinkFields, FieldTypeResolver fieldTypeResolver)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

sourceFields

protected Fields sourceFields
Field sourceFields


splitPattern

protected Pattern splitPattern
Field splitPattern


cleanPattern

protected Pattern cleanPattern
Field cleanPattern


escapePattern

protected Pattern escapePattern
Field escapePattern


delimiter

protected String delimiter
Field delimiter


quote

protected String quote
Field quote


strict

protected boolean strict
Field strict


enforceStrict

protected boolean enforceStrict
Field enforceStrict


numValues

protected int numValues
Field numValues


types

protected Type[] types
Field types


coercibles

protected CoercibleType[] coercibles
Field coercibles


safe

protected boolean safe
Field safe


fieldTypeResolver

protected FieldTypeResolver fieldTypeResolver
Field fieldTypeResolver

Constructor Detail

DelimitedParser

public DelimitedParser(String delimiter,
                       String quote,
                       Class[] types)

DelimitedParser

public DelimitedParser(String delimiter,
                       String quote,
                       Class[] types,
                       boolean strict,
                       boolean safe)

DelimitedParser

public DelimitedParser(String delimiter,
                       String quote,
                       FieldTypeResolver fieldTypeResolver)

DelimitedParser

public DelimitedParser(String delimiter,
                       String quote,
                       Class[] types,
                       boolean strict,
                       boolean safe,
                       FieldTypeResolver fieldTypeResolver)

DelimitedParser

public DelimitedParser(String delimiter,
                       String quote,
                       Class[] types,
                       boolean strict,
                       boolean safe,
                       Fields sourceFields,
                       Fields sinkFields)

DelimitedParser

public DelimitedParser(String delimiter,
                       String quote,
                       Class[] types,
                       boolean strict,
                       boolean safe,
                       Fields sourceFields,
                       Fields sinkFields,
                       FieldTypeResolver fieldTypeResolver)
Method Detail

reset

public void reset(Fields sourceFields,
                  Fields sinkFields)

reset

public void reset(String delimiter,
                  String quote,
                  Type[] types,
                  boolean strict,
                  boolean safe,
                  Fields sourceFields,
                  Fields sinkFields,
                  FieldTypeResolver fieldTypeResolver)

getDelimiter

public String getDelimiter()

getQuote

public String getQuote()

createEscapePatternFor

public Pattern createEscapePatternFor(String quote)
Method createEscapePatternFor creates a regex Pattern for cleaning quote escapes from a String.

If quote is null or empty, a null value will be returned.

Parameters:
quote - of type String
Returns:
Pattern

createCleanPatternFor

public Pattern createCleanPatternFor(String quote)
Method createCleanPatternFor creates a regex Pattern for removing quote characters from a String.

If quote is null or empty, a null value will be returned.

Parameters:
quote - of type String
Returns:
Pattern

createSplitPatternFor

public Pattern createSplitPatternFor(String delimiter,
                                     String quote)
Method createSplitPatternFor creates a regex Pattern for splitting a line of text into its component parts using the given delimiter and quote Strings. quote may be null.

Parameters:
delimiter - of type String
quote - of type String
Returns:
Pattern

createSplit

public String[] createSplit(String value,
                            Pattern splitPattern,
                            int numValues)
Method createSplit will split the given value with the given splitPattern.

Parameters:
value - of type String
splitPattern - of type Pattern
numValues - of type int
Returns:
String[]

cleanSplit

public Object[] cleanSplit(Object[] split,
                           Pattern cleanPattern,
                           Pattern escapePattern,
                           String quote)
Method cleanSplit will return a quote-free array of String values; the given split array will be updated in place.

If cleanPattern is null, quote cleaning will not be performed, but all empty String values will be replaced with a null value.

Parameters:
split - of type Object[]
cleanPattern - of type Pattern
escapePattern - of type Pattern
quote - of type String
Returns:
Object[] as a convenience

parseFirstLine

public Fields parseFirstLine(FlowProcess flowProcess,
                             Tap tap)

parseLine

public Object[] parseLine(String line)

cleanParsedLine

protected Object[] cleanParsedLine(Object[] split)

coerceParsedLine

protected Object[] coerceParsedLine(String line,
                                    Object[] split)

onlyParseLine

protected Object[] onlyParseLine(String line)

joinFirstLine

public Appendable joinFirstLine(Iterable iterable,
                                Appendable buffer)

joinLine

public Appendable joinLine(Iterable iterable,
                           Appendable buffer)

joinWithQuote

protected Appendable joinWithQuote(Iterable tuple,
                                   Appendable buffer)
                            throws IOException
Throws:
IOException

joinNoQuote

protected Appendable joinNoQuote(Iterable tuple,
                                 Appendable buffer)
                          throws IOException
Throws:
IOException

inferTypes

protected Type[] inferTypes(Object[] result)

prepareFields

protected Iterable prepareFields(Iterable fields)

cleanFields

protected Object[] cleanFields(Object[] result)


Copyright © 2007-2015 Concurrent, Inc. All Rights Reserved.