001    /*
002     * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.operation.regex;
022    
023    import java.beans.ConstructorProperties;
024    import java.util.regex.Matcher;
025    
026    import cascading.flow.FlowProcess;
027    import cascading.management.annotation.Property;
028    import cascading.management.annotation.PropertyDescription;
029    import cascading.management.annotation.Visibility;
030    import cascading.operation.Function;
031    import cascading.operation.FunctionCall;
032    import cascading.operation.OperationCall;
033    import cascading.tuple.Fields;
034    import cascading.tuple.Tuple;
035    import cascading.util.Pair;
036    
037    /**
038     * Class RegexReplace is used to replace a matched regex with a replacement value.
039     * <p/>
040     * RegexReplace only expects one field value. If more than one argument value is passed, only the
041     * first is handled, the remainder are ignored.
042     * <p/>
043     * Note a {@code null} valued argument passed to the parser will be converted to an empty string ({@code ""}) before
044     * the regex is applied.
045     * <p/>
046     * Any Object value will be coerced to a String type if type information is provided. See the
047     * {@link cascading.tuple.type.CoercibleType} interface to control how custom Object types are converted to String
048     * values.
049     */
050    public class RegexReplace extends RegexOperation<Pair<Matcher, Tuple>> implements Function<Pair<Matcher, Tuple>>
051      {
052      /** Field replacement */
053      private final String replacement;
054      /** Field replaceAll */
055      private boolean replaceAll = true;
056    
057      /**
058       * Constructor RegexReplace creates a new RegexReplace instance,
059       *
060       * @param fieldDeclaration of type Fields
061       * @param patternString    of type String
062       * @param replacement      of type String
063       * @param replaceAll       of type boolean
064       */
065      @ConstructorProperties({"fieldDeclaration", "patternString", "replacement", "replaceAll"})
066      public RegexReplace( Fields fieldDeclaration, String patternString, String replacement, boolean replaceAll )
067        {
068        this( fieldDeclaration, patternString, replacement );
069        this.replaceAll = replaceAll;
070        }
071    
072      /**
073       * Constructor RegexReplace creates a new RegexReplace instance.
074       *
075       * @param fieldDeclaration of type Fields
076       * @param patternString    of type String
077       * @param replacement      of type String
078       */
079      @ConstructorProperties({"fieldDeclaration", "patternString", "replacement"})
080      public RegexReplace( Fields fieldDeclaration, String patternString, String replacement )
081        {
082        super( 1, fieldDeclaration, patternString );
083        this.replacement = replacement;
084        }
085    
086      @Property(name = "replacement", visibility = Visibility.PUBLIC)
087      @PropertyDescription("The string replacement value.")
088      public String getReplacement()
089        {
090        return replacement;
091        }
092    
093      @Property(name = "replaceAll", visibility = Visibility.PUBLIC)
094      @PropertyDescription("Will replace all occurrences of pattern.")
095      public boolean isReplaceAll()
096        {
097        return replaceAll;
098        }
099    
100      @Override
101      public void prepare( FlowProcess flowProcess, OperationCall<Pair<Matcher, Tuple>> operationCall )
102        {
103        operationCall.setContext( new Pair<Matcher, Tuple>( getPattern().matcher( "" ), Tuple.size( 1 ) ) );
104        }
105    
106      @Override
107      public void operate( FlowProcess flowProcess, FunctionCall<Pair<Matcher, Tuple>> functionCall )
108        {
109        // coerce to string
110        String value = functionCall.getArguments().getString( 0 );
111    
112        // make safe
113        if( value == null )
114          value = "";
115    
116        Tuple output = functionCall.getContext().getRhs();
117        Matcher matcher = functionCall.getContext().getLhs().reset( value );
118    
119        if( replaceAll )
120          output.set( 0, matcher.replaceAll( replacement ) );
121        else
122          output.set( 0, matcher.replaceFirst( replacement ) );
123    
124        functionCall.getOutputCollector().add( output );
125        }
126    
127      @Override
128      public boolean equals( Object object )
129        {
130        if( this == object )
131          return true;
132        if( !( object instanceof RegexReplace ) )
133          return false;
134        if( !super.equals( object ) )
135          return false;
136    
137        RegexReplace that = (RegexReplace) object;
138    
139        if( replaceAll != that.replaceAll )
140          return false;
141        if( replacement != null ? !replacement.equals( that.replacement ) : that.replacement != null )
142          return false;
143    
144        return true;
145        }
146    
147      @Override
148      public int hashCode()
149        {
150        int result = super.hashCode();
151        result = 31 * result + ( replacement != null ? replacement.hashCode() : 0 );
152        result = 31 * result + ( replaceAll ? 1 : 0 );
153        return result;
154        }
155      }