001/*
002 * Copyright (c) 2016-2017 Chris K Wensel. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.operation.regex;
023
024import java.beans.ConstructorProperties;
025import java.util.regex.Matcher;
026
027import cascading.flow.FlowProcess;
028import cascading.management.annotation.Property;
029import cascading.management.annotation.PropertyDescription;
030import cascading.management.annotation.Visibility;
031import cascading.operation.Function;
032import cascading.operation.FunctionCall;
033import cascading.operation.OperationCall;
034import cascading.tuple.Fields;
035import cascading.tuple.Tuple;
036import cascading.tuple.TupleEntry;
037import cascading.util.Pair;
038
039/**
040 * Class RegexReplace is used to replace a matched regex with a replacement value.
041 * <p>
042 * RegexReplace only expects one field value. If more than one argument value is passed, only the
043 * first is handled, the remainder are ignored.
044 * <p>
045 * Note a {@code null} valued argument passed to the parser will be converted to an empty string ({@code ""}) before
046 * the regex is applied.
047 * <p>
048 * Any Object value will be coerced to a String type if type information is provided. See the
049 * {@link cascading.tuple.type.CoercibleType} interface to control how custom Object types are converted to String
050 * values.
051 */
052public class RegexReplace extends RegexOperation<Pair<Matcher, TupleEntry>> implements Function<Pair<Matcher, TupleEntry>>
053  {
054  /** Field replacement */
055  private final String replacement;
056  /** Field replaceAll */
057  private boolean replaceAll = true;
058
059  /**
060   * Constructor RegexReplace creates a new RegexReplace instance,
061   *
062   * @param fieldDeclaration of type Fields
063   * @param patternString    of type String
064   * @param replacement      of type String
065   * @param replaceAll       of type boolean
066   */
067  @ConstructorProperties({"fieldDeclaration", "patternString", "replacement", "replaceAll"})
068  public RegexReplace( Fields fieldDeclaration, String patternString, String replacement, boolean replaceAll )
069    {
070    this( fieldDeclaration, patternString, replacement );
071    this.replaceAll = replaceAll;
072    }
073
074  /**
075   * Constructor RegexReplace creates a new RegexReplace instance.
076   *
077   * @param fieldDeclaration of type Fields
078   * @param patternString    of type String
079   * @param replacement      of type String
080   */
081  @ConstructorProperties({"fieldDeclaration", "patternString", "replacement"})
082  public RegexReplace( Fields fieldDeclaration, String patternString, String replacement )
083    {
084    super( 1, fieldDeclaration, patternString );
085    this.replacement = replacement;
086    }
087
088  @Property(name = "replacement", visibility = Visibility.PUBLIC)
089  @PropertyDescription("The string replacement value.")
090  public String getReplacement()
091    {
092    return replacement;
093    }
094
095  @Property(name = "replaceAll", visibility = Visibility.PUBLIC)
096  @PropertyDescription("Will replace all occurrences of pattern.")
097  public boolean isReplaceAll()
098    {
099    return replaceAll;
100    }
101
102  @Override
103  public void prepare( FlowProcess flowProcess, OperationCall<Pair<Matcher, TupleEntry>> operationCall )
104    {
105    TupleEntry tupleEntry = new TupleEntry( operationCall.getDeclaredFields(), Tuple.size( 1 ) );
106
107    operationCall.setContext( new Pair<>( getPattern().matcher( "" ), tupleEntry ) );
108    }
109
110  @Override
111  public void operate( FlowProcess flowProcess, FunctionCall<Pair<Matcher, TupleEntry>> functionCall )
112    {
113    // coerce to string
114    String value = functionCall.getArguments().getString( 0 );
115
116    // make safe
117    if( value == null )
118      value = "";
119
120    TupleEntry output = functionCall.getContext().getRhs();
121    Matcher matcher = functionCall.getContext().getLhs().reset( value );
122
123    if( replaceAll )
124      output.setString( 0, matcher.replaceAll( replacement ) );
125    else
126      output.setString( 0, matcher.replaceFirst( replacement ) );
127
128    functionCall.getOutputCollector().add( output );
129    }
130
131  @Override
132  public boolean equals( Object object )
133    {
134    if( this == object )
135      return true;
136    if( !( object instanceof RegexReplace ) )
137      return false;
138    if( !super.equals( object ) )
139      return false;
140
141    RegexReplace that = (RegexReplace) object;
142
143    if( replaceAll != that.replaceAll )
144      return false;
145    if( replacement != null ? !replacement.equals( that.replacement ) : that.replacement != null )
146      return false;
147
148    return true;
149    }
150
151  @Override
152  public int hashCode()
153    {
154    int result = super.hashCode();
155    result = 31 * result + ( replacement != null ? replacement.hashCode() : 0 );
156    result = 31 * result + ( replaceAll ? 1 : 0 );
157    return result;
158    }
159  }