/*
 * Copyright (c) 2016-2017 Chris K Wensel. All Rights Reserved.
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.operation.regex;

import java.beans.ConstructorProperties;
import java.util.regex.Matcher;

import cascading.flow.FlowProcess;
import cascading.management.annotation.Property;
import cascading.management.annotation.PropertyDescription;
import cascading.management.annotation.Visibility;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import cascading.operation.OperationCall;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import cascading.util.Pair;

/**
 * Class RegexReplace substitutes the text matched by a regular expression with a replacement value.
 * <p>
 * RegexReplace expects a single argument value. If more than one argument value is passed, only the
 * first is handled, the remainder are ignored.
 * <p>
 * Note a {@code null} valued argument is converted to an empty string ({@code ""}) before the regex
 * is applied.
 * <p>
 * Any Object value will be coerced to a String type if type information is provided. See the
 * {@link cascading.tuple.type.CoercibleType} interface to control how custom Object types are converted to String
 * values.
 */
public class RegexReplace extends RegexOperation<Pair<Matcher, TupleEntry>> implements Function<Pair<Matcher, TupleEntry>>
  {
  /** Text substituted for each match; handed as-is to the {@link Matcher} replace methods. */
  private final String replacement;
  /** When {@code true} every match is replaced, otherwise only the first one. Defaults to {@code true}. */
  private boolean replaceAll = true;

  /**
   * Constructor RegexReplace creates a new RegexReplace instance that lets the caller choose between
   * replacing every match or only the first match.
   *
   * @param fieldDeclaration of type Fields
   * @param patternString    of type String
   * @param replacement      of type String
   * @param replaceAll       of type boolean, {@code false} to replace only the first match
   */
  @ConstructorProperties({"fieldDeclaration", "patternString", "replacement", "replaceAll"})
  public RegexReplace( Fields fieldDeclaration, String patternString, String replacement, boolean replaceAll )
    {
    this( fieldDeclaration, patternString, replacement );
    this.replaceAll = replaceAll;
    }

  /**
   * Constructor RegexReplace creates a new RegexReplace instance that replaces every match.
   *
   * @param fieldDeclaration of type Fields
   * @param patternString    of type String
   * @param replacement      of type String
   */
  @ConstructorProperties({"fieldDeclaration", "patternString", "replacement"})
  public RegexReplace( Fields fieldDeclaration, String patternString, String replacement )
    {
    super( 1, fieldDeclaration, patternString );
    this.replacement = replacement;
    }

  /**
   * Method getReplacement returns the replacement value given at construction.
   *
   * @return the replacement String
   */
  @Property(name = "replacement", visibility = Visibility.PUBLIC)
  @PropertyDescription("The string replacement value.")
  public String getReplacement()
    {
    return replacement;
    }

  /**
   * Method isReplaceAll reports whether every match is replaced or only the first.
   *
   * @return {@code true} if all matches are replaced
   */
  @Property(name = "replaceAll", visibility = Visibility.PUBLIC)
  @PropertyDescription("Will replace all occurrences of pattern.")
  public boolean isReplaceAll()
    {
    return replaceAll;
    }

  /**
   * Stores a reusable {@link Matcher} and a single-valued output {@link TupleEntry} as the operation
   * context, so {@link #operate(FlowProcess, FunctionCall)} can reuse both on every call.
   *
   * @param flowProcess   of type FlowProcess
   * @param operationCall of type OperationCall, receives the context
   */
  @Override
  public void prepare( FlowProcess flowProcess, OperationCall<Pair<Matcher, TupleEntry>> operationCall )
    {
    TupleEntry reusableEntry = new TupleEntry( operationCall.getDeclaredFields(), Tuple.size( 1 ) );
    Matcher reusableMatcher = getPattern().matcher( "" );

    operationCall.setContext( new Pair<>( reusableMatcher, reusableEntry ) );
    }

  /**
   * Applies the replacement to the first argument value and emits the result through the output collector.
   *
   * @param flowProcess  of type FlowProcess
   * @param functionCall of type FunctionCall, supplies the arguments, the context and the output collector
   */
  @Override
  public void operate( FlowProcess flowProcess, FunctionCall<Pair<Matcher, TupleEntry>> functionCall )
    {
    // the argument is read as a String; a null value is treated as the empty string
    String argument = functionCall.getArguments().getString( 0 );
    String input = argument == null ? "" : argument;

    Pair<Matcher, TupleEntry> context = functionCall.getContext();
    TupleEntry result = context.getRhs();

    // the matcher held in the context is re-targeted at the current value
    Matcher matcher = context.getLhs().reset( input );

    String replaced = replaceAll ? matcher.replaceAll( replacement ) : matcher.replaceFirst( replacement );

    result.setString( 0, replaced );

    functionCall.getOutputCollector().add( result );
    }

  /**
   * Two instances are equal when the parent state, the replaceAll flag and the replacement value all match.
   *
   * @param object the value to compare against
   * @return {@code true} if the given object is an equivalent RegexReplace
   */
  @Override
  public boolean equals( Object object )
    {
    if( object == this )
      return true;

    if( !( object instanceof RegexReplace ) || !super.equals( object ) )
      return false;

    RegexReplace other = (RegexReplace) object;

    if( other.replaceAll != replaceAll )
      return false;

    // null-safe comparison of the replacement values
    return replacement == null ? other.replacement == null : replacement.equals( other.replacement );
    }

  /**
   * Combines the parent hash with the replacement value and the replaceAll flag.
   *
   * @return the hash code
   */
  @Override
  public int hashCode()
    {
    int hash = super.hashCode();
    int replacementHash = replacement == null ? 0 : replacement.hashCode();

    hash = 31 * hash + replacementHash;

    return 31 * hash + ( replaceAll ? 1 : 0 );
    }
  }