001/* 002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.operation.regex; 022 023import java.beans.ConstructorProperties; 024import java.util.regex.Matcher; 025 026import cascading.flow.FlowProcess; 027import cascading.operation.Filter; 028import cascading.operation.FilterCall; 029 030/** 031 * Class RegexFilter will apply the regex patternString against every input Tuple value and filter 032 * the Tuple stream accordingly. 033 * <p/> 034 * By default, Tuples that match the given pattern are kept, and Tuples that do not 035 * match are filtered out. This can be changed by setting removeMatch to true. 036 * <p/> 037 * Also, by default, the whole Tuple is matched against the given patternString (tab delimited, unless otherwise 038 * specified). If matchEachElement is set to true, the pattern is applied to each Tuple value individually. 039 * <p/> 040 * This operation uses {@link java.util.regex.Matcher} internally, specifically the method 041 * {@link java.util.regex.Matcher#find()}. 042 * <p/> 043 * Note a {@code null} valued argument passed to the parser will be converted to an empty string ({@code ""}) before 044 * the regex is applied. 045 * <p/> 046 * Any Object value will be coerced to a String type via any provided {@link cascading.tuple.type.CoercibleType} on 047 * the argument selector or via its {@code toString()} method. 048 * 049 * @see java.util.regex.Matcher 050 * @see java.util.regex.Pattern 051 */ 052public class RegexFilter extends RegexMatcher implements Filter<Matcher> 053 { 054 /** Field matchEachElement */ 055 protected final boolean matchEachElement; 056 057 /** 058 * Constructor RegexFilter creates a new RegexFilter instance. 059 * 060 * @param patternString of type String 061 */ 062 @ConstructorProperties({"patternString"}) 063 public RegexFilter( String patternString ) 064 { 065 super( patternString ); 066 this.matchEachElement = false; 067 } 068 069 /** 070 * Constructor RegexFilter creates a new RegexFilter instance. 071 * 072 * @param patternString of type String 073 * @param delimiter of type String 074 */ 075 @ConstructorProperties({"patternString", "delimiter"}) 076 public RegexFilter( String patternString, String delimiter ) 077 { 078 super( patternString, delimiter ); 079 this.matchEachElement = false; 080 } 081 082 /** 083 * Constructor RegexFilter creates a new RegexFilter instance. 084 * 085 * @param patternString of type String 086 * @param removeMatch of type boolean 087 */ 088 @ConstructorProperties({"patternString", "removeMatch"}) 089 public RegexFilter( String patternString, boolean removeMatch ) 090 { 091 super( patternString, removeMatch ); 092 this.matchEachElement = false; 093 } 094 095 /** 096 * Constructor RegexFilter creates a new RegexFilter instance. 097 * 098 * @param patternString of type String 099 * @param removeMatch of type boolean 100 * @param delimiter of type String 101 */ 102 @ConstructorProperties({"patternString", "removeMatch", "delimiter"}) 103 public RegexFilter( String patternString, boolean removeMatch, String delimiter ) 104 { 105 super( patternString, removeMatch, delimiter ); 106 this.matchEachElement = false; 107 108 } 109 110 /** 111 * @param patternString of type String 112 * @param removeMatch of type boolean, set to true if a match should be filtered 113 * @param matchEachElement of type boolean, set to true if each element should be matched individually 114 */ 115 @ConstructorProperties({"patternString", "removeMatch", "matchEachElement"}) 116 public RegexFilter( String patternString, boolean removeMatch, boolean matchEachElement ) 117 { 118 super( patternString, removeMatch ); 119 this.matchEachElement = matchEachElement; 120 } 121 122 /** 123 * @param patternString of type String 124 * @param removeMatch of type boolean, set to true if a match should be filtered 125 * @param matchEachElement of type boolean, set to true if each element should be matched individually 126 * @param delimiter of type String 127 */ 128 @ConstructorProperties({"patternString", "removeMatch", "matchEachElement", "delimiter"}) 129 public RegexFilter( String patternString, boolean removeMatch, boolean matchEachElement, String delimiter ) 130 { 131 super( patternString, removeMatch, delimiter ); 132 this.matchEachElement = matchEachElement; 133 } 134 135 public boolean isMatchEachElement() 136 { 137 return matchEachElement; 138 } 139 140 @Override 141 public boolean isRemove( FlowProcess flowProcess, FilterCall<Matcher> filterCall ) 142 { 143 if( matchEachElement ) 144 return matchEachElement( filterCall.getContext(), filterCall.getArguments() ); 145 else 146 return matchWholeTuple( filterCall.getContext(), filterCall.getArguments() ); 147 } 148 149 @Override 150 public boolean equals( Object object ) 151 { 152 if( this == object ) 153 return true; 154 if( !( object instanceof RegexFilter ) ) 155 return false; 156 if( !super.equals( object ) ) 157 return false; 158 159 RegexFilter that = (RegexFilter) object; 160 161 if( matchEachElement != that.matchEachElement ) 162 return false; 163 164 return true; 165 } 166 167 @Override 168 public int hashCode() 169 { 170 int result = super.hashCode(); 171 result = 31 * result + ( matchEachElement ? 1 : 0 ); 172 return result; 173 } 174 }