/*
 * Copyright (c) 2007-2016 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.operation.regex;

import java.beans.ConstructorProperties;
import java.util.regex.Matcher;

import cascading.flow.FlowProcess;
import cascading.operation.OperationCall;
import cascading.tuple.Fields;
import cascading.tuple.TupleEntry;
import cascading.util.Util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Class RegexMatcher is the base class for common regular expression operations.
 * <p>
 * This operation uses {@link java.util.regex.Matcher} internally, specifically the method {@link java.util.regex.Matcher#find()}.
 *
 * @see java.util.regex.Matcher
 * @see java.util.regex.Pattern
 */
public class RegexMatcher extends RegexOperation<Matcher>
  {
  /** Field LOG */
  private static final Logger LOG = LoggerFactory.getLogger( RegexMatcher.class );

  /** Delimiter (a single tab) used when joining tuple values if none is given to the constructor. */
  public static final String DEFAULT_DELIM = "\t";

  /** Value the result of {@link Matcher#find()} is compared against by the match methods. */
  protected final boolean negateMatch;
  /** Delimiter placed between tuple values by {@link #matchWholeTuple(Matcher, TupleEntry)}; never null. */
  protected final String delimiter;

  /**
   * Creates a matcher with {@code negateMatch} set to {@code false} and the {@link #DEFAULT_DELIM} delimiter.
   *
   * @param patternString the regular expression, passed to the super class
   */
  @ConstructorProperties({"patternString"})
  protected RegexMatcher( String patternString )
    {
    this( patternString, false, DEFAULT_DELIM );
    }

  /**
   * Creates a matcher with {@code negateMatch} set to {@code false} and the given delimiter.
   *
   * @param patternString the regular expression, passed to the super class
   * @param delimiter     the delimiter used when joining tuple values
   * @throws IllegalArgumentException if {@code delimiter} is null
   */
  @ConstructorProperties({"patternString", "delimiter"})
  protected RegexMatcher( String patternString, String delimiter )
    {
    this( patternString, false, delimiter );
    }

  /**
   * Creates a matcher with the given {@code negateMatch} flag and the {@link #DEFAULT_DELIM} delimiter.
   *
   * @param patternString the regular expression, passed to the super class
   * @param negateMatch   the value the result of {@link Matcher#find()} is compared against
   */
  @ConstructorProperties({"patternString", "negateMatch"})
  protected RegexMatcher( String patternString, boolean negateMatch )
    {
    this( patternString, negateMatch, DEFAULT_DELIM );
    }

  /**
   * Creates a matcher with the given {@code negateMatch} flag and delimiter. The other constructors that do not
   * take a field declaration delegate here.
   *
   * @param patternString the regular expression, passed to the super class
   * @param negateMatch   the value the result of {@link Matcher#find()} is compared against
   * @param delimiter     the delimiter used when joining tuple values
   * @throws IllegalArgumentException if {@code delimiter} is null
   */
  @ConstructorProperties({"patternString", "negateMatch", "delimiter"})
  protected RegexMatcher( String patternString, boolean negateMatch, String delimiter )
    {
    super( patternString );

    if( delimiter == null )
      throw new IllegalArgumentException( "delimiter may not be null" );

    this.negateMatch = negateMatch;
    this.delimiter = delimiter;
    }

  /**
   * Creates a matcher declaring the given fields, with {@code negateMatch} set to {@code false} and the
   * {@link #DEFAULT_DELIM} delimiter.
   *
   * @param fieldDeclaration the declared fields; must have a size of exactly one
   * @param patternString    the regular expression, passed to the super class
   * @throws IllegalArgumentException if {@code fieldDeclaration} does not have a size of one
   */
  @ConstructorProperties({"fieldDeclaration", "patternString"})
  protected RegexMatcher( Fields fieldDeclaration, String patternString )
    {
    this( fieldDeclaration, patternString, false, DEFAULT_DELIM );
    }

  /**
   * Creates a matcher declaring the given fields, with {@code negateMatch} set to {@code false} and the
   * given delimiter.
   *
   * @param fieldDeclaration the declared fields; must have a size of exactly one
   * @param patternString    the regular expression, passed to the super class
   * @param delimiter        the delimiter used when joining tuple values
   * @throws IllegalArgumentException if {@code delimiter} is null or {@code fieldDeclaration} does not have a
   *                                  size of one
   */
  @ConstructorProperties({"fieldDeclaration", "patternString", "delimiter"})
  protected RegexMatcher( Fields fieldDeclaration, String patternString, String delimiter )
    {
    this( fieldDeclaration, patternString, false, delimiter );
    }

  /**
   * Creates a matcher declaring the given fields, with the given {@code negateMatch} flag and the
   * {@link #DEFAULT_DELIM} delimiter.
   *
   * @param fieldDeclaration the declared fields; must have a size of exactly one
   * @param patternString    the regular expression, passed to the super class
   * @param negateMatch      the value the result of {@link Matcher#find()} is compared against
   * @throws IllegalArgumentException if {@code fieldDeclaration} does not have a size of one
   */
  @ConstructorProperties({"fieldDeclaration", "patternString", "negateMatch"})
  protected RegexMatcher( Fields fieldDeclaration, String patternString, boolean negateMatch )
    {
    this( fieldDeclaration, patternString, negateMatch, DEFAULT_DELIM );
    }

  /**
   * Creates a matcher declaring the given fields, with the given {@code negateMatch} flag and delimiter.
   * The other constructors that take a field declaration delegate here.
   *
   * @param fieldDeclaration the declared fields; must have a size of exactly one
   * @param patternString    the regular expression, passed to the super class
   * @param negateMatch      the value the result of {@link Matcher#find()} is compared against
   * @param delimiter        the delimiter used when joining tuple values
   * @throws IllegalArgumentException if {@code delimiter} is null or {@code fieldDeclaration} does not have a
   *                                  size of one
   */
  @ConstructorProperties({"fieldDeclaration", "patternString", "negateMatch", "delimiter"})
  protected RegexMatcher( Fields fieldDeclaration, String patternString, boolean negateMatch, String delimiter )
    {
    super( ANY, fieldDeclaration, patternString );

    // the delimiter is checked before the field declaration, so a null delimiter is reported first
    if( delimiter == null )
      throw new IllegalArgumentException( "delimiter may not be null" );

    this.negateMatch = negateMatch;
    this.delimiter = delimiter;

    verify();
    }

  /**
   * Returns the flag the result of {@link Matcher#find()} is compared against.
   *
   * @return the negateMatch flag given at construction, {@code false} if none was given
   */
  public final boolean isNegateMatch()
    {
    return negateMatch;
    }

  /**
   * Returns the delimiter used when joining tuple values.
   *
   * @return the delimiter given at construction, {@link #DEFAULT_DELIM} if none was given
   */
  public final String getDelimiter()
    {
    return delimiter;
    }

  /** Rejects a field declaration whose size is not exactly one. */
  private void verify()
    {
    if( fieldDeclaration.size() != 1 )
      throw new IllegalArgumentException( "num fields in fieldDeclaration must be one, found: " + fieldDeclaration.printVerbose() );
    }

  /**
   * Stores a {@link Matcher} for this operation's pattern as the call context. The matcher is created against
   * an empty string; the match methods below call {@link Matcher#reset(CharSequence)} on whichever matcher
   * they are handed (presumably this one) before every use.
   *
   * @param flowProcess   the current flow process, unused here
   * @param operationCall the call whose context receives the matcher
   */
  @Override
  public void prepare( FlowProcess flowProcess, OperationCall<Matcher> operationCall )
    {
    operationCall.setContext( getPattern().matcher( "" ) );
    }

  /**
   * Joins every value of the input into one string, separated by the delimiter, and searches it with
   * {@link Matcher#find()}.
   * <p>
   * Note the sense of the result: it is {@code true} when the outcome of {@code find()} <em>equals</em>
   * {@code negateMatch}. With the default {@code negateMatch} of {@code false} this means {@code true} is
   * returned when the pattern was <em>not</em> found.
   *
   * @param matcher the matcher to reset against the joined values
   * @param input   the tuple entry whose values are joined
   * @return {@code true} if the result of {@code find()} equals {@code negateMatch}
   */
  protected boolean matchWholeTuple( Matcher matcher, TupleEntry input )
    {
    String joined = Util.join( input.asIterableOf( String.class ), delimiter, false );

    matcher.reset( joined );

    boolean matchFound = matcher.find();

    // guard kept so the pattern string lookup and argument boxing are skipped when debug is off
    if( LOG.isDebugEnabled() )
      LOG.debug( "pattern: {}, matches: {}", getPatternString(), matchFound );

    return matchFound == negateMatch;
    }

  /**
   * Tests each value of the input individually, see {@link #matchEachElementPos(Matcher, TupleEntry)}.
   *
   * @param matcher the matcher to reset against each value
   * @param input   the tuple entry whose values are tested
   * @return {@code true} if {@code matchEachElementPos} returns a position other than -1
   */
  protected boolean matchEachElement( Matcher matcher, TupleEntry input )
    {
    return matchEachElementPos( matcher, input ) != -1;
    }

  /**
   * Searches each value of the input in positional order with {@link Matcher#find()} and returns the first
   * position where the outcome of {@code find()} equals {@code negateMatch}. A null value is searched as the
   * empty string.
   * <p>
   * With the default {@code negateMatch} of {@code false} this is the position of the first value in which the
   * pattern was <em>not</em> found.
   *
   * @param matcher the matcher to reset against each value
   * @param input   the tuple entry whose values are tested
   * @return the zero based position of the first such value, or -1 if there is none
   */
  protected int matchEachElementPos( Matcher matcher, TupleEntry input )
    {
    for( int i = 0; i < input.size(); i++ )
      {
      String value = input.getString( i );

      if( value == null )
        value = "";

      matcher.reset( value );

      boolean matchFound = matcher.find();

      // guard kept so the pattern string lookup and varargs array are skipped when debug is off
      if( LOG.isDebugEnabled() )
        LOG.debug( "pattern: {}, matches: {}, element: '{}'", getPatternString(), matchFound, value );

      if( matchFound == negateMatch )
        return i;
      }

    return -1;
    }

  /**
   * Two instances are equal when the super class considers them equal and both the negateMatch flag and the
   * delimiter are the same.
   *
   * @param object the object to compare with
   * @return {@code true} if the given object is an equal RegexMatcher
   */
  @Override
  public boolean equals( Object object )
    {
    if( this == object )
      return true;
    if( !( object instanceof RegexMatcher ) )
      return false;
    if( !super.equals( object ) )
      return false;

    RegexMatcher that = (RegexMatcher) object;

    if( negateMatch != that.negateMatch )
      return false;

    // null-safe comparison, although every constructor rejects a null delimiter
    if( delimiter == null )
      return that.delimiter == null;

    return delimiter.equals( that.delimiter );
    }

  /**
   * Combines the super class hash code with the negateMatch flag and the delimiter, consistent with
   * {@link #equals(Object)}.
   *
   * @return the hash code
   */
  @Override
  public int hashCode()
    {
    int result = super.hashCode();
    result = 31 * result + ( negateMatch ? 1 : 0 );
    result = 31 * result + ( delimiter != null ? delimiter.hashCode() : 0 );
    return result;
    }
  }