/*
 * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.operation.regex;

import java.beans.ConstructorProperties;
import java.util.regex.Matcher;

import cascading.flow.FlowProcess;
import cascading.operation.OperationCall;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Class RegexMatcher is the base class for common regular expression operations.
 * <p/>
 * This operation uses {@link java.util.regex.Matcher} internally, specifically the method
 * {@link java.util.regex.Matcher#find()}.
 * <p/>
 * Both matching strategies offered here report {@code true} when the outcome of
 * {@code find()} is equal to the configured {@code negateMatch} flag.
 *
 * @see java.util.regex.Matcher
 * @see java.util.regex.Pattern
 */
public class RegexMatcher extends RegexOperation<Matcher>
  {
  /** Field LOG */
  private static final Logger LOG = LoggerFactory.getLogger( RegexMatcher.class );

  /** Field negateMatch */
  protected final boolean negateMatch;

  /**
   * Creates a matcher for the given pattern with {@code negateMatch} set to {@code false}.
   *
   * @param patternString of type String
   */
  @ConstructorProperties({"patternString"})
  protected RegexMatcher( String patternString )
    {
    this( patternString, false );
    }

  /**
   * Creates a matcher for the given pattern and {@code negateMatch} flag.
   *
   * @param patternString of type String
   * @param negateMatch   of type boolean, the value a {@code find()} result is compared against
   */
  @ConstructorProperties({"patternString", "negateMatch"})
  protected RegexMatcher( String patternString, boolean negateMatch )
    {
    super( patternString );
    this.negateMatch = negateMatch;
    }

  /**
   * Creates a matcher for the given field declaration and pattern with {@code negateMatch}
   * set to {@code false}.
   *
   * @param fieldDeclaration of type Fields, must declare exactly one field
   * @param patternString    of type String
   * @throws IllegalArgumentException if the field declaration does not have a size of one
   */
  @ConstructorProperties({"fieldDeclaration", "patternString"})
  protected RegexMatcher( Fields fieldDeclaration, String patternString )
    {
    this( fieldDeclaration, patternString, false );
    }

  /**
   * Creates a matcher for the given field declaration, pattern and {@code negateMatch} flag.
   *
   * @param fieldDeclaration of type Fields, must declare exactly one field
   * @param patternString    of type String
   * @param negateMatch      of type boolean, the value a {@code find()} result is compared against
   * @throws IllegalArgumentException if the field declaration does not have a size of one
   */
  @ConstructorProperties({"fieldDeclaration", "patternString", "negateMatch"})
  protected RegexMatcher( Fields fieldDeclaration, String patternString, boolean negateMatch )
    {
    super( ANY, fieldDeclaration, patternString );
    this.negateMatch = negateMatch;

    verify();
    }

  /**
   * Method isNegateMatch returns the negateMatch flag given at construction.
   *
   * @return boolean
   */
  public boolean isNegateMatch()
    {
    return negateMatch;
    }

  /** Rejects any field declaration whose size is not exactly one. */
  private void verify()
    {
    if( fieldDeclaration.size() == 1 )
      return;

    throw new IllegalArgumentException( "num fields in fieldDeclaration must be one, found: " + fieldDeclaration.printVerbose() );
    }

  /**
   * Stores a {@link Matcher} created from this operation's pattern as the call context.
   * The matcher starts on an empty input and is reset before each use by the match methods.
   *
   * @param flowProcess   of type FlowProcess
   * @param operationCall of type OperationCall, receives the new matcher as its context
   */
  @Override
  public void prepare( FlowProcess flowProcess, OperationCall<Matcher> operationCall )
    {
    Matcher emptyMatcher = getPattern().matcher( "" );

    operationCall.setContext( emptyMatcher );
    }

  /**
   * Method matchWholeTuple applies the pattern to the tab-joined string form of the
   * whole tuple.
   *
   * @param matcher of type Matcher, reset with the tuple text before searching
   * @param input   of type Tuple
   * @return boolean, true when the result of {@code find()} equals {@code negateMatch}
   */
  protected boolean matchWholeTuple( Matcher matcher, Tuple input )
    {
    String joined = input.toString( "\t", false );

    matcher.reset( joined );

    boolean found = matcher.find();

    LOG.debug( "pattern: {}, matches: {}", getPatternString(), found );

    return found == negateMatch;
    }

  /**
   * Method matchEachElement applies the pattern to each tuple element in turn.
   *
   * @param matcher of type Matcher
   * @param input   of type Tuple
   * @return boolean, true when {@link #matchEachElementPos(Matcher, Tuple)} returns anything but -1
   */
  protected boolean matchEachElement( Matcher matcher, Tuple input )
    {
    int position = matchEachElementPos( matcher, input );

    return position != -1;
    }

  /**
   * Method matchEachElementPos applies the pattern to each tuple element in iteration order
   * and reports the first position whose {@code find()} result equals {@code negateMatch}.
   * A {@code null} element is treated as the empty string.
   *
   * @param matcher of type Matcher, reset with each element's string form before searching
   * @param input   of type Tuple
   * @return int, the zero-based position of the first such element, or -1 if there is none
   */
  protected int matchEachElementPos( Matcher matcher, Tuple input )
    {
    int index = 0;

    for( Object current : input )
      {
      // null elements are matched (and logged) as the empty string
      Object element = current == null ? "" : current;

      matcher.reset( element.toString() );

      boolean found = matcher.find();

      if( LOG.isDebugEnabled() )
        LOG.debug( "pattern: " + getPatternString() + ", matches: " + found + ", element: '" + element + "'" );

      if( found == negateMatch )
        return index;

      index++;
      }

    return -1;
    }

  @Override
  public boolean equals( Object object )
    {
    if( this == object )
      return true;

    if( !( object instanceof RegexMatcher ) || !super.equals( object ) )
      return false;

    RegexMatcher other = (RegexMatcher) object;

    return negateMatch == other.negateMatch;
    }

  @Override
  public int hashCode()
    {
    int flagHash = negateMatch ? 1 : 0;

    return 31 * super.hashCode() + flagHash;
    }
  }