001    /*
002     * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.operation.regex;
022    
023    import java.beans.ConstructorProperties;
024    import java.util.regex.Matcher;
025    
026    import cascading.flow.FlowProcess;
027    import cascading.operation.OperationCall;
028    import cascading.tuple.Fields;
029    import cascading.tuple.Tuple;
030    import cascading.tuple.TupleEntry;
031    import org.slf4j.Logger;
032    import org.slf4j.LoggerFactory;
033    
034    /**
035     * Class RegexMatcher is the base class for common regular expression operations.
036     * <p/>
037     * This operation uses {@link java.util.regex.Matcher} internally, specifically the method {@link java.util.regex.Matcher#find()}.
038     *
039     * @see java.util.regex.Matcher
040     * @see java.util.regex.Pattern
041     */
042    public class RegexMatcher extends RegexOperation<Matcher>
043      {
044      /** Field LOG */
045      private static final Logger LOG = LoggerFactory.getLogger( RegexMatcher.class );
046    
047      /** Field removeMatch */
048      protected final boolean negateMatch;
049    
050      @ConstructorProperties({"patternString"})
051      protected RegexMatcher( String patternString )
052        {
053        super( patternString );
054        this.negateMatch = false;
055        }
056    
057      @ConstructorProperties({"patternString", "negateMatch"})
058      protected RegexMatcher( String patternString, boolean negateMatch )
059        {
060        super( patternString );
061        this.negateMatch = negateMatch;
062        }
063    
064      @ConstructorProperties({"fieldDeclaration", "patternString"})
065      protected RegexMatcher( Fields fieldDeclaration, String patternString )
066        {
067        super( ANY, fieldDeclaration, patternString );
068        this.negateMatch = false;
069    
070        verify();
071        }
072    
073      @ConstructorProperties({"fieldDeclaration", "patternString", "negateMatch"})
074      protected RegexMatcher( Fields fieldDeclaration, String patternString, boolean negateMatch )
075        {
076        super( ANY, fieldDeclaration, patternString );
077        this.negateMatch = negateMatch;
078    
079        verify();
080        }
081    
082      public boolean isNegateMatch()
083        {
084        return negateMatch;
085        }
086    
087      private void verify()
088        {
089        if( fieldDeclaration.size() != 1 )
090          throw new IllegalArgumentException( "num fields in fieldDeclaration must be one, found: " + fieldDeclaration.printVerbose() );
091        }
092    
093      @Override
094      public void prepare( FlowProcess flowProcess, OperationCall<Matcher> operationCall )
095        {
096        operationCall.setContext( getPattern().matcher( "" ) );
097        }
098    
099      protected boolean matchWholeTuple( Matcher matcher, TupleEntry input )
100        {
101        return matchWholeTuple( matcher, input.getTuple() );
102        }
103    
104      /**
105       * @deprecated use {@link #matchWholeTuple(java.util.regex.Matcher, cascading.tuple.TupleEntry)}
106       */
107      @Deprecated
108      protected boolean matchWholeTuple( Matcher matcher, Tuple input )
109        {
110        matcher.reset( input.toString( "\t", false ) );
111    
112        boolean matchFound = matcher.find();
113    
114        if( LOG.isDebugEnabled() )
115          LOG.debug( "pattern: {}, matches: {}", getPatternString(), matchFound );
116    
117        return matchFound == negateMatch;
118        }
119    
120      protected boolean matchEachElement( Matcher matcher, TupleEntry input )
121        {
122        return matchEachElement( matcher, input.getTuple() );
123        }
124    
125      /**
126       * @deprecated use {@link #matchEachElementPos(java.util.regex.Matcher, cascading.tuple.TupleEntry)}
127       */
128      @Deprecated
129      protected boolean matchEachElement( Matcher matcher, Tuple input )
130        {
131        return matchEachElementPos( matcher, input ) != -1;
132        }
133    
134      protected int matchEachElementPos( Matcher matcher, TupleEntry input )
135        {
136        return matchEachElementPos( matcher, input.getTuple() );
137        }
138    
139      /**
140       * @deprecated use {@link #matchEachElementPos(java.util.regex.Matcher, cascading.tuple.TupleEntry)}
141       */
142      @Deprecated
143      protected int matchEachElementPos( Matcher matcher, Tuple input )
144        {
145        int pos = 0;
146        for( Object value : input )
147          {
148          if( value == null )
149            value = "";
150    
151          matcher.reset( value.toString() );
152    
153          boolean matchFound = matcher.find();
154    
155          if( LOG.isDebugEnabled() )
156            LOG.debug( "pattern: " + getPatternString() + ", matches: " + matchFound + ", element: '" + value + "'" );
157    
158          if( matchFound == negateMatch )
159            return pos;
160    
161          pos++;
162          }
163    
164        return -1;
165        }
166    
167      @Override
168      public boolean equals( Object object )
169        {
170        if( this == object )
171          return true;
172        if( !( object instanceof RegexMatcher ) )
173          return false;
174        if( !super.equals( object ) )
175          return false;
176    
177        RegexMatcher that = (RegexMatcher) object;
178    
179        if( negateMatch != that.negateMatch )
180          return false;
181    
182        return true;
183        }
184    
185      @Override
186      public int hashCode()
187        {
188        int result = super.hashCode();
189        result = 31 * result + ( negateMatch ? 1 : 0 );
190        return result;
191        }
192      }