001    /*
002     * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.operation.regex;
022    
023    import java.beans.ConstructorProperties;
024    import java.util.regex.Matcher;
025    
026    import cascading.flow.FlowProcess;
027    import cascading.operation.Filter;
028    import cascading.operation.FilterCall;
029    
030    /**
031     * Class RegexFilter will apply the regex patternString against every input Tuple value and filter
032     * the Tuple stream accordingly.
033     * <p/>
034     * By default, Tuples that match the given pattern are kept, and Tuples that do not
035     * match are filtered out. This can be changed by setting removeMatch to true.
036     * <p/>
037     * Also, by default, the whole Tuple is matched against the given patternString (tab delimited). If matchEachElement
038     * is set to true, the pattern is applied to each Tuple value individually.
039     * <p/>
040     * This operation uses {@link java.util.regex.Matcher} internally, specifically the method {@link java.util.regex.Matcher#find()}.
041     * <p/>
042     * Note a {@code null} valued argument passed to the parser will be converted to an empty string ({@code ""}) before
043     * the regex is applied.
044     * <p/>
045     * Any Object value will be coerced to a String type via its {@code toString()} method.
046     *
047     * @see java.util.regex.Matcher
048     * @see java.util.regex.Pattern
049     */
050    public class RegexFilter extends RegexMatcher implements Filter<Matcher>
051      {
052      /** Field matchEachElement */
053      protected final boolean matchEachElement;
054    
055      /**
056       * Constructor RegexFilter creates a new RegexFilter instance.
057       *
058       * @param patternString of type String
059       */
060      @ConstructorProperties({"patternString"})
061      public RegexFilter( String patternString )
062        {
063        super( patternString );
064        this.matchEachElement = false;
065        }
066    
067      /**
068       * Constructor RegexFilter creates a new RegexFilter instance.
069       *
070       * @param patternString of type String
071       * @param removeMatch   of type boolean
072       */
073      @ConstructorProperties({"patternString", "removeMatch"})
074      public RegexFilter( String patternString, boolean removeMatch )
075        {
076        super( patternString, removeMatch );
077        this.matchEachElement = false;
078    
079        }
080    
081      /**
082       * @param patternString    of type String
083       * @param removeMatch      of type boolean, set to true if a match should be filtered
084       * @param matchEachElement of type boolean, set to true if each element should be matched individually
085       */
086      @ConstructorProperties({"patternString", "removeMatch", "matchEachElement"})
087      public RegexFilter( String patternString, boolean removeMatch, boolean matchEachElement )
088        {
089        super( patternString, removeMatch );
090        this.matchEachElement = matchEachElement;
091        }
092    
093      public boolean isMatchEachElement()
094        {
095        return matchEachElement;
096        }
097    
098      @Override
099      public boolean isRemove( FlowProcess flowProcess, FilterCall<Matcher> filterCall )
100        {
101        if( matchEachElement )
102          return matchEachElement( filterCall.getContext(), filterCall.getArguments() );
103        else
104          return matchWholeTuple( filterCall.getContext(), filterCall.getArguments() );
105        }
106    
107      @Override
108      public boolean equals( Object object )
109        {
110        if( this == object )
111          return true;
112        if( !( object instanceof RegexFilter ) )
113          return false;
114        if( !super.equals( object ) )
115          return false;
116    
117        RegexFilter that = (RegexFilter) object;
118    
119        if( matchEachElement != that.matchEachElement )
120          return false;
121    
122        return true;
123        }
124    
125      @Override
126      public int hashCode()
127        {
128        int result = super.hashCode();
129        result = 31 * result + ( matchEachElement ? 1 : 0 );
130        return result;
131        }
132      }