001/*
002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.operation.regex;
022
023import java.beans.ConstructorProperties;
024import java.util.regex.Matcher;
025
026import cascading.flow.FlowProcess;
027import cascading.operation.Filter;
028import cascading.operation.FilterCall;
029
030/**
031 * Class RegexFilter will apply the regex patternString against every input Tuple value and filter
032 * the Tuple stream accordingly.
033 * <p/>
034 * By default, Tuples that match the given pattern are kept, and Tuples that do not
035 * match are filtered out. This can be changed by setting removeMatch to true.
036 * <p/>
037 * Also, by default, the whole Tuple is matched against the given patternString (tab delimited, unless otherwise
038 * specified). If matchEachElement is set to true, the pattern is applied to each Tuple value individually.
039 * <p/>
040 * This operation uses {@link java.util.regex.Matcher} internally, specifically the method
041 * {@link java.util.regex.Matcher#find()}.
042 * <p/>
043 * Note a {@code null} valued argument passed to the parser will be converted to an empty string ({@code ""}) before
044 * the regex is applied.
045 * <p/>
046 * Any Object value will be coerced to a String type via any provided {@link cascading.tuple.type.CoercibleType} on
047 * the argument selector or via its {@code toString()} method.
048 *
049 * @see java.util.regex.Matcher
050 * @see java.util.regex.Pattern
051 */
052public class RegexFilter extends RegexMatcher implements Filter<Matcher>
053  {
054  /** Field matchEachElement */
055  protected final boolean matchEachElement;
056
057  /**
058   * Constructor RegexFilter creates a new RegexFilter instance.
059   *
060   * @param patternString of type String
061   */
062  @ConstructorProperties({"patternString"})
063  public RegexFilter( String patternString )
064    {
065    super( patternString );
066    this.matchEachElement = false;
067    }
068
069  /**
070   * Constructor RegexFilter creates a new RegexFilter instance.
071   *
072   * @param patternString of type String
073   * @param delimiter     of type String
074   */
075  @ConstructorProperties({"patternString", "delimiter"})
076  public RegexFilter( String patternString, String delimiter )
077    {
078    super( patternString, delimiter );
079    this.matchEachElement = false;
080    }
081
082  /**
083   * Constructor RegexFilter creates a new RegexFilter instance.
084   *
085   * @param patternString of type String
086   * @param removeMatch   of type boolean
087   */
088  @ConstructorProperties({"patternString", "removeMatch"})
089  public RegexFilter( String patternString, boolean removeMatch )
090    {
091    super( patternString, removeMatch );
092    this.matchEachElement = false;
093    }
094
095  /**
096   * Constructor RegexFilter creates a new RegexFilter instance.
097   *
098   * @param patternString of type String
099   * @param removeMatch   of type boolean
100   * @param delimiter     of type String
101   */
102  @ConstructorProperties({"patternString", "removeMatch", "delimiter"})
103  public RegexFilter( String patternString, boolean removeMatch, String delimiter )
104    {
105    super( patternString, removeMatch, delimiter );
106    this.matchEachElement = false;
107
108    }
109
110  /**
111   * @param patternString    of type String
112   * @param removeMatch      of type boolean, set to true if a match should be filtered
113   * @param matchEachElement of type boolean, set to true if each element should be matched individually
114   */
115  @ConstructorProperties({"patternString", "removeMatch", "matchEachElement"})
116  public RegexFilter( String patternString, boolean removeMatch, boolean matchEachElement )
117    {
118    super( patternString, removeMatch );
119    this.matchEachElement = matchEachElement;
120    }
121
122  /**
123   * @param patternString    of type String
124   * @param removeMatch      of type boolean, set to true if a match should be filtered
125   * @param matchEachElement of type boolean, set to true if each element should be matched individually
126   * @param delimiter        of type String
127   */
128  @ConstructorProperties({"patternString", "removeMatch", "matchEachElement", "delimiter"})
129  public RegexFilter( String patternString, boolean removeMatch, boolean matchEachElement, String delimiter )
130    {
131    super( patternString, removeMatch, delimiter );
132    this.matchEachElement = matchEachElement;
133    }
134
135  public boolean isMatchEachElement()
136    {
137    return matchEachElement;
138    }
139
140  @Override
141  public boolean isRemove( FlowProcess flowProcess, FilterCall<Matcher> filterCall )
142    {
143    if( matchEachElement )
144      return matchEachElement( filterCall.getContext(), filterCall.getArguments() );
145    else
146      return matchWholeTuple( filterCall.getContext(), filterCall.getArguments() );
147    }
148
149  @Override
150  public boolean equals( Object object )
151    {
152    if( this == object )
153      return true;
154    if( !( object instanceof RegexFilter ) )
155      return false;
156    if( !super.equals( object ) )
157      return false;
158
159    RegexFilter that = (RegexFilter) object;
160
161    if( matchEachElement != that.matchEachElement )
162      return false;
163
164    return true;
165    }
166
167  @Override
168  public int hashCode()
169    {
170    int result = super.hashCode();
171    result = 31 * result + ( matchEachElement ? 1 : 0 );
172    return result;
173    }
174  }