001/*
002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.operation.text;
022
023import java.beans.ConstructorProperties;
024import java.text.ParseException;
025import java.text.SimpleDateFormat;
026import java.util.Arrays;
027import java.util.Calendar;
028import java.util.Date;
029import java.util.Locale;
030import java.util.TimeZone;
031
032import cascading.flow.FlowProcess;
033import cascading.operation.Function;
034import cascading.operation.FunctionCall;
035import cascading.operation.OperationException;
036import cascading.tuple.Fields;
037import cascading.tuple.Tuple;
038import cascading.util.Pair;
039
040/**
041 * Class DateParser is used to convert a text date string to a timestamp, the number of milliseconds
042 * since January 1, 1970, 00:00:00 GMT, using the {@link SimpleDateFormat} syntax.
043 * <p/>
044 * If given, individual {@link Calendar} fields can be stored in unique fields for a given {@link TimeZone} and {@link Locale}.
045 */
046public class DateParser extends DateOperation implements Function<Pair<SimpleDateFormat, Tuple>>
047  {
048  /** Field FIELD_NAME */
049  public static final String FIELD_NAME = "ts";
050
051  /** Field calendarFields */
052  private int[] calendarFields;
053
054  /**
055   * Constructor DateParser creates a new DateParser instance that creates a simple long time stamp of the parsed date.
056   *
057   * @param dateFormatString of type String
058   */
059  @ConstructorProperties({"dateFormatString"})
060  public DateParser( String dateFormatString )
061    {
062    super( 1, new Fields( FIELD_NAME ), dateFormatString );
063    }
064
065  /**
066   * Constructor DateParser creates a new DateParser instance.
067   *
068   * @param fieldDeclaration of type Fields
069   * @param dateFormatString of type String
070   */
071  @ConstructorProperties({"fieldDeclaration", "dateFormatString"})
072  public DateParser( Fields fieldDeclaration, String dateFormatString )
073    {
074    super( 1, fieldDeclaration, dateFormatString );
075    }
076
077  /**
078   * Constructor DateParser creates a new DateParser instance, where calendarFields is an int[] of {@link Calendar} field
079   * values. See {@link Calendar#get(int)}.
080   *
081   * @param fieldDeclaration of type Fields
082   * @param calendarFields   of type int[]
083   * @param dateFormatString of type String
084   */
085  @ConstructorProperties({"fieldDeclaration", "calendarFields", "dateFormatString"})
086  public DateParser( Fields fieldDeclaration, int[] calendarFields, String dateFormatString )
087    {
088    this( fieldDeclaration, calendarFields, null, null, dateFormatString );
089    }
090
091  /**
092   * Constructor DateParser creates a new DateParser instance, where zone and locale are passed to the internal
093   * {@link SimpleDateFormat} instance.
094   *
095   * @param fieldDeclaration of type Fields
096   * @param zone             of type TimeZone
097   * @param locale           of type Locale
098   * @param dateFormatString of type String
099   */
100  @ConstructorProperties({"fieldDeclaration", "zone", "locale", "dateFormatString"})
101  public DateParser( Fields fieldDeclaration, TimeZone zone, Locale locale, String dateFormatString )
102    {
103    this( fieldDeclaration, null, zone, locale, dateFormatString );
104    }
105
106  /**
107   * Constructor DateParser creates a new DateParser instance, where calendarFields is an int[] of {@link Calendar} field
108   * values. See {@link Calendar#get(int)}. The {@link TimeZone} and/or {@link Locale} may also be set.
109   *
110   * @param fieldDeclaration of type Fields
111   * @param calendarFields   of type int[]
112   * @param zone             of type TimeZone
113   * @param locale           of type Locale
114   * @param dateFormatString of type String
115   */
116  @ConstructorProperties({"fieldDeclaration", "calendarFields", "zone", "locale", "dateFormatString"})
117  public DateParser( Fields fieldDeclaration, int[] calendarFields, TimeZone zone, Locale locale, String dateFormatString )
118    {
119    super( 1, fieldDeclaration, dateFormatString, zone, locale );
120
121    if( calendarFields != null )
122      {
123      this.calendarFields = Arrays.copyOf( calendarFields, calendarFields.length );
124
125      if( fieldDeclaration.size() != calendarFields.length )
126        throw new IllegalArgumentException( "fieldDeclaration must be same size as calendarFields, was " + fieldDeclaration.print() + " with calendar size: " + calendarFields.length );
127      }
128    else
129      {
130      if( !fieldDeclaration.isSubstitution() && fieldDeclaration.size() != 1 )
131        throw new IllegalArgumentException( "fieldDeclaration may only declare one field name, got " + fieldDeclaration.print() );
132      }
133    }
134
135  @Override
136  public void operate( FlowProcess flowProcess, FunctionCall<Pair<SimpleDateFormat, Tuple>> functionCall )
137    {
138    Tuple output = functionCall.getContext().getRhs();
139
140    try
141      {
142      String value = functionCall.getArguments().getString( 0 );
143
144      if( value == null ) // if null, return null for the field
145        {
146        output.set( 0, null ); // safe to call set, tuple is size of 1
147
148        functionCall.getOutputCollector().add( output );
149
150        return;
151        }
152
153      Date date = functionCall.getContext().getLhs().parse( value );
154
155      if( calendarFields == null )
156        output.set( 0, date.getTime() ); // safe to call set, tuple is size of 1
157      else
158        makeCalendarFields( output, date );
159      }
160    catch( ParseException exception )
161      {
162      throw new OperationException( "unable to parse input value: " + functionCall.getArguments().getObject( 0 ), exception );
163      }
164
165    functionCall.getOutputCollector().add( output );
166    }
167
168  private void makeCalendarFields( Tuple output, Date date )
169    {
170    output.clear();
171
172    Calendar calendar = getCalendar();
173    calendar.setTime( date );
174
175    for( int i = 0; i < calendarFields.length; i++ )
176    //noinspection MagicConstant
177      output.add( calendar.get( calendarFields[ i ] ) );
178    }
179
180  @Override
181  public boolean equals( Object object )
182    {
183    if( this == object )
184      return true;
185    if( !( object instanceof DateParser ) )
186      return false;
187    if( !super.equals( object ) )
188      return false;
189
190    DateParser that = (DateParser) object;
191
192    if( !Arrays.equals( calendarFields, that.calendarFields ) )
193      return false;
194
195    return true;
196    }
197
198  @Override
199  public int hashCode()
200    {
201    int result = super.hashCode();
202    result = 31 * result + ( calendarFields != null ? Arrays.hashCode( calendarFields ) : 0 );
203    return result;
204    }
205  }