001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.operation.text;
023
024import java.beans.ConstructorProperties;
025import java.text.ParseException;
026import java.text.SimpleDateFormat;
027import java.util.Arrays;
028import java.util.Calendar;
029import java.util.Date;
030import java.util.Locale;
031import java.util.TimeZone;
032
033import cascading.flow.FlowProcess;
034import cascading.operation.Function;
035import cascading.operation.FunctionCall;
036import cascading.operation.OperationException;
037import cascading.tuple.Fields;
038import cascading.tuple.TupleEntry;
039import cascading.util.Pair;
040
041/**
042 * Class DateParser is used to convert a text date string to a timestamp, the number of milliseconds
043 * since January 1, 1970, 00:00:00 GMT, using the {@link SimpleDateFormat} syntax.
044 * <p>
045 * If given, individual {@link Calendar} fields can be stored in unique fields for a given {@link TimeZone} and {@link Locale}.
046 */
047public class DateParser extends DateOperation implements Function<Pair<SimpleDateFormat, TupleEntry>>
048  {
049  /** Field FIELD_NAME */
050  public static final String FIELD_NAME = "ts";
051
052  /** Field calendarFields */
053  private int[] calendarFields;
054
055  /**
056   * Constructor DateParser creates a new DateParser instance that creates a simple long time stamp of the parsed date.
057   *
058   * @param dateFormatString of type String
059   */
060  @ConstructorProperties({"dateFormatString"})
061  public DateParser( String dateFormatString )
062    {
063    super( 1, new Fields( FIELD_NAME, Long.class ), dateFormatString );
064    }
065
066  /**
067   * Constructor DateParser creates a new DateParser instance.
068   *
069   * @param fieldDeclaration of type Fields
070   * @param dateFormatString of type String
071   */
072  @ConstructorProperties({"fieldDeclaration", "dateFormatString"})
073  public DateParser( Fields fieldDeclaration, String dateFormatString )
074    {
075    super( 1, fieldDeclaration, dateFormatString );
076    }
077
078  /**
079   * Constructor DateParser creates a new DateParser instance, where calendarFields is an int[] of {@link Calendar} field
080   * values. See {@link Calendar#get(int)}.
081   *
082   * @param fieldDeclaration of type Fields
083   * @param calendarFields   of type int[]
084   * @param dateFormatString of type String
085   */
086  @ConstructorProperties({"fieldDeclaration", "calendarFields", "dateFormatString"})
087  public DateParser( Fields fieldDeclaration, int[] calendarFields, String dateFormatString )
088    {
089    this( fieldDeclaration, calendarFields, null, null, dateFormatString );
090    }
091
092  /**
093   * Constructor DateParser creates a new DateParser instance, where zone and locale are passed to the internal
094   * {@link SimpleDateFormat} instance.
095   *
096   * @param fieldDeclaration of type Fields
097   * @param zone             of type TimeZone
098   * @param locale           of type Locale
099   * @param dateFormatString of type String
100   */
101  @ConstructorProperties({"fieldDeclaration", "zone", "locale", "dateFormatString"})
102  public DateParser( Fields fieldDeclaration, TimeZone zone, Locale locale, String dateFormatString )
103    {
104    this( fieldDeclaration, null, zone, locale, dateFormatString );
105    }
106
107  /**
108   * Constructor DateParser creates a new DateParser instance, where calendarFields is an int[] of {@link Calendar} field
109   * values. See {@link Calendar#get(int)}. The {@link TimeZone} and/or {@link Locale} may also be set.
110   *
111   * @param fieldDeclaration of type Fields
112   * @param calendarFields   of type int[]
113   * @param zone             of type TimeZone
114   * @param locale           of type Locale
115   * @param dateFormatString of type String
116   */
117  @ConstructorProperties({"fieldDeclaration", "calendarFields", "zone", "locale", "dateFormatString"})
118  public DateParser( Fields fieldDeclaration, int[] calendarFields, TimeZone zone, Locale locale, String dateFormatString )
119    {
120    super( 1, fieldDeclaration, dateFormatString, zone, locale );
121
122    if( calendarFields != null )
123      {
124      this.calendarFields = Arrays.copyOf( calendarFields, calendarFields.length );
125
126      if( fieldDeclaration.size() != calendarFields.length )
127        throw new IllegalArgumentException( "fieldDeclaration must be same size as calendarFields, was " + fieldDeclaration.print() + " with calendar size: " + calendarFields.length );
128      }
129    else
130      {
131      if( !fieldDeclaration.isSubstitution() && fieldDeclaration.size() != 1 )
132        throw new IllegalArgumentException( "fieldDeclaration may only declare one field name, got " + fieldDeclaration.print() );
133      }
134    }
135
136  @Override
137  protected int getDeclaredSize()
138    {
139    if( calendarFields != null )
140      return calendarFields.length;
141
142    return super.getDeclaredSize();
143    }
144
145  @Override
146  public void operate( FlowProcess flowProcess, FunctionCall<Pair<SimpleDateFormat, TupleEntry>> functionCall )
147    {
148    TupleEntry output = functionCall.getContext().getRhs();
149
150    try
151      {
152      String value = functionCall.getArguments().getString( 0 );
153
154      if( value == null ) // if null, return null for the field
155        {
156        output.setObject( 0, null ); // safe to call set, tuple is size of 1
157
158        functionCall.getOutputCollector().add( output );
159
160        return;
161        }
162
163      Date date = functionCall.getContext().getLhs().parse( value );
164
165      if( calendarFields == null )
166        output.setLong( 0, date.getTime() ); // safe to call set, tuple is size of 1
167      else
168        makeCalendarFields( output, date );
169      }
170    catch( ParseException exception )
171      {
172      throw new OperationException( "unable to parse input value: " + functionCall.getArguments().getObject( 0 ), exception );
173      }
174
175    functionCall.getOutputCollector().add( output );
176    }
177
178  private void makeCalendarFields( TupleEntry output, Date date )
179    {
180    Calendar calendar = getCalendar();
181    calendar.setTime( date );
182
183    for( int i = 0; i < calendarFields.length; i++ )
184      output.setInteger( i, calendar.get( calendarFields[ i ] ) );
185    }
186
187  @Override
188  public boolean equals( Object object )
189    {
190    if( this == object )
191      return true;
192    if( !( object instanceof DateParser ) )
193      return false;
194    if( !super.equals( object ) )
195      return false;
196
197    DateParser that = (DateParser) object;
198
199    if( !Arrays.equals( calendarFields, that.calendarFields ) )
200      return false;
201
202    return true;
203    }
204
205  @Override
206  public int hashCode()
207    {
208    int result = super.hashCode();
209    result = 31 * result + ( calendarFields != null ? Arrays.hashCode( calendarFields ) : 0 );
210    return result;
211    }
212  }