001/* 002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved. 003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 004 * 005 * Project and contact information: http://www.cascading.org/ 006 * 007 * This file is part of the Cascading project. 008 * 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 */ 021 022package cascading.operation.text; 023 024import java.beans.ConstructorProperties; 025import java.text.ParseException; 026import java.text.SimpleDateFormat; 027import java.util.Arrays; 028import java.util.Calendar; 029import java.util.Date; 030import java.util.Locale; 031import java.util.TimeZone; 032 033import cascading.flow.FlowProcess; 034import cascading.operation.Function; 035import cascading.operation.FunctionCall; 036import cascading.operation.OperationException; 037import cascading.tuple.Fields; 038import cascading.tuple.TupleEntry; 039import cascading.util.Pair; 040 041/** 042 * Class DateParser is used to convert a text date string to a timestamp, the number of milliseconds 043 * since January 1, 1970, 00:00:00 GMT, using the {@link SimpleDateFormat} syntax. 044 * <p> 045 * If given, individual {@link Calendar} fields can be stored in unique fields for a given {@link TimeZone} and {@link Locale}. 046 */ 047public class DateParser extends DateOperation implements Function<Pair<SimpleDateFormat, TupleEntry>> 048 { 049 /** Field FIELD_NAME */ 050 public static final String FIELD_NAME = "ts"; 051 052 /** Field calendarFields */ 053 private int[] calendarFields; 054 055 /** 056 * Constructor DateParser creates a new DateParser instance that creates a simple long time stamp of the parsed date. 057 * 058 * @param dateFormatString of type String 059 */ 060 @ConstructorProperties({"dateFormatString"}) 061 public DateParser( String dateFormatString ) 062 { 063 super( 1, new Fields( FIELD_NAME, Long.class ), dateFormatString ); 064 } 065 066 /** 067 * Constructor DateParser creates a new DateParser instance. 068 * 069 * @param fieldDeclaration of type Fields 070 * @param dateFormatString of type String 071 */ 072 @ConstructorProperties({"fieldDeclaration", "dateFormatString"}) 073 public DateParser( Fields fieldDeclaration, String dateFormatString ) 074 { 075 super( 1, fieldDeclaration, dateFormatString ); 076 } 077 078 /** 079 * Constructor DateParser creates a new DateParser instance, where calendarFields is an int[] of {@link Calendar} field 080 * values. See {@link Calendar#get(int)}. 081 * 082 * @param fieldDeclaration of type Fields 083 * @param calendarFields of type int[] 084 * @param dateFormatString of type String 085 */ 086 @ConstructorProperties({"fieldDeclaration", "calendarFields", "dateFormatString"}) 087 public DateParser( Fields fieldDeclaration, int[] calendarFields, String dateFormatString ) 088 { 089 this( fieldDeclaration, calendarFields, null, null, dateFormatString ); 090 } 091 092 /** 093 * Constructor DateParser creates a new DateParser instance, where zone and locale are passed to the internal 094 * {@link SimpleDateFormat} instance. 095 * 096 * @param fieldDeclaration of type Fields 097 * @param zone of type TimeZone 098 * @param locale of type Locale 099 * @param dateFormatString of type String 100 */ 101 @ConstructorProperties({"fieldDeclaration", "zone", "locale", "dateFormatString"}) 102 public DateParser( Fields fieldDeclaration, TimeZone zone, Locale locale, String dateFormatString ) 103 { 104 this( fieldDeclaration, null, zone, locale, dateFormatString ); 105 } 106 107 /** 108 * Constructor DateParser creates a new DateParser instance, where calendarFields is an int[] of {@link Calendar} field 109 * values. See {@link Calendar#get(int)}. The {@link TimeZone} and/or {@link Locale} may also be set. 110 * 111 * @param fieldDeclaration of type Fields 112 * @param calendarFields of type int[] 113 * @param zone of type TimeZone 114 * @param locale of type Locale 115 * @param dateFormatString of type String 116 */ 117 @ConstructorProperties({"fieldDeclaration", "calendarFields", "zone", "locale", "dateFormatString"}) 118 public DateParser( Fields fieldDeclaration, int[] calendarFields, TimeZone zone, Locale locale, String dateFormatString ) 119 { 120 super( 1, fieldDeclaration, dateFormatString, zone, locale ); 121 122 if( calendarFields != null ) 123 { 124 this.calendarFields = Arrays.copyOf( calendarFields, calendarFields.length ); 125 126 if( fieldDeclaration.size() != calendarFields.length ) 127 throw new IllegalArgumentException( "fieldDeclaration must be same size as calendarFields, was " + fieldDeclaration.print() + " with calendar size: " + calendarFields.length ); 128 } 129 else 130 { 131 if( !fieldDeclaration.isSubstitution() && fieldDeclaration.size() != 1 ) 132 throw new IllegalArgumentException( "fieldDeclaration may only declare one field name, got " + fieldDeclaration.print() ); 133 } 134 } 135 136 @Override 137 protected int getDeclaredSize() 138 { 139 if( calendarFields != null ) 140 return calendarFields.length; 141 142 return super.getDeclaredSize(); 143 } 144 145 @Override 146 public void operate( FlowProcess flowProcess, FunctionCall<Pair<SimpleDateFormat, TupleEntry>> functionCall ) 147 { 148 TupleEntry output = functionCall.getContext().getRhs(); 149 150 try 151 { 152 String value = functionCall.getArguments().getString( 0 ); 153 154 if( value == null ) // if null, return null for the field 155 { 156 output.setObject( 0, null ); // safe to call set, tuple is size of 1 157 158 functionCall.getOutputCollector().add( output ); 159 160 return; 161 } 162 163 Date date = functionCall.getContext().getLhs().parse( value ); 164 165 if( calendarFields == null ) 166 output.setLong( 0, date.getTime() ); // safe to call set, tuple is size of 1 167 else 168 makeCalendarFields( output, date ); 169 } 170 catch( ParseException exception ) 171 { 172 throw new OperationException( "unable to parse input value: " + functionCall.getArguments().getObject( 0 ), exception ); 173 } 174 175 functionCall.getOutputCollector().add( output ); 176 } 177 178 private void makeCalendarFields( TupleEntry output, Date date ) 179 { 180 Calendar calendar = getCalendar(); 181 calendar.setTime( date ); 182 183 for( int i = 0; i < calendarFields.length; i++ ) 184 output.setInteger( i, calendar.get( calendarFields[ i ] ) ); 185 } 186 187 @Override 188 public boolean equals( Object object ) 189 { 190 if( this == object ) 191 return true; 192 if( !( object instanceof DateParser ) ) 193 return false; 194 if( !super.equals( object ) ) 195 return false; 196 197 DateParser that = (DateParser) object; 198 199 if( !Arrays.equals( calendarFields, that.calendarFields ) ) 200 return false; 201 202 return true; 203 } 204 205 @Override 206 public int hashCode() 207 { 208 int result = super.hashCode(); 209 result = 31 * result + ( calendarFields != null ? Arrays.hashCode( calendarFields ) : 0 ); 210 return result; 211 } 212 }