/*
 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.pipe.joiner;

import java.util.Arrays;
import java.util.Iterator;

import cascading.flow.FlowProcess;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;

/**
 * Class JoinerClosure wraps all incoming tuple streams with iterator instances allowing for just join implementations.
 * <p>
 * This class is provided to a {@link Joiner#getIterator(JoinerClosure)} implementation, or to a {@link cascading.operation.Buffer}
 * via the {@link cascading.operation.BufferCall#getJoinerClosure()} method.
 * <p>
 * All iterators returned by {@link #getIterator(int)} may be retrieved more than once to restart them except for the left
 * most iterator at position {@code 0} (zero).
 * <p>
 * This iterator may only be iterated across once. All other iterators are backed by memory and possibly disk.
041 */ 042public abstract class JoinerClosure 043 { 044 protected final FlowProcess flowProcess; 045 046 protected final Fields[] joinFields; 047 protected final Fields[] valueFields; 048 049 public JoinerClosure( FlowProcess flowProcess, Fields[] joinFields, Fields[] valueFields ) 050 { 051 this.flowProcess = flowProcess; 052 this.joinFields = Arrays.copyOf( joinFields, joinFields.length ); 053 this.valueFields = Arrays.copyOf( valueFields, valueFields.length ); 054 } 055 056 public FlowProcess getFlowProcess() 057 { 058 return flowProcess; 059 } 060 061 /** 062 * Returns an array of {@link Fields} denoting the join fields or keys uses for each incoming pipe. 063 * <p> 064 * The most left handed pipe will be in array position 0. 065 * 066 * @return an array of Fields 067 */ 068 public Fields[] getJoinFields() 069 { 070 return joinFields; 071 } 072 073 /** 074 * Returns an array of all the incoming fields for each incoming pipe. 075 * <p> 076 * The most left handed pipe will be in array position 0; 077 * 078 * @return an array of Fields 079 */ 080 public Fields[] getValueFields() 081 { 082 return valueFields; 083 } 084 085 public boolean isSelfJoin() 086 { 087 return valueFields.length == 1 && size() != valueFields.length; 088 } 089 090 public abstract int size(); 091 092 /** 093 * Returns a Tuple Iterator for the given pipe position. Position 0 is the most left handed pipe passed to the prior 094 * {@link cascading.pipe.CoGroup}. 095 * <p> 096 * To restart an Iterator over a given pipe, this method must be called again. 097 * 098 * @param pos of type int 099 * @return an Iterator of Tuple instances. 100 */ 101 public abstract Iterator<Tuple> getIterator( int pos ); 102 103 public abstract boolean isEmpty( int pos ); 104 105 public abstract Tuple getGroupTuple( Tuple keysTuple ); 106 }