001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.pipe.joiner;
023
024import java.util.Arrays;
025import java.util.Iterator;
026
027import cascading.flow.FlowProcess;
028import cascading.tuple.Fields;
029import cascading.tuple.Tuple;
030
031/**
032 * Class JoinerClosure wraps all incoming tuple streams with iterator instances allowing for just join implementations.
033 * <p>
034 * This class is provided to a {@link Joiner#getIterator(JoinerClosure)} implementation, or to a {@link cascading.operation.Buffer}
035 * via the {@link cascading.operation.BufferCall#getJoinerClosure()} method.
036 * <p>
037 * All iterators returned by {@link #getIterator(int)} may be retrieved more than once to restart them except for the left
038 * most iterator at position {@code 0} (zero).
039 * <p>
040 * This iterator may only be iterated across once. All other iterators are backed by memory and possibly disk.
041 */
042public abstract class JoinerClosure
043  {
044  protected final FlowProcess flowProcess;
045
046  protected final Fields[] joinFields;
047  protected final Fields[] valueFields;
048
049  public JoinerClosure( FlowProcess flowProcess, Fields[] joinFields, Fields[] valueFields )
050    {
051    this.flowProcess = flowProcess;
052    this.joinFields = Arrays.copyOf( joinFields, joinFields.length );
053    this.valueFields = Arrays.copyOf( valueFields, valueFields.length );
054    }
055
056  public FlowProcess getFlowProcess()
057    {
058    return flowProcess;
059    }
060
061  /**
062   * Returns an array of {@link Fields} denoting the join fields or keys uses for each incoming pipe.
063   * <p>
064   * The most left handed pipe will be in array position 0.
065   *
066   * @return an array of Fields
067   */
068  public Fields[] getJoinFields()
069    {
070    return joinFields;
071    }
072
073  /**
074   * Returns an array of all the incoming fields for each incoming pipe.
075   * <p>
076   * The most left handed pipe will be in array position 0;
077   *
078   * @return an array of Fields
079   */
080  public Fields[] getValueFields()
081    {
082    return valueFields;
083    }
084
085  public boolean isSelfJoin()
086    {
087    return valueFields.length == 1 && size() != valueFields.length;
088    }
089
090  public abstract int size();
091
092  /**
093   * Returns a Tuple Iterator for the given pipe position. Position 0 is the most left handed pipe passed to the prior
094   * {@link cascading.pipe.CoGroup}.
095   * <p>
096   * To restart an Iterator over a given pipe, this method must be called again.
097   *
098   * @param pos of type int
099   * @return an Iterator of Tuple instances.
100   */
101  public abstract Iterator<Tuple> getIterator( int pos );
102
103  public abstract boolean isEmpty( int pos );
104
105  public abstract Tuple getGroupTuple( Tuple keysTuple );
106  }