001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.pipe;
023
024/**
025 * The Checkpoint pipe, if supported by the current planner, will force data to be persisted at the point in
026 * the tuple stream an instance of Checkpoint is inserted into the pipe assembly.
027 * <p>
028 * If a checkpoint {@link cascading.tap.Tap} is added to the {@link cascading.flow.FlowDef} via the
029 * {@link cascading.flow.FlowDef#addCheckpoint(Checkpoint, cascading.tap.Tap)} method, that Tap instance
030 * will be used to capture the intermediate result sets.
031 * <p>
032 * It is required that any Scheme used as a checkpoint must source {@link cascading.tuple.Fields#UNKNOWN} and
033 * sink {@link cascading.tuple.Fields#ALL}.
034 * <p>
035 * If used with a {@link cascading.scheme.hadoop.TextDelimited} {@link cascading.scheme.Scheme} class and
036 * the {@code hasHeader} value is {@code true}, a header with the resolved field names will be written to the file.
037 * <p>
038 * This is especially useful for debugging complex flows.
039 * <p>
040 * For the {@link cascading.flow.hadoop.HadoopFlowConnector} and Hadoop platform, a Checkpoint will force a new
041 * MapReduce job ({@link cascading.flow.hadoop.HadoopFlowStep} into the {@link cascading.flow.Flow} plan.
042 * <p>
043 * This can be important when used in conjunction with a {@link HashJoin} where all the operations upstream
044 * from the HashJoin significantly filter out data allowing it to fit in memory.
045 */
046public class Checkpoint extends Pipe
047  {
048  /**
049   * Constructor Checkpoint creates a new Checkpoint pipe which inherits the name of its previous pipe.
050   *
051   * @param previous of type Pipe
052   */
053  public Checkpoint( Pipe previous )
054    {
055    super( previous );
056    }
057
058  /**
059   * Constructor Checkpoint creates a new Checkpoint pipe with the given name.
060   *
061   * @param previous of type Pipe
062   */
063  public Checkpoint( String name, Pipe previous )
064    {
065    super( name, previous );
066    }
067  }