/*
 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
021
022package cascading.operation;
023
024import cascading.flow.FlowProcess;
025import cascading.flow.planner.DeclaresResults;
026import cascading.tuple.Fields;
027
028/**
029 * Interface Operation is the base interface for all functions applied to {@link cascading.tuple.Tuple} streams.
030 * <p>
031 * Specifically {@link Function}, {@link Filter}, {@link Aggregator}, {@link Buffer}, and {@link Assertion}.
032 * <p>
033 * Use {@link BaseOperation} for a convenient way to create new Operation types.
034 *
035 * @see cascading.operation.BaseOperation
036 * @see Function
037 * @see Filter
038 * @see Aggregator
039 * @see Buffer
040 * @see Assertion
041 */
042public interface Operation<Context> extends DeclaresResults
043  {
044  /** Field ANY denotes that a given Operation will take any number of argument values */
045  int ANY = Integer.MAX_VALUE;
046
047  /**
048   * The prepare method is called immediately before the current Operation instance is put into play processing Tuples.
049   * This method should initialize any resources that can be shutdown or released in the
050   * {@link #cleanup(cascading.flow.FlowProcess, OperationCall)} method.
051   * <p>
052   * Any resources created should be stored in the {@code Context}, not as instance fields on the class.
053   * <p>
054   * This method may be called more than once during the life of this instance. But it will never be called multiple times
055   * without a cleanup invocation immediately before subsequent invocations.
056   * <p>
057   * If the Flow this Operation instance belongs will execute on a remote cluster, this method will be called
058   * cluster side, not client side.
059   *
060   * @param flowProcess
061   * @param operationCall
062   */
063  void prepare( FlowProcess flowProcess, OperationCall<Context> operationCall );
064
065  /**
066   * The flush method is called when an Operation that is caching values must empty the cache. It is called before
067   * {@link #cleanup(cascading.flow.FlowProcess, OperationCall)} is invoked.
068   * <p>
069   * It is safe to cast the {@link cascading.operation.OperationCall} to a {@link FunctionCall}, or equivalent, and
070   * get its {@link cascading.operation.FunctionCall#getOutputCollector()}.
071   *
072   * @param flowProcess
073   * @param operationCall
074   */
075  void flush( FlowProcess flowProcess, OperationCall<Context> operationCall );
076
077  /**
078   * The cleanup method is called immediately after the current Operation instance is taken out of play processing Tuples.
079   * This method should shutdown any resources created or initialized during the
080   * {@link #prepare(cascading.flow.FlowProcess, OperationCall)} method.
081   * <p>
082   * This method may be called more than once during the life of this instance. But it will never be called multiple times
083   * without a prepare invocation before.
084   * <p>
085   * If the Flow this Operation instance belongs will execute on a remote cluster, this method will be called
086   * cluster side, not client side.
087   *
088   * @param flowProcess
089   * @param operationCall
090   */
091  void cleanup( FlowProcess flowProcess, OperationCall<Context> operationCall );
092
093  /**
094   * Returns the fields created by this Operation instance. If this instance is a {@link Filter}, it should always
095   * return {@link Fields#ALL}.
096   *
097   * @return a Fields instance
098   */
099  Fields getFieldDeclaration();
100
101  /**
102   * The minimum number of arguments this Operation expects from the calling {@link cascading.pipe.Each} or
103   * {@link cascading.pipe.Every} Operator.
104   * <p>
105   * Operations should be willing to receive more arguments than expected, but should ignore them if they are unused,
106   * instead of failing.
107   *
108   * @return an int
109   */
110  int getNumArgs();
111
112  /**
113   * Returns {@code true} if this Operation instance can safely execute on the same 'record' multiple
114   * times, {@code false} otherwise.
115   * <p>
116   * That is, this Operation is safe if it has no side-effects, or if it does, they are idempotent.
117   * <p>
118   * If seeing the same 'record' more than once can cause errors (internally or externally),
119   * this method must return {@code false}.
120   *
121   * @return a boolean
122   */
123  boolean isSafe();
124  }