001    /*
002     * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.operation;
022    
023    import cascading.flow.FlowProcess;
024    import cascading.flow.planner.DeclaresResults;
025    import cascading.tuple.Fields;
026    
027    /**
028     * Interface Operation is the base interface for all functions applied to {@link cascading.tuple.Tuple} streams.
029     * <p>
030     * Specifically, these are {@link Function}, {@link Filter}, {@link Aggregator}, {@link Buffer}, and {@link Assertion}.
031     * <p>
032     * Use {@link BaseOperation} for a convenient way to create new Operation types.
033     * @param <Context> the type of the context that parameterizes the {@link OperationCall} handed to the life-cycle methods; resources created in prepare should be stored in it
034     * @see cascading.operation.BaseOperation
035     * @see Function
036     * @see Filter
037     * @see Aggregator
038     * @see Buffer
039     * @see Assertion
040     */
041    public interface Operation<Context> extends DeclaresResults
042      {
043      /** Field ANY denotes that a given Operation will take any number of argument values. */
044      int ANY = Integer.MAX_VALUE;
045    
046      /**
047       * The prepare method is called immediately before the current Operation instance is put into play processing Tuples.
048       * This method should initialize any resources that can be shut down or released in the
049       * {@link #cleanup(cascading.flow.FlowProcess, OperationCall)} method.
050       * <p>
051       * Any resources created should be stored in the {@code Context}, not as instance fields on the class.
052       * <p>
053       * This method may be called more than once during the life of this instance, but it will never be called a second
054       * time unless a cleanup invocation has happened immediately before that subsequent invocation.
055       * <p>
056       * If the Flow this Operation instance belongs to will execute on a remote cluster, this method will be called
057       * cluster side, not client side.
058       *
059       * @param flowProcess   the {@link FlowProcess} supplied by the caller for this invocation
060       * @param operationCall the {@link OperationCall}, typed to this Operation's {@code Context}, for this invocation
061       */
062      void prepare( FlowProcess flowProcess, OperationCall<Context> operationCall );
063    
064      /**
065       * The flush method is called when an Operation that is caching values must empty the cache. It is called before
066       * {@link #cleanup(cascading.flow.FlowProcess, OperationCall)} is invoked.
067       * <p>
068       * It is safe to cast the {@link cascading.operation.OperationCall} to a {@link FunctionCall}, or equivalent, and
069       * get its {@link cascading.operation.FunctionCall#getOutputCollector()}.
070       *
071       * @param flowProcess   the {@link FlowProcess} supplied by the caller for this invocation
072       * @param operationCall the {@link OperationCall} for this invocation; may be cast to a {@link FunctionCall}, or equivalent, as described above
073       */
074      void flush( FlowProcess flowProcess, OperationCall<Context> operationCall );
075    
076      /**
077       * The cleanup method is called immediately after the current Operation instance is taken out of play processing Tuples.
078       * This method should shut down any resources created or initialized during the
079       * {@link #prepare(cascading.flow.FlowProcess, OperationCall)} method.
080       * <p>
081       * This method may be called more than once during the life of this instance, but it will never be called multiple times
082       * without a prepare invocation before each call.
083       * <p>
084       * If the Flow this Operation instance belongs to will execute on a remote cluster, this method will be called
085       * cluster side, not client side.
086       *
087       * @param flowProcess   the {@link FlowProcess} supplied by the caller for this invocation
088       * @param operationCall the {@link OperationCall}, typed to this Operation's {@code Context}, for this invocation
089       */
090      void cleanup( FlowProcess flowProcess, OperationCall<Context> operationCall );
091    
092      /**
093       * Returns the fields created by this Operation instance. If this instance is a {@link Filter}, it should always
094       * return {@link Fields#ALL}.
095       *
096       * @return the {@link Fields} created by this Operation; {@link Fields#ALL} when this instance is a {@link Filter}
097       */
098      Fields getFieldDeclaration();
099    
100      /**
101       * Returns the minimum number of arguments this Operation expects from the calling {@link cascading.pipe.Each} or
102       * {@link cascading.pipe.Every} Operator.
103       * <p>
104       * Operations should be willing to receive more arguments than expected, but should ignore them if they are unused,
105       * instead of failing.
106       *
107       * @return the minimum number of expected arguments (see {@link #ANY} for Operations taking any number of argument values)
108       */
109      int getNumArgs();
110    
111      /**
112       * Returns {@code true} if this Operation instance can safely execute on the same 'record' multiple
113       * times, {@code false} otherwise.
114       * <p>
115       * That is, this Operation is safe if it has no side-effects, or if it does, they are idempotent.
116       * <p>
117       * If seeing the same 'record' more than once can cause errors (internally or externally),
118       * this method must return {@code false}.
119       *
120       * @return {@code true} if executing on the same 'record' more than once is safe, {@code false} otherwise
121       */
122      boolean isSafe();
123      }