001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.flow;
023
024import java.util.Collection;
025import java.util.List;
026import java.util.Map;
027import java.util.Set;
028
029import cascading.flow.planner.Scope;
030import cascading.flow.planner.graph.ElementGraph;
031import cascading.flow.planner.process.ProcessModel;
032import cascading.stats.FlowNodeStats;
033import cascading.tap.Tap;
034
035/**
036 * Class FlowNode represents the smallest parallelizable unit of work. It is a child to a
037 * {@link cascading.flow.FlowStep} and may have many siblings within the FlowStep.
038 * <p>
039 * A FlowNode is commonly executed as one or more slices, where a slice is a JVM executing against a portion
040 * of data.
041 * <p>
042 * Most slices within a FlowNode are identical, except for the sub-set of data they will be processing against.
043 * <p>
044 * But on some platforms, like MapReduce, a slice is executing a single flow pipeline. Thus a FlowNode may consist of
045 * some set of pipelines (or pipeline graph). One pipeline per 'streamed' input source Tap.
046 * <p>
047 * In a MapReduce model (like Apache Hadoop MapReduce) a FlowNode can by the Map or Reduce side of a job (where a job
048 * is a FlowStep).
049 * <p>
050 * In a DAG model (like Apache Tez), a FlowNode is a 'vertex', and the 'DAG' is a FlowStep.
051 */
052public interface FlowNode extends ProcessModel
053  {
054  String CASCADING_FLOW_NODE = "cascading.flow.node";
055
056  String getID();
057
058  /**
059   * Returns an immutable map of properties giving more details about the FlowNode object.
060   * <p>
061   * FlowNode descriptions provide meta-data to monitoring systems describing the workload a given FlowNode represents.
062   * For known description types, see {@link FlowNodeDescriptors}.
063   *
064   * @return Map
065   */
066  Map<String, String> getFlowNodeDescriptor();
067
068  FlowNodeStats getFlowNodeStats();
069
070  FlowStep getFlowStep();
071
072  Collection<? extends FlowElement> getFlowElementsFor( Enum annotation );
073
074  Set<? extends FlowElement> getSourceElements( Enum annotation );
075
076  Set<? extends FlowElement> getSinkElements( Enum annotation );
077
078  Set<String> getSourceElementNames();
079
080  Set<String> getSinkElementNames();
081
082  Set<String> getSourceTapNames( Tap flowElement );
083
084  Set<String> getSinkTapNames( Tap flowElement );
085
086  Tap getTrap( String branchName );
087
088  Collection<? extends Tap> getTraps();
089
090  Collection<? extends Scope> getPreviousScopes( FlowElement flowElement );
091
092  Collection<? extends Scope> getNextScopes( FlowElement flowElement );
093
094  List<? extends ElementGraph> getPipelineGraphs();
095
096  ElementGraph getPipelineGraphFor( FlowElement streamedSource );
097  }