001/*
002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.flow.hadoop;
022
023import java.beans.ConstructorProperties;
024import java.util.Map;
025import java.util.Properties;
026
027import cascading.flow.FlowConnector;
028import cascading.flow.hadoop.planner.HadoopPlanner;
029import cascading.flow.hadoop.planner.MapReduceHadoopRuleRegistry;
030import cascading.flow.planner.FlowPlanner;
031import cascading.flow.planner.rule.RuleRegistrySet;
032import cascading.pipe.Pipe;
033import cascading.scheme.Scheme;
034import cascading.scheme.hadoop.SequenceFile;
035import cascading.tap.Tap;
036
037/**
038 * Use the HadoopFlowConnector to link source and sink {@link Tap} instances with an assembly of {@link Pipe} instances into
039 * an executable {@link HadoopFlow} for execution on an Apache Hadoop cluster.
040 *
041 * @see cascading.property.AppProps
042 * @see cascading.flow.FlowConnectorProps
043 * @see cascading.flow.FlowDef
044 * @see cascading.flow.hadoop.MapReduceFlow
045 */
046public class HadoopFlowConnector extends FlowConnector
047  {
048  /**
049   * Constructor HadoopFlowConnector creates a new HadoopFlowConnector instance.
050   * <p/>
051   * All properties passed to Hadoop are retrieved from a default instantiation of the Hadoop
052   * {@link org.apache.hadoop.mapred.JobConf} which pulls all properties from the local CLASSPATH.
053   */
054  public HadoopFlowConnector()
055    {
056    }
057
058  /**
059   * Constructor HadoopFlowConnector creates a new HadoopFlowConnector instance using the given {@link Properties} instance as
060   * default value for the underlying jobs. All properties are copied to a new native configuration instance.
061   *
062   * @param properties of type Map
063   */
064  @ConstructorProperties({"properties"})
065  public HadoopFlowConnector( Map<Object, Object> properties )
066    {
067    super( properties );
068    }
069
070  /**
071   * Constructor HadoopFlowConnector creates a new HadoopFlowConnector instance.
072   * <p/>
073   * All properties passed to Hadoop are retrieved from a default instantiation of the Hadoop
074   * {@link org.apache.hadoop.mapred.JobConf} which pulls all properties from the local CLASSPATH.
075   *
076   * @param ruleRegistrySet of type RuleRegistry
077   */
078  @ConstructorProperties({"ruleRegistrySet"})
079  public HadoopFlowConnector( RuleRegistrySet ruleRegistrySet )
080    {
081    super( ruleRegistrySet );
082    }
083
084  /**
085   * Constructor HadoopFlowConnector creates a new HadoopFlowConnector instance using the given {@link Properties} instance as
086   * default value for the underlying jobs. All properties are copied to a new native configuration instance.
087   *
088   * @param properties      of type Map
089   * @param ruleRegistrySet of type RuleRegistry
090   */
091  @ConstructorProperties({"properties", "ruleRegistrySet"})
092  public HadoopFlowConnector( Map<Object, Object> properties, RuleRegistrySet ruleRegistrySet )
093    {
094    super( properties, ruleRegistrySet );
095    }
096
097  @Override
098  protected Class<? extends Scheme> getDefaultIntermediateSchemeClass()
099    {
100    return SequenceFile.class;
101    }
102
103  @Override
104  protected FlowPlanner createFlowPlanner()
105    {
106    return new HadoopPlanner();
107    }
108
109  @Override
110  protected RuleRegistrySet createDefaultRuleRegistrySet()
111    {
112    return new RuleRegistrySet( new MapReduceHadoopRuleRegistry() );
113    }
114  }