Cluster-setup

BIOS config for disk
- System disk: RAID 1
- Data disks: JBOD

OS: install on demand (Ubuntu / Fedora / CentOS / SUSE / RHEL)
Update the root password: `sudo passwd root`
Passwordless sudo
```bash
su
chmod u+w /etc/sudoers
vi /etc/sudoers
# add: username ALL=(ALL:ALL) NOPASSWD: ALL
chmod u-w /etc/sudoers
```
ssh
```bash
yum install openssh-server
echo "PermitRootLogin yes" >> /etc/ssh/sshd_config
service sshd restart
systemctl enable sshd
```
```bash
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
ssh localhost
```
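For a multi-node cluster the public key also has to reach the other hosts. A minimal sketch, assuming the nodes are named north-190..north-194 and the login user is hadoop (adjust to your own inventory):

```bash
# distribute the local public key to every node (hypothetical hostnames/user)
for host in north-190 north-191 north-192 north-193 north-194; do
  ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@$host
done
```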
partition

fdisk (data disk < 2 TB, MBR)
```bash
partprobe
fdisk -l
fdisk /dev/sdb
```
Create the new partition in the interactive prompt: n, p, 1, then w to write.
```bash
mkfs -t ext4 /dev/sdb1
# mount /dev/sdb1 /mnt/
vi /etc/fstab
# /dev/sdb1 /opt ext4 defaults 0 0
mount -a
```
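The same MBR flow can be scripted instead of typed interactively. A minimal sketch, assuming /dev/sdb is empty and a single partition should span the whole disk:

```bash
# feed the interactive answers (n, p, 1, default start/end, w) to fdisk
printf 'n\np\n1\n\n\nw\n' | fdisk /dev/sdb
partprobe
mkfs -t ext4 /dev/sdb1
```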
parted (data disk > 2 TB, GPT)
```bash
dev="/dev/sdb"
targetdir="/data"
sudo mkdir $targetdir
sudo chmod 755 $targetdir

parted $dev
  mklabel gpt
  mkpart sdb1 ext4 0 50%
  p
  q

# without partitioning, or after partitioning is done
mkfs.ext4 /dev/sdb

lsblk   # identify the data-disk device
blkid   # if the target device is missing, partition first and rerun with sudo, or refresh with partprobe
bid=`sudo blkid $dev |awk -F'"' '{print $2}'`
sudo sh -c "echo ${bid}"
sudo sh -c "echo 'UUID='${bid} ${targetdir} 'ext4 defaults 0 2' >> /etc/fstab"
tail -2 /etc/fstab
sudo mount -a
df -h

# after mounting
sudo mkdir -p /data/tmp
sudo chmod 757 /data/tmp
sudo mkdir -p /data/soft
sudo chmod 755 /data/soft
sudo chown hadoop /data/soft
```
firewall (from CentOS 7 on, firewalld replaces iptables)

Time sync
```bash
sudo systemctl enable chronyd
sudo systemctl start chronyd
sudo vi /etc/chrony.conf
# server 10.153.1.248 iburst
# server 10.153.1.249 iburst
chronyc sources
```

Disable the firewall
```bash
systemctl stop firewalld
systemctl disable firewalld
```
firewalld
```bash
sudo firewall-cmd --permanent --add-rich-rule='rule family="ipv4" source address="10.x.x.x" accept'
sudo systemctl start firewalld
sudo systemctl status firewalld
sudo systemctl stop firewalld
```
Reference: opening ports with firewalld (CSDN blog post)
```bash
# trust a server
firewall-cmd --permanent --add-rich-rule='rule family="ipv4" source address="10.x.x.x" accept'
# firewall-cmd --permanent --add-rich-rule='rule family="ipv4" source address="192.168.1.1" port protocol="tcp" port="80" accept'
# firewall-cmd --permanent --add-rich-rule='rule family="ipv4" source address="192.168.1.1" port protocol="tcp" port="80" reject'
# firewall-cmd --permanent --remove-rich-rule='rule family="ipv4" source address="192.168.1.1" port protocol="tcp" port="80" accept'
firewall-cmd --permanent --add-source=192.168.1.1
firewall-cmd --permanent --add-source=192.168.1.0/24
firewall-cmd --permanent --remove-source=192.168.1.1

# open / close ports
sudo firewall-cmd --permanent --add-port=16010/tcp
sudo firewall-cmd --permanent --remove-port=8485/tcp

# reload so the changes take effect
sudo firewall-cmd --reload
sudo firewall-cmd --list-all
```
network
```bash
tcpdump -nn -s 0 -i any host 10.33.21.191 and host 10.33.21.194
traceroute -nT 10.27.5.201 -p 3306
```
repo
```bash
# /etc/yum.repos.d/
sed -i 's/mirrors.bclinux.org/mirrors-internal.cmecloud.cn/' BCLinux-*.repo
sed -i 's/mirrors.bclinux.org\/bclinux\/el8.2/mirrors-internal.cmecloud.cn\/bc82/' BCLinux-*.repo

# /etc/hosts
# 100.127.128.193 mirrors-internal.cmecloud.cn
# 10.153.1.213    mirrors-internal.cmecloud.cn
```
soft download
- Fedora-Workstation-Live-x86_64-34-1.2.iso
- JDK: https://repo.huaweicloud.com/java/jdk/8u202-b08/
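For example, to pull the JDK tarball used by the install script below into the package directory (assuming the mirror serves the Linux x64 build under this path; check the directory listing first):

```bash
wget -P /data/tmp/pkg https://repo.huaweicloud.com/java/jdk/8u202-b08/jdk-8u202-linux-x64.tar.gz
```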
[[mysql#安装|MySQL]]

zookeeper
```bash
cp conf/zoo_sample.cfg conf/zoo.cfg
bin/zkServer.sh start
bin/zkServer.sh status
echo stat |nc localhost 2181
bin/zkCli.sh -server 127.0.0.1:2181

# ZK IP whitelist; any change must rewrite the full ACL list
setAcl / ip:192.168.1.112:cdrwa,ip:192.168.1.113:cdrwa,ip:127.0.0.1:cdrwa
getAcl /
setAcl / ip:10.27.48.0/24:cdrwa,ip:127.0.0.1:cdrwa
getAcl /
```
ZooKeeper multi-node configuration: zookeeper/conf/zoo.cfg
```
dataDir=/opt/soft/zookeeper/data
server.0=localhost:2888:3888
server.1=north-191:2888:3888
4lw.commands.whitelist=*
```
```bash
echo 0 > /opt/soft/zookeeper/data/myid
```
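Each node needs a distinct myid that matches its server.N entry. A sketch for pushing the ids out, assuming five nodes north-190..north-194 mapped to ids 0..4 and the same dataDir everywhere:

```bash
# hypothetical host list; id follows the server.N numbering in zoo.cfg
id=0
for host in north-190 north-191 north-192 north-193 north-194; do
  ssh hadoop@$host "echo $id > /opt/soft/zookeeper/data/myid"
  id=$((id+1))
done
```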
script
- https://www.oracle.com/java/technologies/javase/javase8-archive-downloads.html
- https://dlcdn.apache.org/
```bash
RES_PATH=/data/tmp/pkg
INSTALL_PATH=/data/soft
binJDK=jdk-8u202-linux-x64.tar.gz
binHadoop=hadoop-3.3.2.tar.gz
binSpark=spark-3.3.1-bin-hadoop3.2.tgz
binFlink=flink-1.15.0-bin-scala_2.12.tgz
binHive=apache-hive-3.1.2-bin.tar.gz
binZk=apache-zookeeper-3.7.1-bin.tar.gz
binDS=apache-dolphinscheduler-2.0.5-bin.tar.gz

mkdir -p $INSTALL_PATH
sudo tar -xzf ${RES_PATH}/${binJDK} -C ${INSTALL_PATH}
sudo tar -xzf ${RES_PATH}/${binHadoop} -C ${INSTALL_PATH}
sudo tar -xzf ${RES_PATH}/${binSpark} -C ${INSTALL_PATH}
sudo tar -xzf ${RES_PATH}/${binFlink} -C ${INSTALL_PATH}
sudo tar -xzf ${RES_PATH}/${binZk} -C ${INSTALL_PATH}
sudo tar -xzf ${RES_PATH}/${binHive} -C ${INSTALL_PATH}
sudo tar -xzf ${RES_PATH}/datax-202210.tar.gz -C ${INSTALL_PATH}

cd $INSTALL_PATH
folder=`tar -tf ${RES_PATH}/${binJDK} |head -1`
ln -s $folder jdk
folder=`tar -tf ${RES_PATH}/${binHadoop} |head -1`
ln -s $folder hadoop
folder=`tar -tf ${RES_PATH}/${binSpark} |head -1`
ln -s $folder spark
folder=`tar -tf ${RES_PATH}/${binFlink} |head -1`
ln -s $folder flink
folder=`tar -tf ${RES_PATH}/${binZk} |head -1 | awk -F/ '{print $1}'`
ln -s $folder zookeeper
folder=`tar -tf ${RES_PATH}/${binHive} |head -1 | awk -F/ '{print $1}'`
ln -s $folder hive

ll $INSTALL_PATH
sudo chown hadoop:hadoop -R $INSTALL_PATH
sudo chmod 755 -R $INSTALL_PATH
```
profile
```bash
echo -e '\n\n#Java' >> /etc/profile
echo 'export JAVA_HOME='${INSTALL_PATH}'/jdk' >> /etc/profile
echo 'export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$PATH' >> /etc/profile
echo 'export CLASSPATH=$CLASSPATH:.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib' >> /etc/profile

echo -e '\n#Hadoop' >> /etc/profile
echo 'export HADOOP_HOME='${INSTALL_PATH}'/hadoop' >> /etc/profile
echo 'export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH' >> /etc/profile
echo 'export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop' >> /etc/profile

echo -e '\n#Spark' >> /etc/profile
echo 'export SPARK_HOME='${INSTALL_PATH}'/spark' >> /etc/profile
echo 'export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH' >> /etc/profile

echo -e '\n#Flink' >> /etc/profile
echo 'export FLINK_HOME='${INSTALL_PATH}'/flink' >> /etc/profile
echo 'export PATH=$FLINK_HOME/bin:$FLINK_HOME/sbin:$PATH' >> /etc/profile

echo -e '\n#zookeeper' >> /etc/profile
echo 'export ZK_HOME='${INSTALL_PATH}'/zookeeper' >> /etc/profile
echo 'export PATH=$ZK_HOME/bin:$PATH' >> /etc/profile

echo -e '\n#Hive' >> /etc/profile
echo 'export HIVE_HOME='${INSTALL_PATH}'/hive' >> /etc/profile
echo 'export PATH=$HIVE_HOME/bin:$PATH' >> /etc/profile
```
```bash
#ZK
export ZK_HOME=/data/soft/zookeeper
export PATH=$ZK_HOME/bin:$PATH

#Hadoop
export HADOOP_HOME=/data/soft/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
export HADOOP_CLASSPATH=`$HADOOP_HOME/bin/hadoop classpath`
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop

#Spark
export SPARK_HOME=/data/soft/spark
export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH

#Flink
export FLINK_HOME=/data/soft/flink
export PATH=$FLINK_HOME/bin:$PATH

#HBase
export HBASE_HOME=/data/soft/hbase
export PATH=$HBASE_HOME/bin:$PATH
```
hadoop
```bash
hadoop checknative -a
```
core-site.xml hadoop/etc/hadoop/core-site.xml
```xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://cluster</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/data/data/hdfs/tmp</value>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>north-190:2181,north-191:2181,north-192:2181,north-193:2181,north-194:2181</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hadoop.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hadoop.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>fs.trash.interval</name>
    <value>1440</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>65536</value>
  </property>
</configuration>
```
hdfs-site.xml
```xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/data/data/hdfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/data/data/hdfs/data</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/data/data/hdfs/jn</value>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>80</value>
  </property>
  <property>
    <name>dfs.datanode.handler.count</name>
    <value>80</value>
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://north-190:8485;north-191:8485;north-192:8485;north-193:8485;north-194:8485/cluster</value>
  </property>
  <property>
    <name>dfs.nameservices</name>
    <value>cluster</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.cluster</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.cluster.nn1</name>
    <value>north-190:9000</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.cluster.nn2</name>
    <value>north-192:9000</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.cluster.nn1</name>
    <value>north-190:50070</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.cluster.nn2</name>
    <value>north-192:50070</value>
  </property>
  <property>
    <name>dfs.namenode.lifeline.rpc-address.cluster.nn1</name>
    <value>north-190:8050</value>
  </property>
  <property>
    <name>dfs.namenode.lifeline.rpc-address.cluster.nn2</name>
    <value>north-192:8050</value>
  </property>
  <property>
    <name>dfs.namenode.lifeline.handler.count</name>
    <value>10</value>
  </property>
  <property>
    <name>dfs.namenode.audit.log.async</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.permissions.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>shell(/bin/true)</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.cluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>256m</value>
  </property>
  <property>
    <name>dfs.datanode.fsdataset.volume.choosing.policy</name>
    <value>org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy</value>
  </property>
  <property>
    <name>dfs.datanode.du.reserved</name>
    <value>107374182400</value>
  </property>
  <property>
    <name>dfs.checksum.type</name>
    <value>CRC32</value>
  </property>
  <property>
    <name>dfs.client.socket-timeout</name>
    <value>600000</value>
  </property>
  <property>
    <name>dfs.datanode.socket.write.timeout</name>
    <value>1200000</value>
  </property>
  <property>
    <name>dfs.datanode.max.transfer.threads</name>
    <value>16384</value>
  </property>
</configuration>
```
mapred-site.xml
```xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.map.output.compress</name>
    <value>true</value>
  </property>
  <property>
    <name>mapreduce.client.submit.file.replication</name>
    <value>3</value>
  </property>
</configuration>
```
yarn-site.xml
```xml
<configuration>
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/data/data/yarn/nm-local-dir</value>
  </property>
  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/data/data/yarn/logs</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>cluster1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>north-190</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>north-193</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>north-190:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>north-193:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>north-190:2181,north-191:2181,north-192:2181,north-193:2181,north-194:2181</value>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>225280</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>40</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
    <description>Enable application log aggregation</description>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>259200</value>
    <description>Keep aggregated logs for 3 days</description>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-check-interval-seconds</name>
    <value>86400</value>
    <description>Interval between runs of the expired-aggregated-log cleaner</description>
  </property>
  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/yarn/logs</value>
    <description>HDFS directory for aggregated logs</description>
  </property>
  <property>
    <name>yarn.log.server.url</name>
    <value>http://cluster/yarn/jobhistory/logs</value>
    <description>URL of the history log server</description>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.pmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.node-labels.fs-store.root-dir</name>
    <value>hdfs://cluster/yarn/node-labels/</value>
  </property>
  <property>
    <name>yarn.node-labels.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.nodemanager.address</name>
    <value>ip:45454</value>
  </property>
</configuration>
```
hadoop-env.sh
```bash
# keep PID files out of /tmp so they are not cleaned up
export HADOOP_PID_DIR=/data/soft/hadoop/pid
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_HEAPSIZE=4096
export HADOOP_NAMENODE_INIT_HEAPSIZE=2048
```
capacity-scheduler.xml: change yarn.scheduler.capacity.resource-calculator to DominantResourceCalculator so that vcores are considered alongside memory, as in the snippet below.
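A sketch of how the property looks in capacity-scheduler.xml:

```xml
<property>
  <name>yarn.scheduler.capacity.resource-calculator</name>
  <value>org.apache.hadoop.yarn.util.resource.DominantResourceCalculator</value>
</property>
```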
slave start-stop script
```bash
$ZK_HOME/bin/zkServer.sh start

$HADOOP_HOME/sbin/hadoop-daemon.sh start namenode
$HADOOP_HOME/sbin/hadoop-daemon.sh start datanode
$HADOOP_HOME/sbin/hadoop-daemon.sh start journalnode
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc
$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager
$HADOOP_HOME/sbin/yarn-daemon.sh start nodemanager
$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
yarn rmadmin -getAllServiceState

service mariadb start

$SPARK_HOME/sbin/start-history-server.sh

hbase-daemon.sh start master
hbase-daemon.sh start master --backup
hbase-daemon.sh start regionserver
```
```bash
hdfs namenode -format

hdfs dfs -mkdir /input
hdfs dfs -put README.txt /input
hdfs dfs -ls /input
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.3.jar wordcount /input /output
hdfs dfs -ls /output
hdfs dfs -tail /output/part-r-00000
hdfs dfs -rmr /output /input
```
Client configuration for cross-cluster access between simple-auth and Kerberos clusters
```xml
<property>
  <name>ipc.client.fallback-to-simple-auth-allowed</name>
  <value>true</value>
</property>
<property>
  <name>fs.defaultFS</name>
  <value>hdfs://gdhlwtz</value>
</property>
<property>
  <name>hadoop.security.authentication</name>
  <value>kerberos</value>
</property>
```
```xml
<property>
  <name>dfs.nameservices</name>
  <value>gdhlwtz</value>
</property>
<property>
  <name>dfs.ha.namenodes.gdhlwtz</name>
  <value>nn1,nn2</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.gdhlwtz.nn1</name>
  <value>xxx:8020</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.gdhlwtz.nn2</name>
  <value>xxx:8020</value>
</property>
<property>
  <name>dfs.client.failover.proxy.provider.gdhlwtz</name>
  <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
  <name>dfs.namenode.kerberos.principal</name>
  <value>nn/_HOST@HLWKDC</value>
</property>
```
spark snappy usage
```bash
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native:/usr/lib64
spark-shell --master local[1]
```
Without further configuration the Thrift server uses local resources:
```bash
./sbin/start-thriftserver.sh
./bin/beeline -u jdbc:hive2://localhost:10000 -n hadoop
```
```sql
CREATE TABLE parquet_test (
  id int,
  str string)
STORED AS PARQUET;

insert into table parquet_test values(1,'a'),(2,'b');
select * from parquet_test;
drop table parquet_test;
```
spark-defaults.conf
```bash
cd jars/
zip spark-jar.zip ./*
mv spark-jar.zip ../
cd ..
chmod 644 spark-jar.zip
```
Alternatively:
```bash
hdfs dfs -mkdir /spark
hdfs dfs -put jars/ /spark
```
```
spark.master=yarn
#spark.yarn.jars=hdfs:///spark/jars/*.jar
spark.yarn.archive hdfs:///spark/spark-jar.zip
spark.serializer=org.apache.spark.serializer.KryoSerializer
spark.sql.warehouse.dir=hdfs:///hive/warehouse
spark.eventLog.enabled true
spark.eventLog.dir hdfs:///spark/spark-history
spark.eventLog.compress true
spark.history.fs.logDirectory hdfs:///spark/spark-history
spark.sql.hive.metastore.version=2.3.7
```
```bash
export SPARK_DIST_CLASSPATH=$(hadoop classpath)
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
```
#### hive-site.xml

Create the database manually first: `create database hive character set latin1;`

```xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://ip:3306/hive?createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8&amp;useSSL=false</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>xxx</value>
  </property>
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
  </property>
  <property>
    <name>datanucleus.schema.autoCreateAll</name>
    <value>true</value>
  </property>
</configuration>
```
flink
```bash
./bin/start-cluster.sh
./bin/flink run examples/streaming/WordCount.jar
./bin/flink run -m yarn-cluster examples/streaming/WordCount.jar
tail log/flink-*-taskexecutor-*.out
./bin/stop-cluster.sh
```
Usage example
dolphinscheduler

Restrict which users sudo may switch to:
```
root ALL=(ALL) ALL
dolphinscheduler ALL=(hadoop) NOPASSWD:ALL
```
Sudoers syntax: `user host=(run-as-user:run-as-group) commands`
- user: the user or group the rule applies to; `%sudo` means every member of the sudo group
- host: where the command may be run from; ALL means any host
- run-as-user: which users sudo may switch to; ALL means any user
- run-as-group: which groups sudo may switch to; ALL means any group
- commands: what may be executed via sudo; `NOPASSWD:ALL` means any command, no password required

```bash
yum -y install psmisc
tar -xf ${binDS}
folder=`tar -tf ${binDS} |head -1 | awk -F/ '{print $1}'`
ln -s $folder dolphinscheduler
```
zk info to restart service
```bash
echo "ls /dolphinscheduler/nodes/master" |zkCli.sh -server <ip>:2181 |tail -2

vi ds-watcher.sh
#!/bin/bash

restart_master() {
    #pid=`pgrep -f org.apache.dolphinscheduler.server.master.MasterServer`
    /usr/bin/pkill -f org.apache.dolphinscheduler.server.master.MasterServer
    rm -f /data/dolphinscheduler-1.3.6/pid/dolphinscheduler-master-server.pid
    sleep 3s
    su dolphinscheduler -c "sh /data/dolphinscheduler-1.3.6/bin/dolphinscheduler-daemon.sh start master-server"
}

restart_worker() {
    #pid=`pgrep -f org.apache.dolphinscheduler.server.worker.WorkerServer`
    /usr/bin/pkill -f org.apache.dolphinscheduler.server.worker.WorkerServer
    rm -f /data/dolphinscheduler-1.3.6/pid/dolphinscheduler-worker-server.pid
    sleep 3s
    su dolphinscheduler -c "sh /data/dolphinscheduler-1.3.6/bin/dolphinscheduler-daemon.sh start worker-server"
}

# master
ret=`java -cp /data/dolphinscheduler-1.3.6/ds-watcher.jar ds.ZkCheckNode master:2181,slave1:2181,slave2:2181 /dolphinscheduler/nodes/master/10.17.41.129:5678`
if [ $ret = 'false' ] ;then
    restart_master
fi

# worker
ret=`java -cp /data/dolphinscheduler-1.3.6/ds-watcher.jar ds.ZkCheckNode master:2181,slave1:2181,slave2:2181 /dolphinscheduler/nodes/worker/10.17.41.129/10.17.41.129:1234`
if [ $ret = 'false' ] ;then
    restart_worker
fi

crontab -e
*/5 * * * * /bin/sh /data/dolphinscheduler-1.3.6/ds-watcher.sh
```
hbase
```bash
tar -xf hbase-2.4.6-bin.tar.gz
folder=`tar -tf ${binHBase} |head -1 | awk -F/ '{print $1}'`
ln -s $folder hbase
```
For a local test, add hbase.rootdir to hbase-site.xml:
```xml
<property>
  <name>hbase.rootdir</name>
  <value>file:///opt/soft/hbase-data</value>
</property>
```
Manual start (optionally with --config "${HBASE_CONF_DIR}"):
```bash
hbase-daemon.sh start master
hbase-daemon.sh start regionserver
hbase-daemon.sh start master --backup

bin/start-hbase.sh
./bin/hbase shell
```
```
create 'test', 'cf'
list 'test'
describe 'test'
put 'test', 'row1', 'cf:a', 'value1'
scan 'test'
get 'test', 'row1'
disable 'test'
drop 'test'
```
hbase-site.xml
```xml
<configuration>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://cluster/hbase</value>
  </property>
  <property>
    <name>hbase.tmp.dir</name>
    <value>/data/data/hbase/tmp</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/data/data/hbase/zkdata</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>north-190:2181,north-191:2181,north-192:2181,north-193:2181,north-194:2181</value>
  </property>
  <property>
    <name>hbase.unsafe.stream.capability.enforce</name>
    <value>false</value>
  </property>
</configuration>
```
zeppelin
```bash
tar xf zeppelin-0.10.0-bin-all.tgz
cp conf/zeppelin-site.xml.template conf/zeppelin-site.xml
vi conf/zeppelin-site.xml
cp zeppelin-env.sh.template zeppelin-env.sh
vi zeppelin-env.sh
bin/zeppelin-daemon.sh start
```
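In conf/zeppelin-site.xml the bind address and port come from zeppelin.server.addr and zeppelin.server.port; in zeppelin-env.sh the usual edits are the home paths. A sketch with assumed paths (reusing the /data/soft layout from earlier; adjust to the actual install):

```bash
# zeppelin-env.sh (hypothetical values)
export JAVA_HOME=/data/soft/jdk
export SPARK_HOME=/data/soft/spark
export HADOOP_CONF_DIR=/data/soft/hadoop/etc/hadoop
```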
setting: `source /etc/profile`
hadoop

vi etc/hadoop/core-site.xml
```xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/hdfs/tmp</value>
    <description>temporary directories.</description>
  </property>
</configuration>
```
vi etc/hadoop/hdfs-site.xml
```xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/opt/hdfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/opt/hdfs/data</value>
  </property>
</configuration>
```
vi etc/hadoop/hadoop-env.sh
```bash
# limit who can execute certain subcommands.
export JAVA_HOME=/opt/soft/jdk
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
```
Distribute & start
```bash
#scp -r hadoop-3.2.1 root@hadoop1:/data1/
cd $HADOOP_HOME
mkdir -p /opt/hdfs/name
mkdir -p /opt/hdfs/data
mkdir -p /opt/hdfs/tmp

hdfs namenode -format
start-dfs.sh
# http://192.168.56.101:9870/
start-yarn.sh
# http://192.168.56.101:8088/
```
TestCase
```bash
hdfs dfs -mkdir /in
hdfs dfs -put README.txt /in
hdfs dfs -ls /in
hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar wordcount /in /out
hdfs dfs -cat /out/part-r-00000 |head
hdfs dfs -rmr /in /out
```
HA startup
```bash
# on the active (primary) node
hdfs zkfc -formatZK
hdfs namenode -format
# on the standby node
hdfs namenode -bootstrapStandby
```
HA failover
```bash
# nn1 -> nn2
hdfs haadmin -getAllServiceState
hdfs haadmin -failover nn1 nn2
hdfs haadmin -getAllServiceState

# nn2 -> nn1
hdfs haadmin -failover nn2 nn1
```
spark
```bash
cd $SPARK_HOME
hdfs dfs -mkdir -p /spark/lib
hdfs dfs -mkdir -p /spark/spark-history
hdfs dfs -put jars/* /spark/lib

cp conf/spark-env.sh.template conf/spark-env.sh
echo -e '\nexport HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop' >> conf/spark-env.sh

cp conf/spark-defaults.conf.template conf/spark-defaults.conf
echo -e '\n\n' >> conf/spark-defaults.conf
echo 'spark.master=yarn' >> conf/spark-defaults.conf
echo 'spark.yarn.jars=hdfs:///spark/lib' >> conf/spark-defaults.conf
echo 'spark.serializer=org.apache.spark.serializer.KryoSerializer' >> conf/spark-defaults.conf
echo 'spark.sql.warehouse.dir=hdfs:///user/hive/warehouse' >> conf/spark-defaults.conf
echo 'spark.eventLog.enabled true' >> conf/spark-defaults.conf
echo 'spark.eventLog.dir hdfs:///spark/spark-history' >> conf/spark-defaults.conf
echo 'spark.eventLog.compress true' >> conf/spark-defaults.conf
echo 'spark.history.fs.logDirectory hdfs:///spark/spark-history' >> conf/spark-defaults.conf

./bin/spark-submit --master yarn --class org.apache.spark.examples.SparkPi examples/jars/spark-examples*.jar 10
./sbin/start-thriftserver.sh --driver-memory 2g --executor-memory 4g --executor-cores 5 --num-executors 5
./bin/beeline -n root -u jdbc:hive2://localhost:10000
```
Access-control options
- thriftserver with a proxy user (a shared STS can then serve restricted reads and writes)
- zeppelin without a proxy user (each user's SQL can be tracked, but writes are not possible)
- grant the relevant HDFS directory permissions to the proxy user (see the sketch below)
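A sketch of granting a warehouse subtree to the proxy user, assuming the proxy user is zeppelin and databases live under /user/hive/warehouse (paths and group are assumptions, adjust to your layout):

```bash
# give the zeppelin proxy user its own writable database directory (hypothetical paths)
hdfs dfs -mkdir -p /user/hive/warehouse/zeppelin.db
hdfs dfs -chown -R zeppelin:hadoop /user/hive/warehouse/zeppelin.db
hdfs dfs -chmod -R 770 /user/hive/warehouse/zeppelin.db
```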
Example thriftserver launch with a proxy user:
```bash
sbin/start-thriftserver.sh --master yarn --driver-cores 2 --driver-memory 6G \
  --executor-cores 5 --executor-memory 6G --num-executors 10 \
  --proxy-user zeppelin \
  --conf spark.default.parallelism=80 \
  --conf spark.sql.shuffle.partitions=80 \
  --conf spark.sql.adaptive.enabled=true \
  --conf spark.scheduler.mode=FAIR \
  --conf spark.network.timeout=600s \
  --conf spark.memory.fraction=0.8 \
  --conf spark.dynamicAllocation.shuffleTracking.enabled=true \
  --conf spark.dynamicAllocation.shuffleTracking.timeout=180000 \
  --conf spark.dynamicAllocation.enabled=true \
  --conf spark.dynamicAllocation.minExecutors=3 \
  --conf spark.dynamicAllocation.maxExecutors=50 \
  --hiveconf hive.server2.thrift.port=10199 \
  --hiveconf hive.default.fileformat=Orc
```
```sql
create database t;
use t;
create table test(id int, name string) stored as parquet;
desc formatted test;
insert into table test values (1,'a'),(2,'b');
select * from test;
```
hive create db and user in mysql
```sql
create database hive;
grant all on hive.* to hive@'%' identified by 'Hive!@#2023';
grant all on hive.* to hive@'localhost' identified by 'Hive!@#2023';
flush privileges;
```
Enable Chinese table comments and Chinese data import for Hive (run against the metastore database in MySQL):
```sql
alter table COLUMNS_V2 modify column COMMENT varchar(256) character set utf8;
alter table TABLE_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
alter table PARTITION_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
alter table PARTITION_KEYS modify column PKEY_COMMENT varchar(4000) character set utf8;
alter table INDEX_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
```
You also need to run the TXN schema initialization SQL shipped with the Hive distribution against the hive database in MySQL.
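A hedged example of locating and applying that script; the exact file name depends on the Hive version, so list the directory first:

```bash
# the txn schema scripts sit next to the metastore schema scripts in the Hive distribution
ls $HIVE_HOME/scripts/metastore/upgrade/mysql/ | grep -i txn
mysql -uhive -p hive < $HIVE_HOME/scripts/metastore/upgrade/mysql/hive-txn-schema-<version>.mysql.sql
```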
```bash
mv mysql-connector-java-5.1.48.jar $HIVE_HOME/lib
cd $HIVE_HOME/conf
#cp hive-env.sh.template hive-env.sh
cp hive-default.xml.template hive-site.xml
cp hive-log4j2.properties.template hive-log4j2.properties
cp hive-exec-log4j2.properties.template hive-exec-log4j2.properties
```
vi hive-site.xml
```xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://[IP]:3306/[DB]?characterEncoding=UTF-8&amp;useSSL=false</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
  </property>
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://[hostname]:9083</value>
    <description>Thrift uri for the remote metastore. Used by metastore client to connect to remote metastore.</description>
  </property>
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
  </property>
</configuration>
```
In vi, replace the ${system:...} variables with absolute paths:
```
:%s#${system:java.io.tmpdir}#/tmp/javaiotmp#g
:%s#${system:user.name}#hive#g
```
```bash
hadoop fs -mkdir -p /user/hive/warehouse
hadoop fs -mkdir -p /user/hive/tmp
hadoop fs -mkdir -p /user/hive/log
hadoop fs -chmod -R 777 /user/hive/warehouse
hadoop fs -chmod -R 777 /user/hive/tmp
hadoop fs -chmod -R 777 /user/hive/log
```
initialize
```bash
$HIVE_HOME/bin/schematool -dbType mysql -initSchema hive hive
```
standalone run
```bash
nohup $HIVE_HOME/bin/hiveserver2 &

$HIVE_HOME/bin/beeline
!connect jdbc:hive2://localhost:10000 hive hive
```
As the metastore service for Spark, copy hive-site.xml to $SPARK_HOME/conf
```bash
hive --service metastore &
spark-sql
```
```sql
CREATE TABLE emp (empno int, name string) stored as PARQUET;
insert into table emp values (1,'a'),(2,'b');

CREATE TABLE info (age int, name string) stored as PARQUET;
insert into table info values (11,'a'),(22,'b');
```
role
Reference: https://www.cnblogs.com/yszd/p/11086677.html
```sql
set role admin;
show roles;
SHOW CURRENT ROLES;
CREATE ROLE guest;
show grant on all;
show grant user manhua on database default;

GRANT SELECT ON TABLE default.emp TO ROLE guest;
grant select on database default to user manhua;
GRANT ROLE guest TO USER hadoop;

REVOKE ALL PRIVILEGES on default.emp from user hadoop;
revoke role role_test1 from user jayliu;
revoke ALL on database default from user lisi;
revoke select on database default from user hive;
revoke select on TABLE default.emp from user hadoop;
```
dolphinscheduler
```bash
useradd dolphinscheduler
echo "dolphinscheduler" | passwd --stdin dolphinscheduler
sed -i '$adolphinscheduler ALL=(ALL) NOPASSWD: ALL' /etc/sudoers
sed -i 's/Defaults requiretty/#Defaults requiretty/g' /etc/sudoers
chown -R dolphinscheduler:dolphinscheduler $folder
```
su dolphinscheduler
```bash
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
```
mysql -uroot -p
```sql
CREATE DATABASE dolphinscheduler DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'dolphinscheduler'@'%' IDENTIFIED BY 'ds';
GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'dolphinscheduler'@'localhost' IDENTIFIED BY 'ds';
flush privileges;
```
vi conf/datasource.properties
```
SPRING_DATASOURCE_URL="jdbc:mysql://10.3.16.120:3306,10.3.16.121:3306/dolphinscheduler?autoReconnect=true&useUnicode=true&characterEncoding=utf-8&failOverReadOnly=false&useSSL=false"
SPRING_DATASOURCE_DRIVER_CLASS_NAME=com.mysql.cj.jdbc.Driver

spring.datasource.driver-class-name=com.mysql.jdbc.Driver
spring.datasource.url="jdbc:mysql://localhost:3306/dolphinscheduler?useUnicode=true&characterEncoding=UTF-8&useSSL=false"
spring.datasource.username=dolphinscheduler
spring.datasource.password=ds
```
Garbled Chinese in the description column:
```sql
ALTER TABLE t_ds_project
  CHANGE description description VARCHAR(255) CHARACTER SET UTF8 COLLATE utf8_general_ci;
```
Download the MySQL connector jar (5.1.47) into lib, e.g. as sketched below.
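A hedged example, assuming the jar is already in the current directory and DolphinScheduler is installed at /opt/soft/dolphinscheduler:

```bash
# hypothetical local file and install dir
cp mysql-connector-java-5.1.47.jar /opt/soft/dolphinscheduler/lib/
```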
link
```bash
vi conf/env/dolphinscheduler_env.sh
vi install.sh
# copy core-site.xml and hdfs-site.xml into conf/
cp core-site.xml hdfs-site.xml conf/
```
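Typical contents of conf/env/dolphinscheduler_env.sh, as a sketch; the paths below reuse the /opt/soft layout used elsewhere in these notes and are assumptions, not the shipped defaults:

```bash
# hypothetical install paths; point these at the real JDK/Hadoop/Spark/Hive/DataX homes
export JAVA_HOME=/opt/soft/jdk
export HADOOP_HOME=/opt/soft/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_HOME2=/opt/soft/spark
export HIVE_HOME=/opt/soft/hive
export DATAX_HOME=/opt/soft/datax
export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$SPARK_HOME2/bin:$HIVE_HOME/bin:$PATH
```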
frequent usage

start
```bash
pushd $HADOOP_HOME
sbin/start-all.sh
popd

pushd $ZK_HOME
bin/zkServer.sh start
popd

service mariadb start

pushd /opt/soft/dolphinscheduler
sudo -u dolphinscheduler script/start-all.sh
popd
```
http://[ip]:12345/dolphinscheduler/#/home
stop
```bash
pushd /opt/soft/dolphinscheduler
sudo -u dolphinscheduler script/stop-all.sh
popd

pushd $ZK_HOME
bin/zkServer.sh stop
popd

pushd $HADOOP_HOME
sbin/stop-all.sh
popd
```