Cluster-setup

BIOS config for disks:
- System disk: RAID 1
- Data disks: JBOD

OS as needed: ubuntu / fedora / centos / suse / redhat
Update the root password:
```bash
sudo passwd root
```
Passwordless sudo:
```bash
su
chmod u+w /etc/sudoers
vi /etc/sudoers
# username ALL=(ALL:ALL) NOPASSWD: ALL
chmod u-w /etc/sudoers
```
ssh
```bash
yum install openssh-server
echo "PermitRootLogin yes" >> /etc/ssh/sshd_config
service sshd restart
systemctl enable sshd
```
```bash
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
ssh localhost
```
partition

fdisk - data disk < 2 TB, MBR
```bash
partprobe
fdisk -l
fdisk /dev/sdb
```
Create a new partition with the key sequence n, p, 1, w (new, primary, partition 1, write).
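If this needs to be scripted, the same key sequence can be fed to fdisk on stdin; a minimal sketch, assuming /dev/sdb is the intended data disk and accepting the default start/end sectors:
```bash
# n = new, p = primary, 1 = partition number, two blank lines accept defaults, w = write
printf 'n\np\n1\n\n\nw\n' | fdisk /dev/sdb
partprobe /dev/sdb   # re-read the partition table
```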
```bash
mkfs -t ext4 /dev/sdb1
# mount /dev/sdb1 /mnt/
vi /etc/fstab
/dev/sdb1 /opt ext4 defaults 0 0
mount -a
```
parted - data disk > 2 TB, GPT
```bash
lsblk            # identify the data disk device
dev="/dev/sdb"
targetdir="/data"
sudo mkdir $targetdir
sudo chmod 755 $targetdir

# Partitioning (optional)
parted $dev
mklabel gpt
mkpart sdb1 ext4 0 50%
p
q

# Without partitioning, or after partitioning is done
sudo mkfs.ext4 /dev/sdb?
# If the target device is not found, partition first and rerun under sudo, or refresh with partprobe
blkid
bid=`sudo blkid $dev |awk -F'"' '{print $2}'`
sudo sh -c "echo ${bid}"
sudo sh -c "echo 'UUID='${bid} ${targetdir} 'ext4 defaults 0 2' >> /etc/fstab"
tail -2 /etc/fstab
sudo mount -a
df -h

# After mounting
sudo mkdir -p /data/tmp
sudo chmod 757 /data/tmp
sudo mkdir -p /data/soft
sudo chmod 755 /data/soft
sudo chown hadoop /data/soft
```
Repairing a corrupted superblock:
```bash
mkfs.ext4 -E nodiscard /dev/sdb?
# Superblock backups stored on blocks:
#   32768, 98304, 163840, 229376, 294912, 819200, 884736, 1605632, 2654208,
#   4096000, 7962624, 11239424, 20480000, 23887872, 71663616, 78675968,
#   102400000, 214990848, 512000000, 550731776, 644972544, 1934917632,
#   2560000000, 3855122432, 5804752896, 12800000000, 17414258688, 26985857024
sudo e2fsck -b 20480000 /dev/sdb
```
firewall [from CentOS 7 onward, firewalld replaces iptables]
- Time synchronization
```bash
sudo systemctl enable chronyd
sudo systemctl start chronyd
sudo vi /etc/chrony.conf
chronyc sources
# lines added to /etc/chrony.conf:
server 10.153.1.248 iburst
server 10.153.1.249 iburst
```
- Firewall
```bash
systemctl stop firewalld
systemctl disable firewalld
```
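A quick way to confirm chrony picked up the new servers after editing the config (standard chronyc subcommands, not from the original notes):
```bash
sudo systemctl restart chronyd   # reload the edited /etc/chrony.conf
chronyc sources -v               # list configured NTP sources and their state
chronyc tracking                 # show current offset and stratum
```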
firewalld
```bash
sudo systemctl start firewalld
sudo systemctl status firewalld
sudo systemctl stop firewalld
```
Opening ports with firewalld (CSDN blog)
```bash
# Trust a server
firewall-cmd --permanent --add-rich-rule='rule family="ipv4" source address="10.x.x.x" accept'
# firewall-cmd --permanent --add-rich-rule='rule family="ipv4" source address="192.168.1.1" port protocol="tcp" port="80" accept'
# firewall-cmd --permanent --add-rich-rule='rule family="ipv4" source address="192.168.1.1" port protocol="tcp" port="80" reject'
# firewall-cmd --permanent --remove-rich-rule='rule family="ipv4" source address="192.168.1.1" port protocol="tcp" port="80" accept'
firewall-cmd --permanent --add-source=192.168.1.1
firewall-cmd --permanent --add-source=192.168.1.0/24
firewall-cmd --permanent --remove-source=192.168.1.1

# Configure ports
sudo firewall-cmd --permanent --add-port=16010/tcp
sudo firewall-cmd --permanent --remove-port=8485/tcp

# Reload to apply the configuration
sudo firewall-cmd --reload
sudo firewall-cmd --list-all
```
network
```bash
tcpdump -nn -s 0 -i any host 10.33.21.191 and host 10.33.21.194
traceroute -nT 10.27.5.201 -p 3306
```
repo
```bash
# /etc/yum.repos.d/
sed -i 's/mirrors.bclinux.org/mirrors-internal.cmecloud.cn/' BCLinux-*.repo
sed -i 's/mirrors.bclinux.org\/bclinux\/el8.2/mirrors-internal.cmecloud.cn\/bc82/' BCLinux-*.repo

# /etc/hosts
100.127.128.193 mirrors-internal.cmecloud.cn
10.153.1.213 mirrors-internal.cmecloud.cn
```
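After pointing the repos at the internal mirror, the yum cache usually needs to be rebuilt; a small follow-up sketch:
```bash
sudo yum clean all    # drop metadata cached from the old mirror
sudo yum makecache    # rebuild the cache from mirrors-internal.cmecloud.cn
sudo yum repolist     # confirm the repos resolve
```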
soft download
- Fedora-Workstation-Live-x86_64-34-1.2.iso
- https://repo.huaweicloud.com/java/jdk/8u202-b08/
[[mysql#安装|MySQL]]

zookeeper
```bash
cp conf/zoo_sample.cfg conf/zoo.cfg
bin/zkServer.sh start
bin/zkServer.sh status
echo stat |nc localhost 2181
bin/zkCli.sh -server 127.0.0.1:2181

# ZK IP whitelist; a change must overwrite the whole list
setAcl / ip:192.168.1.112:cdrwa,ip:192.168.1.113:cdrwa,ip:127.0.0.1:cdrwa
getAcl /
setAcl / ip:10.27.48.0/24:cdrwa,ip:127.0.0.1:cdrwa
getAcl /
```
Multi-node ZooKeeper configuration: zookeeper/conf/zoo.cfg
```properties
dataDir=/opt/soft/zookeeper/data
server.0=localhost:2888:3888
server.1=north-191:2888:3888
4lw.commands.whitelist=*
autopurge.snapRetainCount=3
autopurge.purgeInterval=24
```
echo 0 > /opt/soft/zookeeper/data/myid
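Each node needs its own myid matching the server.N index in zoo.cfg; a hypothetical sketch for the two hosts listed above (host names and dataDir follow the sample config, adjust as needed):
```bash
# write myid=0 on localhost and myid=1 on north-191, matching server.0 / server.1
i=0
for host in localhost north-191; do
  ssh "$host" "mkdir -p /opt/soft/zookeeper/data && echo $i > /opt/soft/zookeeper/data/myid"
  i=$((i+1))
done
```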
script
- https://www.oracle.com/java/technologies/javase/javase8-archive-downloads.html
- https://dlcdn.apache.org/
```bash
RES_PATH=/data/tmp/pkg
INSTALL_PATH=/data/soft
binJDK=jdk-8u202-linux-x64.tar.gz
binHadoop=hadoop-3.3.2.tar.gz
binSpark=spark-3.3.1-bin-hadoop3.2.tgz
binFlink=flink-1.15.0-bin-scala_2.12.tgz
binHive=apache-hive-3.1.2-bin.tar.gz
binZk=apache-zookeeper-3.7.1-bin.tar.gz
binDS=apache-dolphinscheduler-2.0.5-bin.tar.gz

mkdir -p $INSTALL_PATH
sudo tar -xzf ${RES_PATH}/${binJDK} -C ${INSTALL_PATH}
sudo tar -xzf ${RES_PATH}/${binHadoop} -C ${INSTALL_PATH}
sudo tar -xzf ${RES_PATH}/${binSpark} -C ${INSTALL_PATH}
sudo tar -xzf ${RES_PATH}/${binFlink} -C ${INSTALL_PATH}
sudo tar -xzf ${RES_PATH}/${binZk} -C ${INSTALL_PATH}
sudo tar -xzf ${RES_PATH}/${binHive} -C ${INSTALL_PATH}
sudo tar -xzf ${RES_PATH}/datax-202210.tar.gz -C ${INSTALL_PATH}

cd $INSTALL_PATH
folder=`tar -tf ${RES_PATH}/${binJDK} |head -1`
ln -s $folder jdk
folder=`tar -tf ${RES_PATH}/${binHadoop} |head -1`
ln -s $folder hadoop
folder=`tar -tf ${RES_PATH}/${binSpark} |head -1`
ln -s $folder spark
folder=`tar -tf ${RES_PATH}/${binFlink} |head -1`
ln -s $folder flink
folder=`tar -tf ${RES_PATH}/${binZk} |head -1 | awk -F/ '{print $1}'`
ln -s $folder zookeeper
folder=`tar -tf ${RES_PATH}/${binHive} |head -1 | awk -F/ '{print $1}'`
ln -s $folder hive

ll $INSTALL_PATH
sudo chown hadoop:hadoop -R $INSTALL_PATH
sudo chmod 755 -R $INSTALL_PATH
```
profile. If root privileges are needed for writing, replace `>>` with `| sudo tee -a` (see the example after the block below).
```bash
echo -e '\n\n#Java' >> /etc/profile
echo 'export JAVA_HOME='${INSTALL_PATH}'/jdk' >> /etc/profile
echo 'export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$PATH' >> /etc/profile
echo 'export CLASSPATH=$CLASSPATH:.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib' >> /etc/profile

echo -e '\n#Hadoop' >> /etc/profile
echo 'export HADOOP_HOME='${INSTALL_PATH}'/hadoop' >> /etc/profile
echo 'export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH' >> /etc/profile
echo 'export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop' >> /etc/profile

echo -e '\n#Spark' >> /etc/profile
echo 'export SPARK_HOME='${INSTALL_PATH}'/spark' >> /etc/profile
echo 'export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH' >> /etc/profile

echo -e '\n#Flink' >> /etc/profile
echo 'export FLINK_HOME='${INSTALL_PATH}'/flink' >> /etc/profile
echo 'export PATH=$FLINK_HOME/bin:$FLINK_HOME/sbin:$PATH' >> /etc/profile

echo -e '\n#zookeeper' >> /etc/profile
echo 'export ZK_HOME='${INSTALL_PATH}'/zookeeper' >> /etc/profile
echo 'export PATH=$ZK_HOME/bin:$PATH' >> /etc/profile

echo -e '\n#Hive' >> /etc/profile
echo 'export HIVE_HOME='${INSTALL_PATH}'/hive' >> /etc/profile
echo 'export PATH=$HIVE_HOME/bin:$PATH' >> /etc/profile
```
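For reference, the same append done without a root shell via `sudo tee -a` (illustrative, one line shown):
```bash
# tee -a appends to /etc/profile with root privileges; echo itself needs no sudo
echo 'export JAVA_HOME=/data/soft/jdk' | sudo tee -a /etc/profile
```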
```bash
#ZK
export ZK_HOME=/data/soft/zookeeper
export PATH=$ZK_HOME/bin:$PATH

#Hadoop
export HADOOP_HOME=/data/soft/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
export HADOOP_CLASSPATH=`$HADOOP_HOME/bin/hadoop classpath`
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop

#Spark
export SPARK_HOME=/data/soft/spark
export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH

#Flink
export FLINK_HOME=/data/soft/flink
export PATH=$FLINK_HOME/bin:$PATH

#HBase
export HBASE_HOME=/data/soft/hbase
export PATH=$HBASE_HOME/bin:$PATH
```
hadoop
```bash
hadoop checknative -a
```
core-site.xml hadoop/etc/hadoop/core-site.xml
```xml
<configuration>
  <property><name>fs.defaultFS</name><value>hdfs://cluster</value></property>
  <property><name>hadoop.tmp.dir</name><value>/data/data/hdfs/tmp</value></property>
  <property><name>ha.zookeeper.quorum</name><value>north-190:2181,north-191:2181,north-192:2181,north-193:2181,north-194:2181</value></property>
  <property><name>hadoop.proxyuser.hadoop.hosts</name><value>*</value></property>
  <property><name>hadoop.proxyuser.hadoop.groups</name><value>*</value></property>
  <property><name>fs.trash.interval</name><value>1440</value></property>
  <property><name>io.file.buffer.size</name><value>65536</value></property>
</configuration>
```
hdfs-site.xml
```xml
<configuration>
  <property><name>dfs.replication</name><value>2</value></property>
  <property><name>dfs.namenode.name.dir</name><value>/data/data/hdfs/name</value></property>
  <property><name>dfs.datanode.data.dir</name><value>/data/data/hdfs/data</value></property>
  <property><name>dfs.journalnode.edits.dir</name><value>/data/data/hdfs/jn</value></property>
  <property><name>dfs.namenode.handler.count</name><value>80</value></property>
  <property><name>dfs.datanode.handler.count</name><value>80</value></property>
  <property><name>dfs.namenode.shared.edits.dir</name><value>qjournal://north-190:8485;north-191:8485;north-192:8485;north-193:8485;north-194:8485/cluster</value></property>
  <property><name>dfs.nameservices</name><value>cluster</value></property>
  <property><name>dfs.internal.nameservices</name><value>cluster</value></property>
  <property><name>dfs.ha.namenodes.cluster</name><value>nn1,nn2</value></property>
  <property><name>dfs.namenode.rpc-address.cluster.nn1</name><value>north-190:9000</value></property>
  <property><name>dfs.namenode.rpc-address.cluster.nn2</name><value>north-192:9000</value></property>
  <property><name>dfs.namenode.http-address.cluster.nn1</name><value>north-190:50070</value></property>
  <property><name>dfs.namenode.http-address.cluster.nn2</name><value>north-192:50070</value></property>
  <property><name>dfs.namenode.lifeline.rpc-address.cluster.nn1</name><value>north-190:8050</value></property>
  <property><name>dfs.namenode.lifeline.rpc-address.cluster.nn2</name><value>north-192:8050</value></property>
  <property><name>dfs.namenode.lifeline.handler.count</name><value>10</value></property>
  <property><name>dfs.namenode.audit.log.async</name><value>true</value></property>
  <property><name>dfs.permissions.enabled</name><value>true</value></property>
  <property><name>dfs.ha.fencing.methods</name><value>shell(/bin/true)</value></property>
  <property><name>dfs.client.failover.proxy.provider.cluster</name><value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value></property>
  <property><name>dfs.ha.automatic-failover.enabled</name><value>true</value></property>
  <property><name>dfs.blocksize</name><value>256m</value></property>
  <property><name>dfs.datanode.fsdataset.volume.choosing.policy</name><value>org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy</value></property>
  <property><name>dfs.datanode.du.reserved</name><value>107374182400</value></property>
  <property><name>dfs.checksum.type</name><value>CRC32</value></property>
  <property><name>dfs.client.socket-timeout</name><value>600000</value></property>
  <property><name>dfs.datanode.socket.write.timeout</name><value>1200000</value></property>
  <property><name>dfs.datanode.max.transfer.threads</name><value>16384</value></property>
</configuration>
```
mapred-site.xml
```xml
<configuration>
  <property><name>mapreduce.framework.name</name><value>yarn</value></property>
  <property><name>mapreduce.map.output.compress</name><value>true</value></property>
  <property><name>mapreduce.client.submit.file.replication</name><value>3</value></property>
</configuration>
```
yarn-site.xml
```xml
<configuration>
  <property><name>yarn.nodemanager.local-dirs</name><value>/data/data/yarn/nm-local-dir</value></property>
  <property><name>yarn.nodemanager.log-dirs</name><value>/data/data/yarn/logs</value></property>
  <property><name>yarn.resourcemanager.ha.enabled</name><value>true</value></property>
  <property><name>yarn.resourcemanager.cluster-id</name><value>cluster1</value></property>
  <property><name>yarn.resourcemanager.ha.rm-ids</name><value>rm1,rm2</value></property>
  <property><name>yarn.resourcemanager.hostname.rm1</name><value>north-190</value></property>
  <property><name>yarn.resourcemanager.hostname.rm2</name><value>north-193</value></property>
  <property><name>yarn.resourcemanager.webapp.address.rm1</name><value>north-190:8088</value></property>
  <property><name>yarn.resourcemanager.webapp.address.rm2</name><value>north-193:8088</value></property>
  <property><name>yarn.resourcemanager.zk-address</name><value>north-190:2181,north-191:2181,north-192:2181,north-193:2181,north-194:2181</value></property>
  <property><name>yarn.resourcemanager.store.class</name><value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value></property>
  <property><name>yarn.resourcemanager.recovery.enabled</name><value>true</value></property>
  <property><name>yarn.nodemanager.aux-services</name><value>mapreduce_shuffle</value></property>
  <property><name>yarn.nodemanager.resource.memory-mb</name><value>225280</value></property>
  <property><name>yarn.nodemanager.resource.cpu-vcores</name><value>40</value></property>
  <property><name>yarn.log-aggregation-enable</name><value>true</value><description>Enable application log aggregation</description></property>
  <property><name>yarn.log-aggregation.retain-seconds</name><value>259200</value><description>Keep aggregated logs for 3 days</description></property>
  <property><name>yarn.log-aggregation.retain-check-interval-seconds</name><value>86400</value><description>Interval between runs of the expired aggregated-log cleanup</description></property>
  <property><name>yarn.nodemanager.remote-app-log-dir</name><value>/yarn/logs</value><description>HDFS directory for aggregated logs</description></property>
  <property><name>yarn.log.server.url</name><value>http://cluster/yarn/jobhistory/logs</value><description>URL for historical job logs</description></property>
  <property><name>yarn.nodemanager.vmem-check-enabled</name><value>false</value></property>
  <property><name>yarn.nodemanager.pmem-check-enabled</name><value>false</value></property>
  <property><name>yarn.node-labels.fs-store.root-dir</name><value>hdfs://cluster/yarn/node-labels/</value></property>
  <property><name>yarn.node-labels.enabled</name><value>true</value></property>
  <property><name>yarn.nodemanager.address</name><value>ip:45454</value></property>
</configuration>
```
hadoop-env.sh
```bash
# Keep PID files out of /tmp so they are not cleaned up
export HADOOP_PID_DIR=/data/soft/hadoop/pid
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_HEAPSIZE=4096
export HADOOP_NAMENODE_INIT_HEAPSIZE=2048
```
capacity-scheduler.xml: change yarn.scheduler.capacity.resource-calculator to DominantResourceCalculator (a check-and-refresh sketch follows).
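A small sketch for checking the current value and applying the change without a restart (paths assume the $HADOOP_CONF_DIR exported earlier in these notes):
```bash
# show the current resource-calculator setting
grep -A1 'yarn.scheduler.capacity.resource-calculator' $HADOOP_CONF_DIR/capacity-scheduler.xml
# after setting the value to org.apache.hadoop.yarn.util.resource.DominantResourceCalculator:
yarn rmadmin -refreshQueues
```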
slave start-stop-script
```bash
$ZK_HOME/bin/zkServer.sh start

$HADOOP_HOME/sbin/hadoop-daemon.sh start namenode
$HADOOP_HOME/sbin/hadoop-daemon.sh start datanode
$HADOOP_HOME/sbin/hadoop-daemon.sh start journalnode
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc
$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager
$HADOOP_HOME/sbin/yarn-daemon.sh start nodemanager
$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
yarn rmadmin -getAllServiceState

service mariadb start

$SPARK_HOME/sbin/start-history-server.sh

hbase-daemon.sh start master
hbase-daemon.sh start master --backup
hbase-daemon.sh start regionserver
```
```bash
hdfs namenode -format

hdfs dfs -mkdir /input
hdfs dfs -put README.txt /input
hdfs dfs -ls /input
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.3.jar wordcount /input /output
hdfs dfs -ls /output
hdfs dfs -tail /output/part-r-00000
hdfs dfs -rmr /output /input
```
simple/kerberos cross-cluster client configuration
```xml
<property><name>ipc.client.fallback-to-simple-auth-allowed</name><value>true</value></property>
<property><name>fs.defaultFS</name><value>hdfs://gdhlwtz</value></property>
<property><name>hadoop.security.authentication</name><value>kerberos</value></property>
```
```xml
<property><name>dfs.nameservices</name><value>gdhlwtz</value></property>
<property><name>dfs.ha.namenodes.gdhlwtz</name><value>nn1,nn2</value></property>
<property><name>dfs.namenode.rpc-address.gdhlwtz.nn1</name><value>xxx:8020</value></property>
<property><name>dfs.namenode.rpc-address.gdhlwtz.nn2</name><value>xxx:8020</value></property>
<property><name>dfs.client.failover.proxy.provider.gdhlwtz</name><value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value></property>
<property><name>dfs.namenode.kerberos.principal</name><value>nn/_HOST@HLWKDC</value></property>
```
spark

snappy usage
```bash
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native:/usr/lib64
spark-shell --master local[1]
```
Without further configuration, this uses local resources only:
```bash
./sbin/start-thriftserver.sh
./bin/beeline -u jdbc:hive2://localhost:10000 -n hadoop
```
```sql
CREATE TABLE parquet_test (
  id int,
  str string)
STORED AS PARQUET;

insert into table parquet_test values(1,'a'),(2,'b');
select * from parquet_test;
drop table parquet_test;
```
spark-defaults.conf
```bash
cd jars/
zip spark-jar.zip ./*
mv spark-jar.zip ../
cd ..
chmod 644 spark-jar.zip
hdfs dfs -put spark-jar.zip /spark/spark-jar355.zip
```
Or:
```bash
hdfs dfs -mkdir /spark
hdfs dfs -put jars/ /spark
```
```properties
spark.master=yarn
#spark.yarn.jars=hdfs:///spark/jars/*.jar
spark.yarn.archive hdfs:///spark/spark-jar.zip
spark.serializer=org.apache.spark.serializer.KryoSerializer
spark.sql.warehouse.dir=hdfs:///hive/warehouse
spark.eventLog.enabled true
spark.eventLog.dir hdfs:///spark/spark-history
spark.eventLog.compress true
spark.history.fs.logDirectory hdfs:///spark/spark-history
spark.sql.hive.metastore.version=2.3.7
```
```bash
export SPARK_DIST_CLASSPATH=$(hadoop classpath)
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
```
hive-site.xml. First create the metastore database manually:
```sql
create database hive character set latin1;
```
```xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property><name>javax.jdo.option.ConnectionURL</name><value>jdbc:mysql://ip:3306/hive?createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8&amp;useSSL=false</value></property>
  <property><name>javax.jdo.option.ConnectionDriverName</name><value>com.mysql.jdbc.Driver</value></property>
  <property><name>javax.jdo.option.ConnectionUserName</name><value>root</value></property>
  <property><name>javax.jdo.option.ConnectionPassword</name><value>xxx</value></property>
  <property><name>hive.metastore.schema.verification</name><value>false</value></property>
  <property><name>datanucleus.schema.autoCreateAll</name><value>true</value></property>
</configuration>
```
flink
```bash
./bin/start-cluster.sh
./bin/flink run examples/streaming/WordCount.jar
./bin/flink run -m yarn-cluster examples/streaming/WordCount.jar
tail log/flink-*-taskexecutor-*.out
./bin/stop-cluster.sh
```
Usage example
dolphinscheduler

Restrict which users sudo may switch to:
```
root ALL=(ALL) ALL
dolphinscheduler ALL=(hadoop) NOPASSWD:ALL
```
Format: `user host=(runas-user:runas-group) commands`

- user: the user or group the rule applies to; %sudo means every member of the sudo group
- host: where (from which machine) the command may be run; ALL means any host
- runas-user: which users sudo may switch to; ALL means any user
- runas-group: which groups' users sudo may switch to; ALL means any group
- commands: what may be executed via sudo; NOPASSWD: ALL means any command, without a password

```bash
yum -y install psmisc
tar -xf ${binDS}
folder=`tar -tf ${binDS} |head -1 | awk -F/ '{print $1}'`
ln -s $folder dolphinscheduler
```
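A quick check that the sudoers entries behave as intended (standard sudo options; user names follow the example above):
```bash
sudo -l -U dolphinscheduler                            # list the rules granted to the user
su - dolphinscheduler -c 'sudo -n -u hadoop whoami'    # should print "hadoop" with no password prompt
```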
Use ZK node info to decide when to restart services
```bash
echo "ls /dolphinscheduler/nodes/master" |zkCli.sh -server <ip>:2181 |tail -2

vi ds-watcher.sh

#!/bin/bash

restart_master() {
    #pid=`pgrep -f org.apache.dolphinscheduler.server.master.MasterServer`
    /usr/bin/pkill -f org.apache.dolphinscheduler.server.master.MasterServer
    rm -f /data/dolphinscheduler-1.3.6/pid/dolphinscheduler-master-server.pid
    sleep 3s
    su dolphinscheduler -c "sh /data/dolphinscheduler-1.3.6/bin/dolphinscheduler-daemon.sh start master-server"
}

restart_worker() {
    #pid=`pgrep -f org.apache.dolphinscheduler.server.worker.WorkerServer`
    /usr/bin/pkill -f org.apache.dolphinscheduler.server.worker.WorkerServer
    rm -f /data/dolphinscheduler-1.3.6/pid/dolphinscheduler-worker-server.pid
    sleep 3s
    su dolphinscheduler -c "sh /data/dolphinscheduler-1.3.6/bin/dolphinscheduler-daemon.sh start worker-server"
}

# master
ret=`java -cp /data/dolphinscheduler-1.3.6/ds-watcher.jar ds.ZkCheckNode master:2181,slave1:2181,slave2:2181 /dolphinscheduler/nodes/master/10.17.41.129:5678`
if [ $ret = 'false' ] ;then
    restart_master
fi

# worker
ret=`java -cp /data/dolphinscheduler-1.3.6/ds-watcher.jar ds.ZkCheckNode master:2181,slave1:2181,slave2:2181 /dolphinscheduler/nodes/worker/10.17.41.129/10.17.41.129:1234`
if [ $ret = 'false' ] ;then
    restart_worker
fi

crontab -e
*/5 * * * * /bin/sh /data/dolphinscheduler-1.3.6/ds-watcher.sh
```
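An alternative check that avoids the custom ds-watcher.jar, using zkCli.sh directly; the ZK address and node path reuse the assumptions in the script above:
```bash
# list the registered masters and restart if this node is missing
masters=$(echo "ls /dolphinscheduler/nodes/master" | zkCli.sh -server master:2181 2>/dev/null | tail -1)
if ! echo "$masters" | grep -q "10.17.41.129:5678"; then
    restart_master    # function defined in ds-watcher.sh above
fi
```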
hbase
```
tar -xf hbase-2.4.6-bin.tar.gz
folder=`tar -tf ${binHBase} |head -1 | awk -F/ '{print $1}'`
ln -s $folder hbase

# [For a local test, add hbase.rootdir to hbase-site.xml]
# <property>
#   <name>hbase.rootdir</name>
#   <value>file:///opt/soft/hbase-data</value>
# </property>

# Manual start: --config "${HBASE_CONF_DIR}"
hbase-daemon.sh start master
hbase-daemon.sh start regionserver
hbase-daemon.sh start master --backup

bin/start-hbase.sh

./bin/hbase shell
create 'test', 'cf'
list 'test'
describe 'test'
put 'test', 'row1', 'cf:a', 'value1'
scan 'test'
get 'test', 'row1'
disable 'test'
drop 'test'
```
hbase-site.xml
```xml
<configuration>
  <property><name>hbase.cluster.distributed</name><value>true</value></property>
  <property><name>hbase.rootdir</name><value>hdfs://cluster/hbase</value></property>
  <property><name>hbase.tmp.dir</name><value>/data/data/hbase/tmp</value></property>
  <property><name>hbase.zookeeper.property.dataDir</name><value>/data/data/hbase/zkdata</value></property>
  <property><name>hbase.zookeeper.quorum</name><value>north-190:2181,north-191:2181,north-192:2181,north-193:2181,north-194:2181</value></property>
  <property><name>hbase.unsafe.stream.capability.enforce</name><value>false</value></property>
</configuration>
```
zeppelin
```bash
tar xf zeppelin-0.10.0-bin-all.tgz
cp conf/zeppelin-site.xml.template conf/zeppelin-site.xml
vi conf/zeppelin-site.xml
cp zeppelin-env.sh.template zeppelin-env.sh
vi zeppelin-env.sh
bin/zeppelin-daemon.sh start
```
setting
```bash
source /etc/profile
```
hadoop

vi etc/hadoop/core-site.xml
```xml
<configuration>
  <property><name>fs.defaultFS</name><value>hdfs://localhost:9000</value></property>
  <property><name>hadoop.tmp.dir</name><value>/opt/hdfs/tmp</value><description>temporary directories.</description></property>
</configuration>
```
vi etc/hadoop/hdfs-site.xml
```xml
<configuration>
  <property><name>dfs.replication</name><value>1</value></property>
  <property><name>dfs.namenode.name.dir</name><value>/opt/hdfs/name</value></property>
  <property><name>dfs.datanode.data.dir</name><value>/opt/hdfs/data</value></property>
</configuration>
```
vi etc/hadoop/hadoop-env.sh
```bash
# limit who can execute certain subcommands.
export JAVA_HOME=/opt/soft/jdk
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
```
Distribute & start
```bash
#scp -r hadoop-3.2.1 root@hadoop1:/data1/
cd $HADOOP_HOME

mkdir -p /opt/hdfs/name
mkdir -p /opt/hdfs/data
mkdir -p /opt/hdfs/tmp

hdfs namenode -format
start-dfs.sh
# web UI: http://192.168.56.101:9870/

start-yarn.sh
# web UI: http://192.168.56.101:8088/
```
TestCase
```bash
hdfs dfs -mkdir /in
hdfs dfs -put README.txt /in
hdfs dfs -ls /in
hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar wordcount /in /out
hdfs dfs -cat /out/part-r-00000 |head
hdfs dfs -rmr /in /out
```
HA startup
```bash
# on the active NameNode
hdfs zkfc -formatZK
hdfs namenode -format
# on the standby NameNode
hdfs namenode -bootstrapStandby
```
HA failover
```bash
# nn1 -> nn2
hdfs haadmin -getAllServiceState
hdfs haadmin -failover nn1 nn2
hdfs haadmin -getAllServiceState

# nn2 -> nn1
hdfs haadmin -failover nn2 nn1
```
spark
```bash
cd $SPARK_HOME
hdfs dfs -mkdir -p /spark/lib
hdfs dfs -mkdir -p /spark/spark-history
hdfs dfs -put jars/* /spark/lib

cp conf/spark-env.sh.template conf/spark-env.sh
echo -e '\nexport HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop' >> conf/spark-env.sh

cp conf/spark-defaults.conf.template conf/spark-defaults.conf
echo -e '\n\n' >> conf/spark-defaults.conf
echo 'spark.master=yarn' >> conf/spark-defaults.conf
echo 'spark.yarn.jars=hdfs:///spark/lib' >> conf/spark-defaults.conf
echo 'spark.serializer=org.apache.spark.serializer.KryoSerializer' >> conf/spark-defaults.conf
echo 'spark.sql.warehouse.dir=hdfs:///user/hive/warehouse' >> conf/spark-defaults.conf
echo 'spark.eventLog.enabled true' >> conf/spark-defaults.conf
echo 'spark.eventLog.dir hdfs:///spark/spark-history' >> conf/spark-defaults.conf
echo 'spark.eventLog.compress true' >> conf/spark-defaults.conf
echo 'spark.history.fs.logDirectory hdfs:///spark/spark-history' >> conf/spark-defaults.conf

./bin/spark-submit --master yarn --class org.apache.spark.examples.SparkPi examples/jars/spark-examples*.jar 10

./sbin/start-thriftserver.sh --driver-memory 2g --executor-memory 4g --executor-cores 5 --num-executors 5
./bin/beeline -n root -u jdbc:hive2://localhost:10000
```
Access-control options:
- thriftserver with proxyuser (a shared STS can then do restricted reads and writes)
- zeppelin without proxyuser (each user's SQL can be tracked, but writes are not possible)
- grant HDFS directory permissions to the proxy user (see the sketch after this list)
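A minimal sketch of the last item, granting a warehouse directory to the proxy user (the zeppelin user and the path are assumptions, not from the original notes):
```bash
hdfs dfs -mkdir -p /user/hive/warehouse/zeppelin_db.db
hdfs dfs -chown -R zeppelin:zeppelin /user/hive/warehouse/zeppelin_db.db
hdfs dfs -chmod -R 750 /user/hive/warehouse/zeppelin_db.db
```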
```bash
sbin/start-thriftserver.sh --master yarn \
  --driver-cores 2 --driver-memory 6G \
  --executor-cores 5 --executor-memory 6G --num-executors 10 \
  --proxy-user zeppelin \
  --conf spark.default.parallelism=80 \
  --conf spark.sql.shuffle.partitions=80 \
  --conf spark.sql.adaptive.enabled=true \
  --conf spark.scheduler.mode=FAIR \
  --conf spark.network.timeout=600s \
  --conf spark.memory.fraction=0.8 \
  --conf spark.dynamicAllocation.shuffleTracking.enabled=true \
  --conf spark.dynamicAllocation.shuffleTracking.timeout=180000 \
  --conf spark.dynamicAllocation.enabled=true \
  --conf spark.dynamicAllocation.minExecutors=3 \
  --conf spark.dynamicAllocation.maxExecutors=50 \
  --hiveconf hive.server2.thrift.port=10199 \
  --hiveconf hive.default.fileformat=Orc
```
```sql
create database t;
use t;
create table test(id int, name string) stored as parquet;
desc formatted test;
insert into table test values (1,'a'),(2,'b');
select * from test;
```
hive

Create the database and user in MySQL:
```sql
create database hive;
grant all on hive.* to hive@'%' identified by 'Hive!@#2023';
grant all on hive.* to hive@'localhost' identified by 'Hive!@#2023';
flush privileges;
```
Configure the Hive metastore tables to support Chinese comments and Chinese data import:
```sql
alter table COLUMNS_V2 modify column COMMENT varchar(256) character set utf8;
alter table TABLE_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
alter table PARTITION_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
alter table PARTITION_KEYS modify column PKEY_COMMENT varchar(4000) character set utf8;
alter table INDEX_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
```
You also need to run the TXN schema initialization SQL shipped with the Hive distribution against the hive database in MySQL.
```bash
mv mysql-connector-java-5.1.48.jar $HIVE_HOME/lib

cd $HIVE_HOME/conf
#cp hive-env.sh.template hive-env.sh
cp hive-default.xml.template hive-site.xml
cp hive-log4j2.properties.template hive-log4j2.properties
cp hive-exec-log4j2.properties.template hive-exec-log4j2.properties
```
vi hive-site.xml
```xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property><name>javax.jdo.option.ConnectionURL</name><value>jdbc:mysql://[IP]:3306/[DB]?characterEncoding=UTF-8&amp;useSSL=false</value></property>
  <property><name>javax.jdo.option.ConnectionDriverName</name><value>com.mysql.jdbc.Driver</value></property>
  <property><name>javax.jdo.option.ConnectionUserName</name><value>hive</value></property>
  <property><name>javax.jdo.option.ConnectionPassword</name><value>hive</value></property>
  <property>
    <name>hive.metastore.uris</name><value>thrift://[hostname]:9083</value>
    <description>Thrift uri for the remote metastore. Used by metastore client to connect to remote metastore.</description>
  </property>
  <property><name>hive.metastore.schema.verification</name><value>false</value></property>
</configuration>
```
In vi, replace the system variables with absolute paths:
```
:%s#${system:java.io.tmpdir}#/tmp/javaiotmp#g
:%s#${system:user.name}#hive#g
```
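The same substitutions can be done non-interactively with sed (equivalent to the vi commands above):
```bash
# edit hive-site.xml in place; '#' is used as the sed delimiter because the values contain '/'
sed -i -e 's#${system:java.io.tmpdir}#/tmp/javaiotmp#g' \
       -e 's#${system:user.name}#hive#g' hive-site.xml
```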
```bash
hadoop fs -mkdir -p /user/hive/warehouse
hadoop fs -mkdir -p /user/hive/tmp
hadoop fs -mkdir -p /user/hive/log
hadoop fs -chmod -R 777 /user/hive/warehouse
hadoop fs -chmod -R 777 /user/hive/tmp
hadoop fs -chmod -R 777 /user/hive/log
```
initialize
```bash
$HIVE_HOME/bin/schematool -dbType mysql -initSchema hive hive
```
standalone run
```bash
nohup $HIVE_HOME/bin/hiveserver2 &

$HIVE_HOME/bin/beeline
!connect jdbc:hive2://localhost:10000 hive hive
```
To use Hive as the metastore service for Spark, copy hive-site.xml to $SPARK_HOME/conf.
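A sketch of that copy, assuming the HIVE_HOME/SPARK_HOME layout used earlier in these notes:
```bash
cp $HIVE_HOME/conf/hive-site.xml $SPARK_HOME/conf/
```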
```bash
hive --service metastore &
spark-sql
```
```sql
CREATE TABLE emp (empno int, name string) stored as PARQUET;
insert into table emp values (1,'a'),(2,'b');
CREATE TABLE info (age int, name string) stored as PARQUET;
insert into table info values (11,'a'),(22,'b');
```
role
https://www.cnblogs.com/yszd/p/11086677.html
```sql
set role admin;
show roles;
SHOW CURRENT ROLES;
CREATE ROLE guest;

show grant on all;
show grant user manhua on database default;

GRANT SELECT ON TABLE default.emp TO ROLE guest;
grant select on database default to user manhua;
GRANT ROLE guest TO USER hadoop;

REVOKE ALL PRIVILEGES on default.emp from user hadoop;
revoke role role_test1 from user jayliu;
revoke ALL on database default from user lisi;
revoke select on database default from user hive;
revoke select on TABLE default.emp from user hadoop;
```
dolphinscheduler
```bash
useradd dolphinscheduler
echo "dolphinscheduler" | passwd --stdin dolphinscheduler
sed -i '$adolphinscheduler ALL=(ALL) NOPASSWD: ALL' /etc/sudoers
sed -i 's/Defaults requiretty/#Defaults requiretty/g' /etc/sudoers
chown -R dolphinscheduler:dolphinscheduler $folder
```
su dolphinscheduler
```bash
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
```
mysql -uroot -p
```sql
CREATE DATABASE dolphinscheduler DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'dolphinscheduler'@'%' IDENTIFIED BY 'ds';
GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'dolphinscheduler'@'localhost' IDENTIFIED BY 'ds';
flush privileges;
```
vi conf/datasource.properties
```properties
SPRING_DATASOURCE_URL="jdbc:mysql://10.3.16.120:3306,10.3.16.121:3306/dolphinscheduler?autoReconnect=true&useUnicode=true&characterEncoding=utf-8&failOverReadOnly=false&useSSL=false"
SPRING_DATASOURCE_DRIVER_CLASS_NAME=com.mysql.cj.jdbc.Driver

spring.datasource.driver-class-name=com.mysql.jdbc.Driver
spring.datasource.url="jdbc:mysql://localhost:3306/dolphinscheduler?useUnicode=true&characterEncoding=UTF-8&useSSL=false"
spring.datasource.username=dolphinscheduler
spring.datasource.password=ds
```
If Chinese text in fields comes out garbled:
```sql
ALTER TABLE t_ds_project
  CHANGE description description VARCHAR(255) CHARACTER SET UTF8 COLLATE utf8_general_ci;
```
Download the MySQL connector JAR (5.1.47) into lib.
```bash
vi conf/env/dolphinscheduler_env.sh
vi install.sh
# copy core-site.xml and hdfs-site.xml to conf/
```
frequent usage

start
```bash
pushd $HADOOP_HOME
sbin/start-all.sh
popd

pushd $ZK_HOME
bin/zkServer.sh start
popd

service mariadb start

pushd /opt/soft/dolphinscheduler
sudo -u dolphinscheduler script/start-all.sh
popd
```
http://[ip]:12345/dolphinscheduler/#/home
stop
```bash
pushd /opt/soft/dolphinscheduler
sudo -u dolphinscheduler script/stop-all.sh
popd

pushd $ZK_HOME
bin/zkServer.sh stop
popd

pushd $HADOOP_HOME
sbin/stop-all.sh
popd
```