Contents
  1. InitEnv
    1.1. Create
    1.2. prepare
    1.3. dev
      1.3.1. need root
      1.3.2. user
        1.3.2.1. hadoop
        1.3.2.2. hive
        1.3.2.3. spark
        1.3.2.4. bak
    1.4. snapshot
  2. Issues
    2.1. Change the WSL storage directory via migration
    2.2. Configure a data disk (problems after restart)
  3. mvn+git

InitEnv

Create

# Reinstall (wipe and recreate the distro)
wsl --shutdown
wsl -l -v
wsl --unregister Ubuntu
wsl -l --online
wsl --install Ubuntu-24.04 --name Ubuntu --location E:\runtime\wsl-ubuntu\
wsl --set-default Ubuntu
wsl -d Ubuntu

prepare

sudo su
# dns
echo -e "\n[network]\ngenerateResolvConf=false" >> /etc/wsl.conf
cat /etc/wsl.conf
echo "nameserver 223.5.5.5" > /etc/resolv.conf
ping -c 2 www.baidu.com

# switch apt sources to the Aliyun mirror
cat /etc/lsb-release
cp /etc/apt/sources.list.d/ubuntu.sources /etc/apt/sources.list.d/ubuntu.sources.bak
sed -i \
-e 's|http://archive.ubuntu.com|https://mirrors.aliyun.com|g' \
-e 's|http://security.ubuntu.com|https://mirrors.aliyun.com|g' \
/etc/apt/sources.list.d/ubuntu.sources
apt update

apt install -y net-tools openssh-server
systemctl enable ssh
service ssh start
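
Note that /etc/wsl.conf is only read when the distro starts, so the generateResolvConf change above applies from the next boot; restart from Windows when convenient:

wsl --shutdown    # run from Windows PowerShell, not inside the distro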

dev

need root

apt install openjdk-8-jdk maven

# MySQL. As root, run mysql to open the client directly, then execute the SQL below
apt install mysql-server-8.0
systemctl status mysql
systemctl disable mysql
service mysql status
## service mysql start

create database hive character set latin1;
CREATE USER 'hive'@'%' IDENTIFIED BY 'hive';
GRANT ALL PRIVILEGES ON *.* TO 'hive'@'%';
flush privileges;

# profile
INSTALL_PATH=/data/soft
mkdir -p $INSTALL_PATH
chown manhua /data
chown manhua $INSTALL_PATH
exit

user

INSTALL_PATH=/data/soft && cd $INSTALL_PATH

hadoop

cd $INSTALL_PATH
#wget https://mirrors.ustc.edu.cn/apache/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
cp /mnt/e/pkg/hadoop-3.3.6.tar.gz .
tar -zxf hadoop-3.3.6.tar.gz
ln -s hadoop-3.3.6 hadoop
rm hadoop-3.3.6.tar.gz

echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> ~/.bashrc
echo "export PATH=\$JAVA_HOME/bin:\$PATH" >> ~/.bashrc
echo "export HADOOP_HOME=$INSTALL_PATH/hadoop" >> ~/.bashrc
echo "export PATH=\$HADOOP_HOME/bin:\$PATH" >> ~/.bashrc

hive

#wget https://archive.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz
cp /mnt/e/pkg/apache-hive-2.3.9-bin.tar.gz .
tar -zxf apache-hive-2.3.9-bin.tar.gz
ln -s apache-hive-2.3.9-bin hive
rm apache-hive-2.3.9-bin.tar.gz

wget https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.30/mysql-connector-java-8.0.30.jar
mv mysql-connector-java-8.0.30.jar $INSTALL_PATH/hive/lib

echo "export HIVE_HOME=$INSTALL_PATH/hive" >> ~/.bashrc
echo "export PATH=\$HIVE_HOME/bin:\$PATH" >> ~/.bashrc

tee $INSTALL_PATH/hive/conf/hive-site.xml > /dev/null <<'EOF'
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- Hive Metastore Database Configuration -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;allowPublicKeyRetrieval=true</value>
  </property>

  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.cj.jdbc.Driver</value>
  </property>

  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
  </property>

  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
  </property>

  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://localhost:9083</value>
  </property>

  <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
  </property>

  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive/warehouse</value>
  </property>

  <!-- Hive Server2 Configuration -->
  <property>
    <name>hive.server2.thrift.port</name>
    <value>10000</value>
  </property>

  <property>
    <name>hive.server2.thrift.bind.host</name>
    <value>localhost</value>
  </property>

  <!-- Other Important Configurations -->
  <property>
    <name>hive.exec.scratchdir</name>
    <value>/tmp/hive</value>
  </property>

  <property>
    <name>hive.exec.local.scratchdir</name>
    <value>/tmp/hive/local</value>
  </property>

  <property>
    <name>hive.downloaded.resources.dir</name>
    <value>/tmp/hive/resources</value>
  </property>

  <property>
    <name>hive.server2.enable.doAs</name>
    <value>false</value>
  </property>
</configuration>
EOF

# After Hadoop is deployed, initialize the metastore schema: $INSTALL_PATH/hive/bin/schematool -dbType mysql -initSchema

nohup hive --service metastore -p 9083 > /tmp/hive-metastore.log 2>&1 &
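
Optional sanity checks, assuming the hive/hive MySQL account created in the need-root step:

mysql -uhive -phive -e 'USE hive; SHOW TABLES;' | head   # schema tables created by schematool
ss -tlnp | grep 9083                                     # metastore thrift port is listening
tail /tmp/hive-metastore.log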

spark

cd $INSTALL_PATH
#wget https://dlcdn.apache.org/spark/spark-3.5.6/spark-3.5.6-bin-hadoop3.tgz
cp /mnt/e/pkg/spark-3.5.6-bin-hadoop3.tgz .
tar -zxf spark-3.5.6-bin-hadoop3.tgz
ln -s spark-3.5.6-bin-hadoop3 spark
rm spark-3.5.6-bin-hadoop3.tgz

echo "export SPARK_HOME=$INSTALL_PATH/spark" >> ~/.bashrc
echo "export PATH=\$SPARK_HOME/bin:\$PATH" >> ~/.bashrc

cp $INSTALL_PATH/hive/conf/hive-site.xml $INSTALL_PATH/spark/conf
cp $INSTALL_PATH/hive/lib/mysql-connector-java-8.0.30.jar $INSTALL_PATH/spark/jars
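
A quick way to confirm Spark picks up the Hive metastore (assumes the metastore from the hive section is already running on port 9083):

$INSTALL_PATH/spark/bin/spark-sql -e "show databases;"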

bak

profile="~/.bashrc"
# profile="/etc/profile"

echo -e '\n\n#Java' >> ${profile}
echo 'export JAVA_HOME='${INSTALL_PATH}'/jdk' >> ${profile}
echo 'export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$PATH' >> /etc/profile
echo 'export CLASSPATH=$CLASSPATH:.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib' >> ${profile}

echo -e '\n#maven' >> ${profile}
echo 'export MVN_HOME='${INSTALL_PATH}'/maven' >> ${profile}
echo 'export PATH=$MVN_HOME/bin:$PATH' >> ${profile}

echo -e '\n#Hadoop' >> ${profile}
echo 'export HADOOP_HOME='${INSTALL_PATH}'/hadoop' >> ${profile}
echo 'export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH' >> ${profile}
echo 'export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop' >> ${profile}

echo -e '\n#Spark' >> ${profile}
echo 'export SPARK_HOME='${INSTALL_PATH}'/spark' >> ${profile}
echo 'export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH' >> ${profile}


# git is installed by default

# java mvn hadoop spark
cd /data/soft
wget https://mirrors.huaweicloud.com/openjdk/21/openjdk-21_linux-x64_bin.tar.gz
wget https://dlcdn.apache.org/maven/maven-3/3.9.9/binaries/apache-maven-3.9.9-bin.tar.gz
wget https://mirrors.ustc.edu.cn/apache/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
wget https://dlcdn.apache.org/spark/spark-3.5.6/spark-3.5.6-bin-hadoop3.tgz
wget https://mirrors.huaweicloud.com/apache/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz

binJDK=openjdk-21_linux-x64_bin.tar.gz
binMvn=apache-maven-3.9.9-bin.tar.gz
binHadoop=hadoop-3.3.6.tar.gz
binSpark=spark-3.5.6-bin-hadoop3.tgz
binHive=apache-hive-2.3.9-bin.tar.gz
tar -xzf ${binJDK}    && folder=$(tar -tf ${binJDK}    | head -1 | awk -F'/' '{print $1}') && ln -s $folder jdk
tar -xzf ${binMvn}    && folder=$(tar -tf ${binMvn}    | head -1 | awk -F'/' '{print $1}') && ln -s $folder maven
tar -xzf ${binHadoop} && folder=$(tar -tf ${binHadoop} | head -1 | awk -F'/' '{print $1}') && ln -s $folder hadoop
tar -xzf ${binSpark}  && folder=$(tar -tf ${binSpark}  | head -1 | awk -F'/' '{print $1}') && ln -s $folder spark
tar -xzf ${binHive}   && folder=$(tar -tf ${binHive}   | grep -vE '^\.' | head -1 | awk -F'/' '{print $1}') && ln -s $folder hive
rm ${binJDK}
rm ${binMvn}
rm ${binHadoop}
rm ${binSpark}
rm ${binHive}

## mvn
mkdir -p ~/.m2/
vi ~/.m2/settings.xml

## wget https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.49/mysql-connector-java-5.1.49.jar
wget https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.30/mysql-connector-java-8.0.30.jar
cp mysql-connector-java-8.0.30.jar /data/soft/spark/jars
mv mysql-connector-java-8.0.30.jar /data/soft/hive/lib
cd /data/soft/spark/conf
cp spark-defaults.conf.template spark-defaults.conf
vi spark-defaults.conf

vi hive-site.xml
cp /data/soft/spark/conf/hive-site.xml /data/soft/hive/conf
/data/soft/hive/bin/schematool -dbType mysql -initSchema -userName hive -passWord hive

sudo shutdown now

snapshot

wsl -l -v
wsl --export Ubuntu E:\runtime\wsl-ubuntu-hadoop.tar

wsl --import Ubuntu-hadoop E:\runtime\wsl-ubuntu-hadoop\ E:\runtime\wsl-ubuntu-hadoop.tar --version 2
#wsl --set-default Ubuntu
#Ubuntu-hadoop config --default-user manhua
wsl -d Ubuntu-hadoop
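
Distributions created with wsl --import log in as root by default (there is no Ubuntu-hadoop launcher exe), so a common workaround is to set the default user in /etc/wsl.conf inside the imported distro, e.g.:

cat <<'EOF' >> /etc/wsl.conf
[user]
default=manhua
EOF

A wsl --shutdown is needed before the change applies.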

Issues

Files on the Windows mounts (/mnt/*) have permission and ownership problems; copy them into the Linux filesystem before working with them, as shown below.
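
For example, following the same paths used above:

cp /mnt/e/pkg/hadoop-3.3.6.tar.gz /data/soft/   # files created here get normal Linux ownership and modes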

Change the WSL storage directory via migration

wsl -l  -v
wsl --shutdown
wsl --export Ubuntu E:\runtime\wsl-ubuntu.tar
wsl --unregister Ubuntu

wsl --import Ubuntu E:\runtime\wsl-ubuntu\ E:\runtime\wsl-ubuntu.tar --version 2
wsl --set-default Ubuntu
Ubuntu config --default-user manhua
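
A quick check that the import and default-user setting took effect (sketch):

wsl -l -v
wsl -d Ubuntu -- whoami    # should print the configured default user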

Configure a data disk (problems after restart)

  • This PC → right-click → Manage → Disk Management → click any disk entry → Action → Create VHD
  • Run Get-Disk in PowerShell to get the disk number (also shown in Disk Management), or Get-CimInstance -query "SELECT * from Win32_DiskDrive"
  • wsl --mount \\.\PhysicalDrive2 --bare (to detach: wsl --unmount \\.\PhysicalDrive2)
  • lsblk
  • sudo mkfs -t ext4 /dev/sde
  • sudo blkid /dev/sde
  • sudo mkdir -p /data
  • sudo mount /dev/sde /data (the attach and mount do not survive a Windows reboot; see the sketch after this list)
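
After a Windows reboot the VHD has to be re-attached and re-mounted; a minimal sketch, assuming the disk number and device name have not changed (both can shift between boots):

wsl --mount \\.\PhysicalDrive2 --bare    # from PowerShell, as administrator
sudo mount /dev/sde /data                # inside WSL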

[[mvn#setting文件]]
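
The linked note holds the full Maven settings; a minimal ~/.m2/settings.xml pointing Maven at the Aliyun mirror might look roughly like this (the mirror choice is an assumption, adjust as needed):

tee ~/.m2/settings.xml > /dev/null <<'EOF'
<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0">
  <mirrors>
    <mirror>
      <id>aliyun</id>
      <mirrorOf>central</mirrorOf>
      <url>https://maven.aliyun.com/repository/public</url>
    </mirror>
  </mirrors>
</settings>
EOF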

mvn+git

wget https://dlcdn.apache.org/maven/maven-3/3.9.9/binaries/apache-maven-3.9.9-bin.tar.gz
tar -xvzf apache-maven-3.9.9-bin.tar.gz
mv apache-maven-3.9.9 /opt/maven
cd /opt/maven/conf
vi settings.xml

git clone --depth 1 -b my0.12.0 https://github.com/kevinjmh/zeppelin.git
cd zeppelin
git branch -a
export MAVEN_OPTS="-Xmx3g -XX:MaxMetaspaceSize=512m"
mvn clean install -P build-distr -P spark-scala-2.12 -Pspark-3.5 -DskipTests

git fetch --depth 1 origin branch-0.12
git checkout -b branch-0.12 origin/branch-0.12


# If GitHub is hard to reach, download the source archive instead and build from that (see the sketch below)
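
A rough sketch of the archive route, using GitHub's standard branch-tarball URL for the repository cloned above (branch name assumed to still be my0.12.0):

wget https://github.com/kevinjmh/zeppelin/archive/refs/heads/my0.12.0.tar.gz
tar -xzf my0.12.0.tar.gz && cd zeppelin-my0.12.0
export MAVEN_OPTS="-Xmx3g -XX:MaxMetaspaceSize=512m"
mvn clean install -P build-distr -P spark-scala-2.12 -Pspark-3.5 -DskipTests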