3

离线方式安装高可用RKE2 (版本: v1.22.13+rke2r1)记录 - 奔跑的运维

 1 year ago
source link: https://www.cnblogs.com/zhaojli/p/16653553.html
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
  1. 想要了解RKE2可以到官网(https://docs.rke2.iohttps://docs.rancher.cn/docs/rke2/_index/)看最新资料
  2. 用官网给出的离线安装(https://docs.rke2.io/install/airgap/)方式可能有坑, 节点拉起时安装网络插件必须使用到网络,没有网络节点一直时Notready的状态, 有可能也和我部署的版本有关系, 大家也可以跳过给containerd设置的步骤,直接部署试试
  3. rke2-images.linux-amd64.tar.zst,rke2.linux-amd64.tar.gz,sha256sum-amd64.txt下载地址见 https://github.com/rancher/rke2/releases
  4. 如果发现有啥错误,欢迎评论区打扰

初始化节点

配置主机名信息,添加本地hosts解析

hostnamectl set-hostname rke2-master-default-loadblance
hostnamectl set-hostname rke2-master-default-nodepool-1
hostnamectl set-hostname rke2-master-default-nodepool-2
hostnamectl set-hostname rke2-master-default-nodepool-3
hostnamectl set-hostname rke2-node-default-nodepool-1
hostnamectl set-hostname rke2-node-default-nodepool-2
hostnamectl set-hostname rke2-node-default-nodepool-3
hostnamectl set-hostname rke2-node-default-nodepool-4

修改所有节点hosts

172.17.0.50  rke2-master-default-loadblance
172.17.0.51  rke2-master-default-nodepool-1
172.17.0.52  rke2-master-default-nodepool-2
172.17.0.53  rke2-master-default-nodepool-3
172.17.0.54  rke2-node-default-nodepool-1
172.17.0.55  rke2-node-default-nodepool-2
172.17.0.56  rke2-node-default-nodepool-3
172.17.0.57  rke2-node-default-nodepool-4

重新生成机器ID并重启

cp -rf /dev/null /etc/machine-id
systemd-machine-id-setup
reboot

所有节点安装需要的包

systemctl stop firewalld; systemctl disable firewalld; systemctl stop dnsmasq; systemctl disable dnsmasq; systemctl stop ntpd; systemctl disable ntpd; systemctl stop postfix; systemctl disable postfix;
iptables -F && iptables -X && iptables -F -t nat && iptables -X -t nat && iptables -P FORWARD ACCEPT
swapoff -a; sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab; 
setenforce 0
sed -i '/SELINUX/s/enforcing/disabled/' /etc/selinux/config
yum -y install epel-release; yum -y install chrony curl wget vim sysstat net-tools openssl openssh lsof socat nfs-utils conntrack ipvsadm ipset iptables sysstat libseccomp; systemctl disable rpcbind 

调整limit

cat >> /etc/security/limits.conf << EOF
* soft nproc  65535
* hard nproc  65535
* soft nofile 65535
* hard nofile 65535
EOF

调整NetworkManager

cat >>  /etc/NetworkManager/conf.d << EOF
[keyfile]
unmanaged-devices=interface-name:cali*;interface-name:flannel*
EOF
curl -LO https://storage.corpintra.plus/elrepo/kernel/kernel-lt-5.4.134-1.el7.elrepo.x86_64.rpm
curl -LO https://storage.corpintra.plus/elrepo/kernel/kernel-lt-devel-5.4.134-1.el7.elrepo.x86_64.rpm
yum install kernel-lt-* -y
grub2-set-default  0
grub2-mkconfig -o /etc/grub2.cfg
grubby --default-kernel 
reboot

导入ipvs模块

cat > /etc/modules-load.d/ipvs.conf << EOF
ip_vs
ip_vs_lc
ip_vs_wlc
ip_vs_rr
ip_vs_wrr
ip_vs_lblc
ip_vs_lblcr
ip_vs_dh
ip_vs_sh
ip_vs_fo
ip_vs_nq
ip_vs_sed
ip_vs_ftp
nf_conntrack
br_netfilter
ip_tables
ip_set
xt_set
ipt_set
ipt_rpfilter
ipt_REJECT
overlay
EOF

systemctl enable --now systemd-modules-load.service

修改内核并重启所有节点

cat >  /etc/sysctl.d/kubernetes.conf <<EOF
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_intvl = 30
net.ipv4.tcp_keepalive_probes = 10
net.ipv6.conf.all.disable_ipv6 = 1
net.ipv6.conf.default.disable_ipv6 = 1
net.ipv6.conf.lo.disable_ipv6 = 1
net.ipv4.neigh.default.gc_stale_time = 120
net.ipv4.conf.all.rp_filter = 0
net.ipv4.conf.default.rp_filter = 0
net.ipv4.conf.default.arp_announce = 2
net.ipv4.conf.lo.arp_announce = 2
net.ipv4.conf.all.arp_announce = 2
net.ipv4.ip_forward = 1
net.ipv4.tcp_max_tw_buckets = 5000
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 1024
net.ipv4.tcp_synack_retries = 2
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-arptables = 1
net.netfilter.nf_conntrack_max = 2310720
fs.inotify.max_user_watches=89100
fs.file-max = 52706963
fs.nr_open = 52706963
vm.swappiness = 0
vm.overcommit_memory=1
kernel.panic=10
EOF

reboot

Haproxy节点配置

yum install haproxy -y

写入以下配置到/etc/haproxy/haproxy.cfg

global
    log         127.0.0.1 local2
    chroot      /var/lib/haproxy
    pidfile     /var/run/haproxy.pid
    maxconn     6000
    user        haproxy
    group       haproxy
    daemon
    stats socket /var/lib/haproxy/stats

defaults
    mode                    tcp
    log                     global
    option                  tcplog
    option                  dontlognull
    option                  redispatch
    retries                 3
    timeout http-request    10s
    timeout queue           1m
    timeout connect         10s
    timeout client          1m
    timeout server          1m
    timeout http-keep-alive 10s
    timeout check           10s
    maxconn                 3000

listen stats
    bind 0.0.0.0:9000
    mode  http
    stats uri /status
    stats refresh 30s
    stats realm "Haproxy Manager"
    stats auth admin:admin
    stats hide-version
    stats admin if TRUE


frontend  rke2-server
   bind *:9345
   mode tcp
   default_backend      rke2-server

backend   rke2-server
    balance     roundrobin
    mode        tcp
    server      rke2-master-default-nodepool-1 172.17.0.51:9345 check weight 1 maxconn 1000 check inter 2000 rise 2 fall 3
    server      rke2-master-default-nodepool-2 172.17.0.52:9345 check weight 1 maxconn 1000 check inter 2000 rise 2 fall 3
    server      rke2-master-default-nodepool-3 172.17.0.53:9345 check weight 1 maxconn 1000 check inter 2000 rise 2 fall 3


frontend  rke2-apiserver
   bind *:6443
   mode tcp
   default_backend      rke2-apiserver

backend   rke2-apiserver
    balance     roundrobin
    mode        tcp
    server      rke2-master-default-nodepool-1 172.17.0.51:6443 check weight 1 maxconn 1000 check inter 2000 rise 2 fall 3
    server      rke2-master-default-nodepool-2 172.17.0.52:6443 check weight 1 maxconn 1000 check inter 2000 rise 2 fall 3
    server      rke2-master-default-nodepool-3 172.17.0.53:6443 check weight 1 maxconn 1000 check inter 2000 rise 2 fall 3

启动haproxy

systemctl enable --now haproxy.service

RKE2-Server初始化

在所有Master节点上执行资源下载以及安装

groupadd -r etcd
useradd -r -g etcd -s /bin/false -d /var/lib/etcd etcd
mkdir -p /etc/rke2/config
curl -L https://storage.corpintra.plus/rke2/audit-policy.yaml -o /etc/rke2/config/audit-policy.yaml

cat > /etc/sysconfig/rke2-server <<EOF
CONTAINERD_HTTP_PROXY=http://192.168.1.10:3128
CONTAINERD_HTTPS_PROXY=http://192.168.1.10:3128
CONTAINERD_NO_PROXY=127.0.0.0/8,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16,.svc,.cluster.local
EOF

mkdir /root/rke2-artifacts
cd /root/rke2-artifacts/
curl -LO  https://storage.corpintra.plus/rke2/v1.22.13+rke2r1/rke2-images.linux-amd64.tar.zst
curl -LO  https://storage.corpintra.plus/rke2/v1.22.13+rke2r1/rke2.linux-amd64.tar.gz
curl -LO  https://storage.corpintra.plus/rke2/v1.22.13+rke2r1/sha256sum-amd64.txt
curl -sfL https://storage.corpintra.plus/rke2/install.sh | INSTALL_RKE2_TYPE=server INSTALL_RKE2_MIRROR=cn INSTALL_RKE2_ARTIFACT_PATH=/root/rke2-artifacts INSTALL_RKE2_VERSION=v1.22.13+rke2r1 sh -

配置第一台Master节点

常用的参数见: https://docs.rke2.io/install/install_options/server_config/
网络选择: https://docs.rke2.io/install/network_options/
如果你想禁用一些RKE2自带的Chart: https://docs.rke2.io/advanced/#disabling-server-charts

mkdir -p /etc/rancher/rke2
cat >  /etc/rancher/rke2/config.yaml <<EOF
write-kubeconfig-mode: "0644"
#server: https://172.17.0.50:9345 #等三台都起来后把这个配置取消注释,重启下rke2-server
tls-san:
  - "127.0.0.1"
  - "172.17.0.50"
  - "172.17.0.51"
  - "172.17.0.52"
  - "172.17.0.53"
  - "rke2-master-default-loadblance"
  - "rke2-master-default-nodepool-1"
  - "rke2-master-default-nodepool-2"
  - "rke2-master-default-nodepool-3"
cni: "calico"
profile: "cis-1.6"
cluster-cidr: 10.244.0.0/16
service-cidr: 10.96.0.0/16
disable-cloud-controller: true
kube-proxy-arg:
- "proxy-mode=ipvs"
kubelet-arg:
- "max-pods=110"
node-taint:
- "node-role.kubernetes.io/control-plane=true:NoSchedule"
audit-policy-file: "/etc/rke2/config/audit-policy.yaml"
etcd-snapshot-schedule-cron: "0 */4 * * *"
etcd-snapshot-retention: "84"
#disable: 
#- "rke2-ingress-nginx"
#- "rke2-metrics-server"
EOF

启动第一台节点(需要耐心等一等,很慢,如果遇到启动失败的,重启一次,一般重启一次能解决 )

systemctl enable --now rke2-server.service

配置第二台Master节点

mkdir -p /etc/rancher/rke2
cat >  /etc/rancher/rke2/config.yaml <<EOF
write-kubeconfig-mode: "0644"
token: <token for server node> ##填写第一个server节点的token,通过在第一个master节点查看/var/lib/rancher/rke2/server/token文件获得;
server: https://172.17.0.50:9345
tls-san:
  - "127.0.0.1"
  - "172.17.0.50"
  - "172.17.0.51"
  - "172.17.0.52"
  - "172.17.0.53"
  - "rke2-master-default-loadblance"
  - "rke2-master-default-nodepool-1"
  - "rke2-master-default-nodepool-2"
  - "rke2-master-default-nodepool-3"
cni: "calico"
profile: "cis-1.6"
cluster-cidr: 10.244.0.0/16
service-cidr: 10.96.0.0/16
disable-cloud-controller: true
kube-proxy-arg:
- "proxy-mode=ipvs"
kubelet-arg:
- "max-pods=110"
node-taint:
- "node-role.kubernetes.io/control-plane=true:NoSchedule"
audit-policy-file: "/etc/rke2/config/audit-policy.yaml"
etcd-snapshot-schedule-cron: "0 */4 * * *"
etcd-snapshot-retention: "84"
etcd-snapshot-dir: "/var/lib/etcd-snapshots"
#disable: 
#- "rke2-ingress-nginx"
#- "rke2-metrics-server"
EOF

启动第二台节点, 比较慢,耐心等等

systemctl enable --now rke2-server.service

配置第三台Master节点

mkdir -p /etc/rancher/rke2
cat >  /etc/rancher/rke2/config.yaml <<EOF
write-kubeconfig-mode: "0644"
token: <token for server node> ##填写第一个server节点的token,通过在第一个master节点查看/var/lib/rancher/rke2/server/token文件获得;
server: https://172.17.0.50:9345
tls-san:
  - "127.0.0.1"
  - "172.17.0.50"
  - "172.17.0.51"
  - "172.17.0.52"
  - "172.17.0.53"
  - "rke2-master-default-loadblance"
  - "rke2-master-default-nodepool-1"
  - "rke2-master-default-nodepool-2"
  - "rke2-master-default-nodepool-3"
cni: "calico"
profile: "cis-1.6"
cluster-cidr: 10.244.0.0/16
service-cidr: 10.96.0.0/16
disable-cloud-controller: true
kube-proxy-arg:
- "proxy-mode=ipvs"
kubelet-arg:
- "max-pods=110"
node-taint:
- "node-role.kubernetes.io/control-plane=true:NoSchedule"
audit-policy-file: "/etc/rke2/config/audit-policy.yaml"
etcd-snapshot-schedule-cron: "0 */4 * * *"
etcd-snapshot-retention: "84"
etcd-snapshot-dir: "/var/lib/etcd-snapshots"
#disable: 
#- "rke2-ingress-nginx"
#- "rke2-metrics-server"
EOF

启动第三台节点, 比较慢,耐心等等

systemctl enable --now rke2-server.service

等所有Master节点起来后执行下面命令检查

/var/lib/rancher/rke2/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml get nodes -o wide

RKE2-Agent节点安装

挨台安装Agent节点

cat > /etc/sysconfig/rke2-agent <<EOF
CONTAINERD_HTTP_PROXY=http://192.168.1.10:3128
CONTAINERD_HTTPS_PROXY=http://192.168.1.10:3128
CONTAINERD_NO_PROXY=127.0.0.0/8,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16,.svc,.cluster.local
EOF


mkdir /root/rke2-artifacts
cd /root/rke2-artifacts/
curl -LO  https://storage.corpintra.plus/rke2/v1.22.13+rke2r1/rke2-images.linux-amd64.tar.zst
curl -LO  https://storage.corpintra.plus/rke2/v1.22.13+rke2r1/rke2.linux-amd64.tar.gz
curl -LO  https://storage.corpintra.plus/rke2/v1.22.13+rke2r1/sha256sum-amd64.txt
curl -sfL https://storage.corpintra.plus/rke2/install.sh | INSTALL_RKE2_TYPE=agent INSTALL_RKE2_MIRROR=cn INSTALL_RKE2_ARTIFACT_PATH=/root/rke2-artifacts INSTALL_RKE2_VERSION=v1.22.13+rke2r1 sh -

mkdir -p /etc/rancher/rke2
cat >  /etc/rancher/rke2/config.yaml <<EOF
server: https://172.17.0.50:9345
token: <token for server node> ##填写第一个server节点的token,通过在第一个master节点查看/var/lib/rancher/rke2/server/token文件获得;
cluster-cidr: 10.244.0.0/16
service-cidr: 10.96.0.0/16
kube-proxy-arg:
- "proxy-mode=ipvs"
kubelet-arg:
- "max-pods=110"
EOF


systemctl enable rke2-agent.service --now

等所有Node节点起来后执行下面命令检查

/var/lib/rancher/rke2/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml get nodes -o wide

RKE2默认使用containerd作为Runtime,如果想要查看节点上运行的容器,可以使用下面的命令:

export CRI_CONFIG_FILE=/var/lib/rancher/rke2/agent/etc/crictl.yaml && /var/lib/rancher/rke2/bin/crictl ps

安装存储插件(可选)

curl -LO https://storage.corpintra.plus/kubernetes/charts/csi-driver-smb-v1.9.0.tgz
helm upgrade csi-driver-smb \
   --namespace kube-system \
   --create-namespace \
   --debug \
   --wait \
   --install \
   --atomic \
   --set image.baseRepo="registry.cn-hangzhou.aliyuncs.com/kube-sig-storage" \
   --set image.smb.repository="registry.cn-hangzhou.aliyuncs.com/kube-sig-storage/smbplugin" \
   --set image.smb.tag="v1.9.0" \
   --set image.csiProvisioner.repository="registry.cn-hangzhou.aliyuncs.com/kube-sig-storage/csi-provisioner" \
   --set image.csiProvisioner.tag="v3.2.0" \
   --set image.livenessProbe.repository="registry.cn-hangzhou.aliyuncs.com/kube-sig-storage/livenessprobe" \
   --set image.livenessProbe.tag="v2.7.0" \
   --set image.nodeDriverRegistrar.repository="registry.cn-hangzhou.aliyuncs.com/kube-sig-storage/csi-node-driver-registrar" \
   --set image.nodeDriverRegistrar.tag="v2.5.1" \
   --set controller.replicas=2 \
   ./csi-driver-smb-v1.9.0.tgz
   

   
curl -LO https://storage.corpintra.plus/kubernetes/charts/csi-driver-nfs-v4.1.0.tgz   
helm upgrade csi-driver-nfs \
   --namespace kube-system \
   --create-namespace \
   --debug \
   --wait \
   --install \
   --atomic \
   --set image.nfs.repository="registry.cn-hangzhou.aliyuncs.com/kube-sig-storage/nfsplugin" \
   --set image.nfs.tag="v4.1.0" \
   --set image.csiProvisioner.repository="registry.cn-hangzhou.aliyuncs.com/kube-sig-storage/csi-provisioner" \
   --set image.csiProvisioner.tag="v3.2.0" \
   --set image.livenessProbe.repository="registry.cn-hangzhou.aliyuncs.com/kube-sig-storage/livenessprobe" \
   --set image.livenessProbe.tag="v2.7.0" \
   --set image.nodeDriverRegistrar.repository="registry.cn-hangzhou.aliyuncs.com/kube-sig-storage/csi-node-driver-registrar" \
   --set image.nodeDriverRegistrar.tag="v2.5.1" \
   --set controller.replicas=2 \
   ./csi-driver-nfs-v4.1.0.tgz

About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK