6

AWS EKS 创建k8s生产环境实例 - Elvin™

 1 year ago
source link: https://www.cnblogs.com/elvi/p/16549069.html
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.

#AWS EKS 创建k8s生产环境实例


  • 在AWS部署海外节点时, 为图简单先使用web控制台创建VPC和k8s集群, 结果k8s集群创建出错; 改用cli命令行工具后创建成功
  • 本实例为复盘, 记录使用aws命令行工具创建eks, 安装efs csi驱动、AWS LB Controller、ingress-nginx, 以及使用ECR镜像存储等

#安装命令行工具

# Install AWS CLI v2
cd /tmp
# TLS verification is left enabled (no -k): the downloaded installer is executed
# with sudo below, so the download must be authenticated. -f makes curl fail on
# an HTTP error instead of saving the error page as the zip file.
curl -fL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
unzip awscliv2.zip
sudo ./aws/install
aws --version
# Configure the AWS access key
aws configure
# Show the active configuration
aws configure list

# Install kubectl (EKS build 1.22.6)
# -f: fail on an HTTP error instead of saving the error body as the binary; -L: follow redirects.
curl -fLo kubectl https://s3.us-west-2.amazonaws.com/amazon-eks/1.22.6/2022-03-09/bin/linux/amd64/kubectl
chmod +x ./kubectl
# Use sudo for the move into /usr/local/bin, consistent with the aws and eksctl install steps.
sudo mv kubectl /usr/local/bin
kubectl version --short --client

# Install eksctl: stream the latest release tarball for this OS into tar,
# unpack it under /tmp, then move the binary onto the PATH.
curl -sL "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" \
  | tar -xz -C /tmp
sudo mv /tmp/eksctl /usr/local/bin
eksctl version

#创建VPC网络和子网

#创建VPC网络和子网已单独发帖
https://www.cnblogs.com/elvi/p/16542406.html

#创建k8s集群

# Environment
k8s_name=aws-k8s
Region=ap-southeast-1 # Singapore
# AWS account id of the calling identity.
# Asking STS directly avoids depending on jq and on a VPC existing in the region.
OwnerId=$(aws sts get-caller-identity --region "${Region}" --query Account --output text)
# Reuse existing subnets (comma-separated subnet ids).
# Shell variable names cannot contain '-': "private-subnets-id=..." is run as a
# command, and ${private-subnets-id} expands to the literal text "subnets-id".
private_subnets_id="subnet-lan-a-xxx,subnet-lan-b-xxx"
public_subnets_id="subnet-public-a-xxx,subnet-public-b-xxx"
# k8s cluster
eksctl create cluster \
 --region "${Region}" \
 --name "${k8s_name}" \
 --version 1.22 \
 --vpc-private-subnets "${private_subnets_id}" \
 --vpc-public-subnets "${public_subnets_id}" \
 --managed \
 --without-nodegroup \
 --dry-run

# Show the cluster
eksctl get cluster --name "${k8s_name}" --region "${Region}"

# If creation failed or the cluster is no longer needed, it can be deleted
# eksctl delete cluster --name=${k8s_name} --region=${Region}

# --dry-run            trial run; remove it for the real creation
# --without-nodegroup  do not create worker nodes
# --vpc-xx             attach existing networks; if omitted they are created automatically
# Using subnets in several availability zones is recommended; this cannot be changed after the cluster is created
# eksctl create cluster --help  # show help

#创建k8s计算节点组

# Create a test node group in availability zone "b".
# Flags are grouped as: target cluster, group identity, sizing, placement/network,
# access, then run mode.
eksctl create nodegroup \
  --cluster="${k8s_name}" \
  --region="${Region}" \
  --name=k8s-work-test \
  --instance-name=test-node-b \
  --node-type=m5.large \
  --node-ami-family=Ubuntu2004 \
  --nodes=1 \
  --nodes-min=1 \
  --nodes-max=10 \
  --node-zones="${Region}b" \
  --node-private-networking \
  --node-security-groups=sg-xxxxxxx \
  --ssh-access \
  --ssh-public-key=aws-bastion \
  --full-ecr-access \
  --managed \
  --dry-run

# --nodes 1                     create one node; instance type m5.large (2 cores / 8G)
# --node-ami-family Ubuntu2004  operating system Ubuntu 20.04
# --node-private-networking     use the private subnets
# --node-zones                  availability zone
# --node-security-groups        use an already created security group
# --full-ecr-access             ECR image registry permission, required
# eksctl create nodegroup --help  # show help

# Scale the test node group to 2 nodes
eksctl scale nodegroup \
  --name=k8s-work-test \
  --cluster="${k8s_name}" \
  --region="${Region}" \
  --nodes=2

# Once the test group works, it can be deleted and replaced by a larger production group
# delete the node group
# eksctl delete nodegroup --cluster=${k8s_name} --name=k8s-work-test

# Create the production node group in availability zone "b".
# Flags are grouped as: target cluster, group identity, sizing, placement/network,
# access, then run mode.
eksctl create nodegroup \
  --cluster="${k8s_name}" \
  --region="${Region}" \
  --name=k8s-work-b \
  --instance-name=k8s-node-b \
  --node-type=m5.4xlarge \
  --node-ami-family=Ubuntu2004 \
  --nodes=2 \
  --nodes-min=1 \
  --nodes-max=10 \
  --max-pods-per-node=110 \
  --node-zones="${Region}b" \
  --node-private-networking \
  --node-security-groups=sg-xxxxxxx \
  --ssh-access \
  --ssh-public-key=aws-bastion \
  --full-ecr-access \
  --external-dns-access \
  --managed \
  --dry-run

# Instance type m5.4xlarge: 16 cores / 64G
# Passing several zones to --node-zones spreads the group across zones for high availability

#为k8s集群创建IAM OIDC提供商

# IAM OIDC即 AWS Identity and Access Management (IAM) OpenID Connect (OIDC)
# 创建IAM权限角色时,需要此功能开启  

# Check whether the cluster's OIDC issuer is already registered as an IAM OIDC
# provider; associate it if it is not.
# --region is passed explicitly, like every other step, so the check does not
# depend on the default region in `aws configure`.
oidc_id=$(aws eks describe-cluster --region "${Region}" --name "${k8s_name}" \
  --query "cluster.identity.oidc.issuer" --output text | cut -d'/' -f 5)
# The issuer id is field 5 of the issuer URL when split on '/'.
# grep -q replaces `grep | wc -l`; an empty id (lookup failed) is treated as
# "not registered", which is also what the unquoted original ended up doing.
if [ -z "${oidc_id}" ] || ! aws iam list-open-id-connect-providers | grep -q -- "${oidc_id}"; then
  eksctl utils associate-iam-oidc-provider --region "${Region}" --cluster "${k8s_name}" --approve
fi

#eks安装efs csi驱动

  • k8s使用AWS EFS储存时用到csi驱动
  • efs可使用nfs协议挂载,但k8s节点默认没安装nfs客户端
# Create the IAM policy and role for the EFS CSI driver.
# Step 1: fetch the example policy document published with the driver.
curl --output iam-policy-efs.json \
  https://raw.githubusercontent.com/kubernetes-sigs/aws-efs-csi-driver/master/docs/iam-policy-example.json

# Step 2: register it as a customer-managed IAM policy.
aws iam create-policy \
  --policy-document file://iam-policy-efs.json \
  --policy-name EKS_EFS_CSI_Driver_Policy

# Step 3: create the service account bound to that policy.
eksctl create iamserviceaccount \
  --region="${Region}" \
  --cluster="${k8s_name}" \
  --namespace=kube-system \
  --name=efs-csi-controller-sa \
  --attach-policy-arn="arn:aws:iam::${OwnerId}:policy/EKS_EFS_CSI_Driver_Policy" \
  --approve

# Update the kubeconfig (~/.kube/config) so kubectl talks to the new cluster
aws eks update-kubeconfig --region ${Region} --name ${k8s_name}

# Render the driver manifest (stable overlay, release-1.4) into a local YAML file
kubectl kustomize \
  "github.com/kubernetes-sigs/aws-efs-csi-driver/deploy/kubernetes/overlays/stable/?ref=release-1.4" > aws-eks-efs-csi.1.4.yaml

# vim aws-eks-efs-csi.1.4.yaml
# Manually delete the section below from the rendered file.
# NOTE(review): presumably because a service account with this name and namespace
# was already created by `eksctl create iamserviceaccount` - confirm.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app.kubernetes.io/name: aws-efs-csi-driver
  name: efs-csi-controller-sa
  namespace: kube-system
---

# Deploy the EFS CSI driver
kubectl apply -f aws-eks-efs-csi.1.4.yaml
# Example: a statically provisioned PersistentVolume / PersistentVolumeClaim pair backed by EFS
apiVersion: v1
kind: PersistentVolume
metadata:
  name: aws-efs-test
spec:
  capacity:
    storage: 2000Gi
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  # Explicitly class-less, so it matches the claim below
  storageClassName: ""
  csi:
    driver: efs.csi.aws.com
    volumeHandle: fs-xxx:/data
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: aws-efs-test
spec:
  accessModes:
    - ReadWriteMany
  # Must be "" rather than omitted: an omitted class is replaced by the cluster's
  # default StorageClass, and the claim would then never bind to the class-less PV
  storageClassName: ""
  # Bind to the PersistentVolume defined above
  volumeName: aws-efs-test
  resources:
    requests:
      storage: 2000Gi

# fs-xxx is the EFS file system id; the file system has to be created separately
# After creating the EFS file system, add its subnets and security groups, otherwise it cannot be reached

#安装AWS LB Controller

  • AWS LoadBalancer默认使用Classic Load Balancer模式
  • 使用NLB、ALB模式的负载均衡器,和绑定EIP(绑定固定IP),必须安装LB controller
# Create the IAM policy and role for the AWS Load Balancer Controller
curl -o iam_lbs_v2.4.2.json \
  https://raw.githubusercontent.com/kubernetes-sigs/aws-load-balancer-controller/v2.4.2/docs/install/iam_policy.json

aws iam create-policy \
  --policy-name iam_lbs_v2.4.2 \
  --policy-document file://iam_lbs_v2.4.2.json

# --region is passed explicitly, as for the EFS service account, so the cluster
# lookup does not depend on the default region of the local AWS profile.
eksctl create iamserviceaccount \
  --cluster="${k8s_name}" \
  --region="${Region}" \
  --namespace=kube-system \
  --name=aws-load-balancer-controller \
  --role-name "AmazonEKSLoadBalancerControllerRole" \
  --attach-policy-arn="arn:aws:iam::${OwnerId}:policy/iam_lbs_v2.4.2" \
  --approve

# Install cert-manager
kubectl apply --validate=false \
  --filename=https://github.com/jetstack/cert-manager/releases/download/v1.5.4/cert-manager.yaml

# Download the controller manifest
curl --location --output aws-load-balancer-controller_2.4.2.yaml \
  https://github.com/kubernetes-sigs/aws-load-balancer-controller/releases/download/v2.4.2/v2_4_2_full.yaml

# Put the real cluster name in place of the placeholder (a .bak copy of the file is kept)
sed -i.bak "s|your-cluster-name|${k8s_name}|" aws-load-balancer-controller_2.4.2.yaml

# Manually delete the section below from aws-load-balancer-controller_2.4.2.yaml.
# NOTE(review): presumably because a service account with this name and namespace
# was already created by `eksctl create iamserviceaccount` - confirm.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/name: aws-load-balancer-controller
  name: aws-load-balancer-controller
  namespace: kube-system
---

# Deploy the load balancer controller
kubectl apply -f aws-load-balancer-controller_2.4.2.yaml

# Check the controller deployment
kubectl get deployment -n kube-system aws-load-balancer-controller

#安装ingress-nginx-controller

# Download the ingress-nginx (controller v1.3.0) manifest for AWS
curl --output aws-ingress-nginx.nlb.v1.3.0.yml \
  https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.3.0/deploy/static/provider/aws/deploy.yaml

# Add spec.ipFamilyPolicy: SingleStack
# Edit the Service of type LoadBalancer in the downloaded manifest so it reads as follows
---
apiVersion: v1
kind: Service
metadata:
  annotations:
    # Custom name for the load balancer
    service.beta.kubernetes.io/aws-load-balancer-name: k8s-ingress-slb
    # Load balancer in NLB mode
    service.beta.kubernetes.io/aws-load-balancer-type: "external"
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: "ip"
    # Internet-facing scheme, used together with EIPs
    service.beta.kubernetes.io/aws-load-balancer-scheme: "internet-facing"
    # Public subnets
    service.beta.kubernetes.io/aws-load-balancer-subnets: subnet-axxx, subnet-bxxx
    # Elastic IP allocations
    service.beta.kubernetes.io/aws-load-balancer-eip-allocations: eipalloc-axxx, eipalloc-bxxx
    # Preserve the real client IP
    service.beta.kubernetes.io/aws-load-balancer-target-group-attributes: preserve_client_ip.enabled=true
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.3.0
  name: ingress-nginx-controller
  namespace: ingress-nginx
spec:
  type: LoadBalancer
  # externalTrafficPolicy: Local
  ipFamilyPolicy: SingleStack
  ipFamilies:
    - IPv4
  ports:
  - appProtocol: http
    name: http
    port: 80
    protocol: TCP
    targetPort: http
  - appProtocol: https
    name: https
    port: 443
    protocol: TCP
    targetPort: https
  selector:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx

# Deploy the edited manifest
kubectl apply --filename=aws-ingress-nginx.nlb.v1.3.0.yml

# Show the service to obtain its EXTERNAL-IP address
kubectl --namespace=ingress-nginx get svc ingress-nginx-controller

# Ping the EXTERNAL-IP hostname to check whether it resolves to your own EIP
ping "k8s-ingress-slb-xxx.elb.${Region}.amazonaws.com"

# Access test (response headers only)
curl --head "k8s-ingress-slb-xxx.elb.${Region}.amazonaws.com"

#使用私有镜像仓库,并部署服务测试

# Create the "nginx" repository
aws ecr create-repository \
  --region "$Region" \
  --repository-name nginx

# Log in to the registry (the cached login credential is valid for 12 hours)
aws ecr get-login-password --region "$Region" \
  | docker login -u AWS --password-stdin "${OwnerId}.dkr.ecr.${Region}.amazonaws.com"

# Pull the public image and re-tag it with the private repository address
docker pull public.ecr.aws/nginx/nginx:alpine
docker tag public.ecr.aws/nginx/nginx:alpine "${OwnerId}.dkr.ecr.${Region}.amazonaws.com/nginx:alpine"

# Push the image to the newly created repository
docker push "${OwnerId}.dkr.ecr.${Region}.amazonaws.com/nginx:alpine"

# Test deployment from the private image
kubectl create deployment nginx \
  --image="${OwnerId}.dkr.ecr.${Region}.amazonaws.com/nginx:alpine" \
  --port=80

# List the pods
kubectl get pods

# Lifecycle policy example: keep 3 images, matching countNumber below.
# The rule selects images with any tag status once the image count exceeds 3 and expires them.
cat >aws-ecr-policy.json <<EOF
{
  "rules": [
    {
      "rulePriority": 1,
      "description": "Keep only 3 image",
      "selection": {
        "tagStatus": "any",
        "countType": "imageCountMoreThan",
        "countNumber": 3
      },
      "action": {
        "type": "expire"
      }
    }
  ]
}
EOF
# Attach the policy written above to the "nginx" repository
aws ecr put-lifecycle-policy --region $Region \
  --repository-name nginx \
  --lifecycle-policy-text file://aws-ecr-policy.json 

# Remove the test deployment (and with it its pods)
kubectl delete deployment nginx

# Delete the repository; --force removes it even if it still contains images
aws ecr delete-repository \
  --repository-name nginx \
  --force \
  --region "$Region"
  • k8s有pull私有镜像仓库权限,是因为创建参数--full-ecr-access
  • AWS ECR镜像储存服务不支持目录,只能分别给每个镜像创建储存库
  • aws ecr get-login-password生成的凭证有效期12小时,可使用定时任务每天登录2次解决

参考文档


About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK