AEWS 2기 3주차

항상 생각하는 거지만, 이 스터디에 참여자분들은 정말 정말 실력이 뛰어난 것 같습니다.

따라가기에 급급한 제가 참 부족해 보입니다.

분발해야겠습니다.

그럼

스터디 3주차 공유 시작하겠습니다.

EKS 스토리지에 대해 공부하겠습니다.

AWS EBS Controller

Volume (ebs-csi-controller) : EBS CSI driver 동작 : 볼륨 생성 및 파드에 볼륨 연결 - 링크

설치 Amazon EBS CSI driver as an Amazon EKS add-on - 링크 Parameters

--기본설정--

# default 네임스페이스 적용
kubectl ns default

# EFS 확인 : AWS 관리콘솔 EFS 확인해보자
echo $EfsFsId
mount -t efs -o tls $EfsFsId:/ /mnt/myefs
df -hT --type nfs4

echo "efs file test" > /mnt/myefs/memo.txt
cat /mnt/myefs/memo.txt
rm -f /mnt/myefs/memo.txt

# 스토리지클래스 및 CSI 노드 확인
kubectl get sc
kubectl get sc gp2 -o yaml | yh
kubectl get csinodes

# 노드 정보 확인
kubectl get node --label-columns=node.kubernetes.io/instance-type,eks.amazonaws.com/capacityType,topology.kubernetes.io/zone
eksctl get iamidentitymapping --cluster myeks

# 노드 IP 확인 및 PrivateIP 변수 지정
N1=$(kubectl get node --label-columns=topology.kubernetes.io/zone --selector=topology.kubernetes.io/zone=ap-northeast-2a -o jsonpath={.items[0].status.addresses[0].address})
N2=$(kubectl get node --label-columns=topology.kubernetes.io/zone --selector=topology.kubernetes.io/zone=ap-northeast-2b -o jsonpath={.items[0].status.addresses[0].address})
N3=$(kubectl get node --label-columns=topology.kubernetes.io/zone --selector=topology.kubernetes.io/zone=ap-northeast-2c -o jsonpath={.items[0].status.addresses[0].address})
echo "export N1=$N1" >> /etc/profile
echo "export N2=$N2" >> /etc/profile
echo "export N3=$N3" >> /etc/profile
echo $N1, $N2, $N3

# 노드 보안그룹 ID 확인
NGSGID=$(aws ec2 describe-security-groups --filters Name=group-name,Values=*ng1* --query "SecurityGroups[*].[GroupId]" --output text)
aws ec2 authorize-security-group-ingress --group-id $NGSGID --protocol '-1' --cidr 192.168.1.100/32

# 워커 노드 SSH 접속
for node in $N1 $N2 $N3; do ssh ec2-user@$node hostname; done

AWS LB/ExternalDNS, kube-ops-view 설치

# AWS LB Controller
helm repo add eks https://aws.github.io/eks-charts
helm repo update
helm install aws-load-balancer-controller eks/aws-load-balancer-controller -n kube-system --set clusterName=$CLUSTER_NAME \
  --set serviceAccount.create=false --set serviceAccount.name=aws-load-balancer-controller

# ExternalDNS
MyDomain=<자신의 도메인>
MyDomain=gasida.link
MyDnzHostedZoneId=$(aws route53 list-hosted-zones-by-name --dns-name "${MyDomain}." --query "HostedZones[0].Id" --output text)
echo $MyDomain, $MyDnzHostedZoneId

curl -s -O https://raw.githubusercontent.com/gasida/PKOS/main/aews/externaldns.yaml
sed -i "s/0.13.4/0.14.0/g" externaldns.yaml
MyDomain=$MyDomain MyDnzHostedZoneId=$MyDnzHostedZoneId envsubst < externaldns.yaml | kubectl apply -f -

# kube-ops-view
helm repo add geek-cookbook https://geek-cookbook.github.io/charts/
helm install kube-ops-view geek-cookbook/kube-ops-view --version 1.2.2 --set env.TZ="Asia/Seoul" --namespace kube-system
kubectl patch svc -n kube-system kube-ops-view -p '{"spec":{"type":"LoadBalancer"}}'
kubectl annotate service kube-ops-view -n kube-system "external-dns.alpha.kubernetes.io/hostname=kubeopsview.$MyDomain"
echo -e "Kube Ops View URL = http://kubeopsview.$MyDomain:8080/#scale=1.5"

설치 정보 확인

# 이미지 정보 확인
kubectl get pods --all-namespaces -o jsonpath="{.items[*].spec.containers[*].image}" | tr -s '[[:space:]]' '\n' | sort | uniq -c

# eksctl 설치/업데이트 addon 확인
eksctl get addon --cluster $CLUSTER_NAME

# IRSA 확인
eksctl get iamserviceaccount --cluster $CLUSTER_NAME

# 아래는 aws-ebs-csi-driver 전체 버전 정보와 기본 설치 버전(True) 정보 확인
aws eks describe-addon-versions \
    --addon-name aws-ebs-csi-driver \
    --kubernetes-version 1.28 \
    --query "addons[].addonVersions[].[addonVersion, compatibilities[].defaultVersion]" \
    --output text

# ISRA 설정 : AWS관리형 정책 AmazonEBSCSIDriverPolicy 사용
eksctl create iamserviceaccount \
  --name ebs-csi-controller-sa \
  --namespace kube-system \
  --cluster ${CLUSTER_NAME} \
  --attach-policy-arn arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy \
  --approve \
  --role-only \
  --role-name AmazonEKS_EBS_CSI_DriverRole

# ISRA 확인
eksctl get iamserviceaccount --cluster myeks
NAMESPACE	    NAME				            ROLE ARN
kube-system 	ebs-csi-controller-sa		arn:aws:iam::911283464785:role/AmazonEKS_EBS_CSI_DriverRole
...

# Amazon EBS CSI driver addon 추가
eksctl create addon --name aws-ebs-csi-driver --cluster ${CLUSTER_NAME} --service-account-role-arn arn:aws:iam::${ACCOUNT_ID}:role/AmazonEKS_EBS_CSI_DriverRole --force
kubectl get sa -n kube-system ebs-csi-controller-sa -o yaml | head -5

# 확인
eksctl get addon --cluster ${CLUSTER_NAME}
kubectl get deploy,ds -l=app.kubernetes.io/name=aws-ebs-csi-driver -n kube-system
kubectl get pod -n kube-system -l 'app in (ebs-csi-controller,ebs-csi-node)'
kubectl get pod -n kube-system -l app.kubernetes.io/component=csi-driver

# ebs-csi-controller 파드에 6개 컨테이너 확인
kubectl get pod -n kube-system -l app=ebs-csi-controller -o jsonpath='{.items[0].spec.containers[*].name}' ; echo
ebs-plugin csi-provisioner csi-attacher csi-snapshotter csi-resizer liveness-probe

# csinodes 확인
kubectl get csinodes

# gp3 스토리지 클래스 생성 - Link
kubectl get sc
cat <<EOT > gp3-sc.yaml
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: gp3
allowVolumeExpansion: true
provisioner: ebs.csi.aws.com
volumeBindingMode: WaitForFirstConsumer
parameters:
  type: gp3
  #iops: "5000"
  #throughput: "250"
  allowAutoIOPSPerGBIncrease: 'true'
  encrypted: 'true'
  fsType: xfs # 기본값이 ext4
EOT
kubectl apply -f gp3-sc.yaml
kubectl get sc
kubectl describe sc gp3 | grep Parameters

# 속도 테스트 >> iops, throughput 설정 시 뒤에 pvc,pod 생성이 되지 않음... 문제 해결 필요
kubestr fio -f fio-read.fio -s gp3 --size 10G
while true; do aws ec2 describe-volumes --filters Name=tag:ebs.csi.aws.com/cluster,Values=true --query "Volumes[].{VolumeId: VolumeId, VolumeType: VolumeType, InstanceId: Attachments[0].InstanceId, State: Attachments[0].State}" --output text; date; sleep 1; done

확인

PVC/PV 파드 테스트

# 워커노드의 EBS 볼륨 확인 : tag(키/값) 필터링 - 링크
aws ec2 describe-volumes --filters Name=tag:Name,Values=$CLUSTER_NAME-ng1-Node --output table
aws ec2 describe-volumes --filters Name=tag:Name,Values=$CLUSTER_NAME-ng1-Node --query "Volumes[*].Attachments" | jq
aws ec2 describe-volumes --filters Name=tag:Name,Values=$CLUSTER_NAME-ng1-Node --query "Volumes[*].{ID:VolumeId,Tag:Tags}" | jq
aws ec2 describe-volumes --filters Name=tag:Name,Values=$CLUSTER_NAME-ng1-Node --query "Volumes[].[VolumeId, VolumeType, Attachments[].[InstanceId, State][]][]" | jq
aws ec2 describe-volumes --filters Name=tag:Name,Values=$CLUSTER_NAME-ng1-Node --query "Volumes[].{VolumeId: VolumeId, VolumeType: VolumeType, InstanceId: Attachments[0].InstanceId, State: Attachments[0].State}" | jq

# 워커노드에서 파드에 추가한 EBS 볼륨 확인
aws ec2 describe-volumes --filters Name=tag:ebs.csi.aws.com/cluster,Values=true --output table
aws ec2 describe-volumes --filters Name=tag:ebs.csi.aws.com/cluster,Values=true --query "Volumes[*].{ID:VolumeId,Tag:Tags}" | jq
aws ec2 describe-volumes --filters Name=tag:ebs.csi.aws.com/cluster,Values=true --query "Volumes[].{VolumeId: VolumeId, VolumeType: VolumeType, InstanceId: Attachments[0].InstanceId, State: Attachments[0].State}" | jq

# 워커노드에서 파드에 추가한 EBS 볼륨 모니터링
while true; do aws ec2 describe-volumes --filters Name=tag:ebs.csi.aws.com/cluster,Values=true --query "Volumes[].{VolumeId: VolumeId, VolumeType: VolumeType, InstanceId: Attachments[0].InstanceId, State: Attachments[0].State}" --output text; date; sleep 1; done

# PVC 생성
cat <<EOT > awsebs-pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ebs-claim
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 4Gi
  storageClassName: gp3
EOT
kubectl apply -f awsebs-pvc.yaml
kubectl get pvc,pv

# 파드 생성
cat <<EOT > awsebs-pod.yaml
apiVersion: v1
kind: Pod
metadata:
  name: app
spec:
  terminationGracePeriodSeconds: 3
  containers:
  - name: app
    image: centos
    command: ["/bin/sh"]
    args: ["-c", "while true; do echo \$(date -u) >> /data/out.txt; sleep 5; done"]
    volumeMounts:
    - name: persistent-storage
      mountPath: /data
  volumes:
  - name: persistent-storage
    persistentVolumeClaim:
      claimName: ebs-claim
EOT
kubectl apply -f awsebs-pod.yaml

# PVC, 파드 확인
kubectl get pvc,pv,pod
kubectl get VolumeAttachment

# 추가된 EBS 볼륨 상세 정보 확인 
aws ec2 describe-volumes --volume-ids $(kubectl get pv -o jsonpath="{.items[0].spec.csi.volumeHandle}") | jq

# PV 상세 확인 : nodeAffinity 내용의 의미는?
kubectl get pv -o yaml | yh
...
    nodeAffinity:
      required:
        nodeSelectorTerms:
        - matchExpressions:
          - key: topology.ebs.csi.aws.com/zone
            operator: In
            values:
            - ap-northeast-2b
...

kubectl get node --label-columns=topology.ebs.csi.aws.com/zone,topology.kubernetes.io/zone
kubectl describe node | more

# 파일 내용 추가 저장 확인
kubectl exec app -- tail -f /data/out.txt

# 아래 명령어는 확인까지 다소 시간이 소요됨
kubectl df-pv

## 파드 내에서 볼륨 정보 확인
kubectl exec -it app -- sh -c 'df -hT --type=overlay'
kubectl exec -it app -- sh -c 'df -hT --type=xfs'

gp3 확인

볼륨 증가 - 링크 ⇒ 늘릴수는 있어도 줄일수는 없단다! - 링크

Resizing Persistent Volumes using Kubernetes

Author: Hemant Kumar (Red Hat) Editor’s note: this post is part of a series of in-depth articles on what’s new in Kubernetes 1.11 In Kubernetes v1.11 the persistent volume expansion feature is being promoted to beta. This feature allows users to easily

kubernetes.io

# 현재 pv 의 이름을 기준하여 4G > 10G 로 증가 : .spec.resources.requests.storage의 4Gi 를 10Gi로 변경
kubectl get pvc ebs-claim -o jsonpath={.spec.resources.requests.storage} ; echo
kubectl get pvc ebs-claim -o jsonpath={.status.capacity.storage} ; echo
kubectl patch pvc ebs-claim -p '{"spec":{"resources":{"requests":{"storage":"10Gi"}}}}'
kubectl patch pvc ebs-claim -p '{"status":{"capacity":{"storage":"10Gi"}}}' # status 는 바로 위 커멘드 적용 후 EBS 10Gi 확장 후 알아서 10Gi 반영됨

# 확인 : 볼륨 용량 수정 반영이 되어야 되니, 수치 반영이 조금 느릴수 있다
kubectl exec -it app -- sh -c 'df -hT --type=xfs'
kubectl df-pv
aws ec2 describe-volumes --volume-ids $(kubectl get pv -o jsonpath="{.items[0].spec.csi.volumeHandle}") | jq

4Gi->10Gi로 변경 확인

삭제

kubectl delete pod app & kubectl delete pvc ebs-claim

AWS Volume SnapShots Controller

Volumesnapshots 컨트롤러 설치 - 링크 VolumeSnapshot example Blog

# (참고) EBS CSI Driver에 snapshots 기능 포함 될 것으로 보임
kubectl describe pod -n kube-system -l app=ebs-csi-controller

# Install Snapshot CRDs
curl -s -O https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/master/client/config/crd/snapshot.storage.k8s.io_volumesnapshots.yaml
curl -s -O https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/master/client/config/crd/snapshot.storage.k8s.io_volumesnapshotclasses.yaml
curl -s -O https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/master/client/config/crd/snapshot.storage.k8s.io_volumesnapshotcontents.yaml
kubectl apply -f snapshot.storage.k8s.io_volumesnapshots.yaml,snapshot.storage.k8s.io_volumesnapshotclasses.yaml,snapshot.storage.k8s.io_volumesnapshotcontents.yaml
kubectl get crd | grep snapshot
kubectl api-resources  | grep snapshot

# Install Common Snapshot Controller
curl -s -O https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/master/deploy/kubernetes/snapshot-controller/rbac-snapshot-controller.yaml
curl -s -O https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/master/deploy/kubernetes/snapshot-controller/setup-snapshot-controller.yaml
kubectl apply -f rbac-snapshot-controller.yaml,setup-snapshot-controller.yaml
kubectl get deploy -n kube-system snapshot-controller
kubectl get pod -n kube-system -l app=snapshot-controller

# Install Snapshotclass
curl -s -O https://raw.githubusercontent.com/kubernetes-sigs/aws-ebs-csi-driver/master/examples/kubernetes/snapshot/manifests/classes/snapshotclass.yaml
kubectl apply -f snapshotclass.yaml
kubectl get vsclass # 혹은 volumesnapshotclasses

확인

사용하기

테스트 PVC/파드 생성

# PVC 생성
kubectl apply -f awsebs-pvc.yaml

# 파드 생성
kubectl apply -f awsebs-pod.yaml

# 파일 내용 추가 저장 확인
kubectl exec app -- tail -f /data/out.txt

# VolumeSnapshot 생성 : Create a VolumeSnapshot referencing the PersistentVolumeClaim name >> EBS 스냅샷 확인
curl -s -O https://raw.githubusercontent.com/gasida/PKOS/main/3/ebs-volume-snapshot.yaml
cat ebs-volume-snapshot.yaml | yh
kubectl apply -f ebs-volume-snapshot.yaml

# VolumeSnapshot 확인
kubectl get volumesnapshot
kubectl get volumesnapshot ebs-volume-snapshot -o jsonpath={.status.boundVolumeSnapshotContentName} ; echo
kubectl describe volumesnapshot.snapshot.storage.k8s.io ebs-volume-snapshot
kubectl get volumesnapshotcontents

# VolumeSnapshot ID 확인 
kubectl get volumesnapshotcontents -o jsonpath='{.items[*].status.snapshotHandle}' ; echo

# AWS EBS 스냅샷 확인
aws ec2 describe-snapshots --owner-ids self | jq
aws ec2 describe-snapshots --owner-ids self --query 'Snapshots[]' --output table

# app & pvc 제거 : 강제로 장애 재현
kubectl delete pod app && kubectl delete pvc ebs-claim

완전히 제거된 후 일정의 텀을 둔다.

# 스냅샷에서 PVC 로 복원
kubectl get pvc,pv
cat <<EOT > ebs-snapshot-restored-claim.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ebs-snapshot-restored-claim
spec:
  storageClassName: gp3
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 4Gi
  dataSource:
    name: ebs-volume-snapshot
    kind: VolumeSnapshot
    apiGroup: snapshot.storage.k8s.io
EOT
cat ebs-snapshot-restored-claim.yaml | yh
kubectl apply -f ebs-snapshot-restored-claim.yaml

# 확인
kubectl get pvc,pv

# 파드 생성
curl -s -O https://raw.githubusercontent.com/gasida/PKOS/main/3/ebs-snapshot-restored-pod.yaml
cat ebs-snapshot-restored-pod.yaml | yh
kubectl apply -f ebs-snapshot-restored-pod.yaml

# 파일 내용 저장 확인 : 파드 삭제 전까지의 저장 기록이 남아 있다. 이후 파드 재생성 후 기록도 잘 저장되고 있다
kubectl exec app -- cat /data/out.txt
...
Sat Dec 24 15:12:24 UTC 2022
Sat Dec 24 15:12:24 UTC 2022
Sat Dec 24 15:24:23 UTC 2022
Sat Dec 24 15:24:23 UTC 2022
...

# 삭제
kubectl delete pod app && kubectl delete pvc ebs-snapshot-restored-claim && kubectl delete volumesnapshots ebs-volume-snapshot

아래 파드에 시간을 두고 텍스트가 찍힌 것을 볼 수 있다.

AWS EFS Controller

현재는 EFS Addon 설치 가능 → https://docs.aws.amazon.com/eks/latest/userguide/efs-csi.html

Amazon EFS CSI driver - Amazon EKS

Amazon EFS CSI driver Amazon Elastic File System (Amazon EFS) provides serverless, fully elastic file storage so that you can share file data without provisioning or managing storage capacity and performance. The Amazon EFS Container Storage Interface (CSI

docs.aws.amazon.com

아키텍처

# EFS 정보 확인 
aws efs describe-file-systems --query "FileSystems[*].FileSystemId" --output text

# IAM 정책 생성
curl -s -O https://raw.githubusercontent.com/kubernetes-sigs/aws-efs-csi-driver/master/docs/iam-policy-example.json
aws iam create-policy --policy-name AmazonEKS_EFS_CSI_Driver_Policy --policy-document file://iam-policy-example.json

# ISRA 설정 : 고객관리형 정책 AmazonEKS_EFS_CSI_Driver_Policy 사용
eksctl create iamserviceaccount \
  --name efs-csi-controller-sa \
  --namespace kube-system \
  --cluster ${CLUSTER_NAME} \
  --attach-policy-arn arn:aws:iam::${ACCOUNT_ID}:policy/AmazonEKS_EFS_CSI_Driver_Policy \
  --approve

# ISRA 확인
kubectl get sa -n kube-system efs-csi-controller-sa -o yaml | head -5
eksctl get iamserviceaccount --cluster myeks

# EFS Controller 설치
helm repo add aws-efs-csi-driver https://kubernetes-sigs.github.io/aws-efs-csi-driver/
helm repo update
helm upgrade -i aws-efs-csi-driver aws-efs-csi-driver/aws-efs-csi-driver \
    --namespace kube-system \
    --set image.repository=602401143452.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/eks/aws-efs-csi-driver \
    --set controller.serviceAccount.create=false \
    --set controller.serviceAccount.name=efs-csi-controller-sa

# 확인
helm list -n kube-system
kubectl get pod -n kube-system -l "app.kubernetes.io/name=aws-efs-csi-driver,app.kubernetes.io/instance=aws-efs-csi-driver"

확인

- AWS → EFS → 파일 시스템 : 네트워크 → 탑재 대상 ID 확인

-EFS 파일시스템을 다수의 파드가 사용하게 설정 : Add empty StorageClasses from static example - Workshop 링크

# 모니터링
watch 'kubectl get sc efs-sc; echo; kubectl get pv,pvc,pod'

# 실습 코드 clone
git clone https://github.com/kubernetes-sigs/aws-efs-csi-driver.git /root/efs-csi
cd /root/efs-csi/examples/kubernetes/multiple_pods/specs && tree

# EFS 스토리지클래스 생성 및 확인
cat storageclass.yaml | yh
kubectl apply -f storageclass.yaml
kubectl get sc efs-sc

# PV 생성 및 확인 : volumeHandle을 자신의 EFS 파일시스템ID로 변경
EfsFsId=$(aws efs describe-file-systems --query "FileSystems[*].FileSystemId" --output text)
sed -i "s/fs-4af69aab/$EfsFsId/g" pv.yaml

cat pv.yaml | yh
apiVersion: v1
kind: PersistentVolume
metadata:
  name: efs-pv
spec:
  capacity:
    storage: 5Gi
  volumeMode: Filesystem
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  storageClassName: efs-sc
  csi:
    driver: efs.csi.aws.com
    volumeHandle: fs-05699d3c12ef609e2

kubectl apply -f pv.yaml
kubectl get pv; kubectl describe pv

# PVC 생성 및 확인
cat claim.yaml | yh
kubectl apply -f claim.yaml
kubectl get pvc

# 파드 생성 및 연동 : 파드 내에 /data 데이터는 EFS를 사용
cat pod1.yaml pod2.yaml | yh
kubectl apply -f pod1.yaml,pod2.yaml
kubectl df-pv

# 파드 정보 확인 : PV에 5Gi 와 파드 내에서 확인한 NFS4 볼륨 크리 8.0E의 차이는 무엇? 파드에 6Gi 이상 저장 가능한가?
kubectl get pods
kubectl exec -ti app1 -- sh -c "df -hT -t nfs4"
kubectl exec -ti app2 -- sh -c "df -hT -t nfs4"
Filesystem           Type            Size      Used Available Use% Mounted on
127.0.0.1:/          nfs4            8.0E         0      8.0E   0% /data

# 공유 저장소 저장 동작 확인
tree /mnt/myefs              # 작업용EC2에서 확인
tail -f /mnt/myefs/out1.txt  # 작업용EC2에서 확인
kubectl exec -ti app1 -- tail -f /data/out1.txt
kubectl exec -ti app2 -- tail -f /data/out2.txt

확인

-EFS 파일시스템을 다수의 파드가 사용하게 설정 : Dynamic provisioning using EFS ← Fargate node는 현재 미지원 - Workshop

Dynamic provisioning using EFS | EKS Workshop

Now that we understand the EFS storage class for Kubernetes let's create a Persistent Volume and change the assets container on the assets deployment to mount the Volume created.

www.eksworkshop.com

EKS Persistent Volumes for Instance Store & Add NodeGroup

신규 노드 그룹 ng2 생성 - Blog : c5d.large 의 EC2 인스턴스 스토어(임시 블록 스토리지) 설정 작업 - 링크 , NVMe SSD - 링크

- 데이터 손실 : 기본 디스크 드라이브 오류, 인스턴스가 중지됨, 인스턴스가 최대 절전 모드로 전환됨, 인스턴스가 종료됨

인스턴스 스토어는 EC2 스토리지(EBS) 정보에 출력되지는 않는다

# 인스턴스 스토어 볼륨이 있는 c5 모든 타입의 스토리지 크기
aws ec2 describe-instance-types \
 --filters "Name=instance-type,Values=c5*" "Name=instance-storage-supported,Values=true" \
 --query "InstanceTypes[].[InstanceType, InstanceStorageInfo.TotalSizeInGB]" \
 --output table
--------------------------
|  DescribeInstanceTypes |
+---------------+--------+
|  c5d.large    |  50    |
|  c5d.12xlarge |  1800  |
...

# 신규 노드 그룹 생성
eksctl create nodegroup --help
eksctl create nodegroup -c $CLUSTER_NAME -r $AWS_DEFAULT_REGION --subnet-ids "$PubSubnet1","$PubSubnet2","$PubSubnet3" --ssh-access \
  -n ng2 -t c5d.large -N 1 -m 1 -M 1 --node-volume-size=30 --node-labels disk=nvme --max-pods-per-node 100 --dry-run > myng2.yaml

cat <<EOT > nvme.yaml
  preBootstrapCommands:
    - |
      # Install Tools
      yum install nvme-cli links tree jq tcpdump sysstat -y

      # Filesystem & Mount
      mkfs -t xfs /dev/nvme1n1
      mkdir /data
      mount /dev/nvme1n1 /data

      # Get disk UUID
      uuid=\$(blkid -o value -s UUID mount /dev/nvme1n1 /data) 

      # Mount the disk during a reboot
      echo /dev/nvme1n1 /data xfs defaults,noatime 0 2 >> /etc/fstab
EOT
sed -i -n -e '/volumeType/r nvme.yaml' -e '1,$p' myng2.yaml
eksctl create nodegroup -f myng2.yaml

# 노드 보안그룹 ID 확인
NG2SGID=$(aws ec2 describe-security-groups --filters Name=group-name,Values=*ng2* --query "SecurityGroups[*].[GroupId]" --output text)
aws ec2 authorize-security-group-ingress --group-id $NG2SGID --protocol '-1' --cidr 192.168.1.100/32

# 워커 노드 SSH 접속
N4=<각자 자신의 워커 노드4번 Private IP 지정>
N4=192.168.3.160
ssh ec2-user@$N4 hostname

# 확인
ssh ec2-user@$N4 sudo nvme list
ssh ec2-user@$N4 sudo lsblk -e 7 -d
ssh ec2-user@$N4 sudo df -hT -t xfs
ssh ec2-user@$N4 sudo tree /data
ssh ec2-user@$N4 sudo cat /etc/fstab

# (옵션) max-pod 확인
kubectl describe node -l disk=nvme | grep Allocatable: -A7
Allocatable:
  attachable-volumes-aws-ebs:  25
  cpu:                         1930m
  ephemeral-storage:           27905944324
  hugepages-1Gi:               0
  hugepages-2Mi:               0
  memory:                      3097552Ki
  pods:                        100

# (옵션) kubelet 데몬 파라미터 확인 : --max-pods=29 --max-pods=100
ssh ec2-user@$N4 sudo ps -ef | grep kubelet
root      2972     1  0 16:03 ?        00:00:09 /usr/bin/kubelet --config /etc/kubernetes/kubelet/kubelet-config.json --kubeconfig /var/lib/kubelet/kubeconfig --container-runtime-endpoint unix:///run/containerd/containerd.sock --image-credential-provider-config /etc/eks/image-credential-provider/config.json --image-credential-provider-bin-dir /etc/eks/image-credential-provider --node-ip=192.168.3.131 --pod-infra-container-image=602401143452.dkr.ecr.ap-northeast-2.amazonaws.com/eks/pause:3.5 --v=2 --cloud-provider=aws --container-runtime=remote --node-labels=eks.amazonaws.com/sourceLaunchTemplateVersion=1,alpha.eksctl.io/cluster-name=myeks,alpha.eksctl.io/nodegroup-name=ng2,disk=nvme,eks.amazonaws.com/nodegroup-image=ami-0da378ed846e950a4,eks.amazonaws.com/capacityType=ON_DEMAND,eks.amazonaws.com/nodegroup=ng2,eks.amazonaws.com/sourceLaunchTemplateId=lt-030e6043923ce712b --max-pods=29 --max-pods=100

노드 그룹 생성 확인

노드 그룹 2에 생성된 인스턴스 접속 후 확인해보기

-스토리지 클래스 재생성

# 기존 삭제
#curl -s -O https://raw.githubusercontent.com/rancher/local-path-provisioner/master/deploy/local-path-storage.yaml
cd
kubectl delete -f local-path-storage.yaml

#
sed -i 's/opt/data/g' local-path-storage.yaml
kubectl apply -f local-path-storage.yaml

# 모니터링
watch 'kubectl get pod -owide;echo;kubectl get pv,pvc'
ssh ec2-user@$N4 iostat -xmdz 1 -p nvme1n1

# 측정 : Read
#curl -s -O https://raw.githubusercontent.com/wikibook/kubepractice/main/ch10/fio-read.fio
kubestr fio -f fio-read.fio -s local-path --size 10G --nodeselector disk=nvme
...

삭제

# 
kubectl delete -f local-path-storage.yaml

# 노드그룹 삭제
eksctl delete nodegroup -c $CLUSTER_NAME -n ng2

노드 그룹

Graviton (ARM) Instance 노드그룹 - Link

Graviton (ARM) instances | EKS Workshop

www.eksworkshop.com

#
kubectl get nodes -L kubernetes.io/arch

# 신규 노드 그룹 생성
eksctl create nodegroup --help
eksctl create nodegroup -c $CLUSTER_NAME -r $AWS_DEFAULT_REGION --subnet-ids "$PubSubnet1","$PubSubnet2","$PubSubnet3" --ssh-access \
  -n ng3 -t t4g.medium -N 1 -m 1 -M 1 --node-volume-size=30 --node-labels family=graviton --dry-run > myng3.yaml
eksctl create nodegroup -f myng3.yaml

# 확인
kubectl get nodes --label-columns eks.amazonaws.com/nodegroup,kubernetes.io/arch
kubectl describe nodes --selector family=graviton
aws eks describe-nodegroup --cluster-name $CLUSTER_NAME --nodegroup-name ng3 | jq .nodegroup.taints

# taints 셋팅 -> 적용에 2~3분 정도 시간 소요
aws eks update-nodegroup-config --cluster-name $CLUSTER_NAME --nodegroup-name ng3 --taints "addOrUpdateTaints=[{key=frontend, value=true, effect=NO_EXECUTE}]"

# 확인
kubectl describe nodes --selector family=graviton | grep Taints
aws eks describe-nodegroup --cluster-name $CLUSTER_NAME --nodegroup-name ng3 | jq .nodegroup.taints
# NO_SCHEDULE - This corresponds to the Kubernetes NoSchedule taint effect. This configures the managed node group with a taint that repels all pods that don't have a matching toleration. All running pods are not evicted from the manage node group's nodes.
# NO_EXECUTE - This corresponds to the Kubernetes NoExecute taint effect. Allows nodes configured with this taint to not only repel newly scheduled pods but also evicts any running pods without a matching toleration.
# PREFER_NO_SCHEDULE - This corresponds to the Kubernetes PreferNoSchedule taint effect. If possible, EKS avoids scheduling Pods that do not tolerate this taint onto the node.

노드 그룹 생성 중

Run pods on Graviton

#
cat << EOT > busybox.yaml
apiVersion: v1
kind: Pod
metadata:
  name: busybox
spec:
  terminationGracePeriodSeconds: 3
  containers:
  - name: busybox
    image: busybox
    command:
    - "/bin/sh"
    - "-c"
    - "while true; do date >> /home/pod-out.txt; cd /home; sync; sync; sleep 10; done"
  tolerations:
    - effect: NoExecute
      key: frontend
      operator: Exists
EOT
kubectl apply -f busybox.yaml

# 파드가 배포된 노드 정보 확인
kubectl get pod -owide

# 삭제
kubectl delete pod busybox
eksctl delete nodegroup -c $CLUSTER_NAME -n ng3

확인

Spot instances - Link Blog

Amazon EKS now supports provisioning and managing EC2 Spot Instances in managed node groups | Amazon Web Services

This post was contributed by Ran Sheinberg, Principal Solutions Architect and Deepthi Chelupati, Sr Product Manager Amazon Elastic Kubernetes Service (Amazon EKS) makes it easy to run upstream, secure, and highly available Kubernetes clusters on AWS. In 20

aws.amazon.com

- Instance type diversification - Link

GitHub - aws/amazon-ec2-instance-selector: A CLI tool and go library which recommends instance types based on resource criteria

A CLI tool and go library which recommends instance types based on resource criteria like vcpus and memory - aws/amazon-ec2-instance-selector

github.com

# ec2-instance-selector 설치
curl -Lo ec2-instance-selector https://github.com/aws/amazon-ec2-instance-selector/releases/download/v2.4.1/ec2-instance-selector-`uname | tr '[:upper:]' '[:lower:]'`-amd64 && chmod +x ec2-instance-selector
mv ec2-instance-selector /usr/local/bin/
ec2-instance-selector --version

# 사용
ec2-instance-selector --vcpus 2 --memory 4 --gpus 0 --current-generation -a x86_64 --deny-list 't.*' --output table-wide
Instance Type  VCPUs   Mem (GiB)  Hypervisor  Current Gen  Hibernation Support  CPU Arch  Network Performance  ENIs    GPUs    GPU Mem (GiB)  GPU Info  On-Demand Price/Hr  Spot Price/Hr (30d avg)  
-------------  -----   ---------  ----------  -----------  -------------------  --------  -------------------  ----    ----    -------------  --------  ------------------  -----------------------  
c5.large       2       4          nitro       true         true                 x86_64    Up to 10 Gigabit     3       0       0              none      $0.096              $0.04574                 
c5a.large      2       4          nitro       true         false                x86_64    Up to 10 Gigabit     3       0       0              none      $0.086              $0.02859                 
c5d.large      2       4          nitro       true         true                 x86_64    Up to 10 Gigabit     3       0       0              none      $0.11               $0.03266                 
c6i.large      2       4          nitro       true         true                 x86_64    Up to 12.5 Gigabit   3       0       0              none      $0.096              $0.04011                 
c6id.large     2       4          nitro       true         true                 x86_64    Up to 12.5 Gigabit   3       0       0              none      $0.1155             $0.02726                 
c6in.large     2       4          nitro       true         false                x86_64    Up to 25 Gigabit     3       0       0              none      $0.1281             $0.0278                  
c7i.large      2       4          nitro       true         true                 x86_64    Up to 12.5 Gigabit   3       0       0              none      $0.1008             $0.02677

#Internally ec2-instance-selector is making calls to the DescribeInstanceTypes for the specific region and filtering the instances based on the criteria selected in the command line, in our case we filtered for instances that meet the following criteria:
- Instances with no GPUs
- of x86_64 Architecture (no ARM instances like A1 or m6g instances for example)
- Instances that have 2 vCPUs and 4 GB of RAM
- Instances of current generation (4th gen onwards)
- Instances that don’t meet the regular expression t.* to filter out burstable instance types

Create spot capacity - Link

Create spot capacity | EKS Workshop

Lets deploy a managed node group that creates Spot instances, followed by modifying the existing catalog component of our application to run on the newly created Spot instances.

www.eksworkshop.com

#
kubectl get nodes -l eks.amazonaws.com/capacityType=ON_DEMAND
kubectl get nodes -L eks.amazonaws.com/capacityType
NAME                                              STATUS   ROLES    AGE   VERSION               CAPACITYTYPE
ip-192-168-1-65.ap-northeast-2.compute.internal   Ready    <none>   75m   v1.28.5-eks-5e0fdde   ON_DEMAND
ip-192-168-2-89.ap-northeast-2.compute.internal   Ready    <none>   75m   v1.28.5-eks-5e0fdde   ON_DEMAND
ip-192-168-3-39.ap-northeast-2.compute.internal   Ready    <none>   75m   v1.28.5-eks-5e0fdde   ON_DEMAND

# 생성 : 아래 node-role 은 각자 자신의 노드롤 ARN을 입력하자
# role AWSServiceRoleForAmazonEKSNodegroup 테스트해보자
aws eks create-nodegroup \
  --cluster-name $CLUSTER_NAME \
  --nodegroup-name managed-spot \
  --subnets $PubSubnet1 $PubSubnet2 $PubSubnet3 \
  --node-role arn:aws:iam::911283464785:role/eksctl-myeks-nodegroup-ng1-NodeInstanceRole-wvZ2FX2m79Vv \
  --instance-types c5.large c5d.large c5a.large \
  --capacity-type SPOT \
  --scaling-config minSize=2,maxSize=3,desiredSize=2 \
  --disk-size 20

aws eks create-nodegroup \
  --cluster-name $CLUSTER_NAME \
  --nodegroup-name managed-spot \
  --subnets $PubSubnet1 $PubSubnet2 $PubSubnet3 \
  --node-role arn:aws:iam::911283464785:role/eksctl-myeks-nodegroup-ng1-NodeInstanceRole-Bf5LiwkL64gF \
  --instance-types c5.large c5d.large c5a.large \
  --capacity-type SPOT \
  --scaling-config minSize=2,maxSize=3,desiredSize=2 \
  --disk-size 20

#
aws eks wait nodegroup-active --cluster-name $CLUSTER_NAME --nodegroup-name managed-spot

# 확인
kubectl get nodes -L eks.amazonaws.com/capacityType,eks.amazonaws.com/nodegroup
NAME                                               STATUS   ROLES    AGE   VERSION               CAPACITYTYPE   NODEGROUP
ip-192-168-1-38.ap-northeast-2.compute.internal    Ready    <none>   37s   v1.28.5-eks-5e0fdde   SPOT           managed-spot
ip-192-168-1-65.ap-northeast-2.compute.internal    Ready    <none>   93m   v1.28.5-eks-5e0fdde   ON_DEMAND      ng1
ip-192-168-2-104.ap-northeast-2.compute.internal   Ready    <none>   37s   v1.28.5-eks-5e0fdde   SPOT           managed-spot
ip-192-168-2-89.ap-northeast-2.compute.internal    Ready    <none>   93m   v1.28.5-eks-5e0fdde   ON_DEMAND      ng1
ip-192-168-3-39.ap-northeast-2.compute.internal    Ready    <none>   93m   v1.28.5-eks-5e0fdde   ON_DEMAND      ng1

Running a workload on Spot

#
cat << EOT > busybox.yaml
apiVersion: v1
kind: Pod
metadata:
  name: busybox
spec:
  terminationGracePeriodSeconds: 3
  containers:
  - name: busybox
    image: busybox
    command:
    - "/bin/sh"
    - "-c"
    - "while true; do date >> /home/pod-out.txt; cd /home; sync; sync; sleep 10; done"
  nodeSelector:
    eks.amazonaws.com/capacityType: SPOT
EOT
kubectl apply -f busybox.yaml

# 파드가 배포된 노드 정보 확인
kubectl get pod -owide

# 삭제
kubectl delete pod busybox
eksctl delete nodegroup -c $CLUSTER_NAME -n managed-spot

실습자원 삭제

eksctl delete cluster --name $CLUSTER_NAME && aws cloudformation delete-stack --stack-name $CLUSTER_NAME

이것으로 스터디 공유를 마치겠습니다.

양도 많고 공부해야 될 것도 너무 많습니다. 그치만 재미있네요.

8주차까지 열심히 달려보겠습니다.

'study > AEWS 2기' 카테고리의 다른 글

AEWS 2기 4주차 두번째 (0)	2024.03.30
AEWS 2기 4주차 첫번째 (0)	2024.03.25
AEWS 2기 2주차 첫번째 (0)	2024.03.22
AEWS 2기 2주차 두번째 (0)	2024.03.13
AEWS 2기 1주차 (0)	2024.03.04

덴고의 공부방

AEWS 2기 3주차

AWS Volume SnapShots Controller

AWS EFS Controller

EKS Persistent Volumes for Instance Store & Add NodeGroup

노드 그룹

'study > AEWS 2기' 카테고리의 다른 글

티스토리툴바

AEWS 2기 3주차

AWS Volume SnapShots Controller

AWS EFS Controller

EKS Persistent Volumes for Instance Store & Add NodeGroup

노드 그룹

'study > AEWS 2기' 카테고리의 다른 글

'study/AEWS 2기' Related Articles

티스토리툴바