..
kans3) gke cni에 대하여
GKE CNI
gke는 기본적으로 eks와 동일하게 flat mode의 cni를 제공한다. 일반적으로 사용하는 vpc native cluster의 경우, pod나 svc ip가 선택한 (노드가 올라가는) subnet의 secondary ip 대역에서 할당된다. secondary ip는 vm에 호스팅된 컨테이너 등에 부여하여 구성할 수 있다. primary subnet과 동일하게 routes에도 들어가고, router를 통해 광고도 되기 때문에 추가 작업 없이 내부 통신이 가능하다. overlay network가 아니어서, 클러스터 간뿐만 아니라 vpc 내부와 vpc에 연결된 네트워크 사이에서도 라우팅 처리만 되어 있다면 pod와 직접 통신할 수 있고, pod ip로 바로 식별할 수 있다. 단점이라 하면, 이렇게 되면 pod와 svc ip 대역까지 겹치지 않도록 잘 구성하여 부여해야 한다는 점이다.
이미지출처🔗
실습
실습 환경
클러스터 구성
# 클러스터 설정을 다 쓰기엔 많고.. 몇개값만 보면 아래와 같다.
--zone "asia-northeast3-a"
--cluster-version "1.30.5-gke.1355000"
--enable-ip-alias
--enable-master-authorized-networks
--master-authorized-networks $MY_IP
# 클러스터 내부에 node pool 구성이 들어가게 되는데, 이는 instance group이라 보면 된다(실제로 mig로 생성됨)
--machine-type "e2-medium"
--image-type "COS_CONTAINERD"
--num-nodes "2"
테스트시 사용 할 이미지 생성
# 웹이랑 tcpdump를 한번에 할 수 있는 이미지를 모르겠어서.. 그냥 nginx에 tcpdump 설치한 이미지 만들었다..
❯ cat Dockerfile
FROM nginx:latest
RUN apt-get update && apt-get install -y tcpdump
# cloud build를 써서 artifact registry에 넣어서 사용
❯ gcloud builds submit --region=asia-northeast3 --tag asia-northeast3-docker.pkg.dev/$PROJECTID/test/testimg:1
❯ k get no
NAME STATUS ROLES AGE VERSION
gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-3dm6 Ready <none> 141m v1.30.5-gke.1355000
gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj Ready <none> 141m v1.30.5-gke.1355000
# 첫번째 노드로 접속
❯ gcloud compute ssh gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-3dm6
# host-local ipam사용하는 것을 확인
eunyoung@gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-3dm6 ~ $ sudo cat /etc/cni/net.d/10-containerd-net.conflist
{
"name": "k8s-pod-network",
"cniVersion": "0.3.1",
"plugins": [
{
"type": "ptp",
"mtu": 1460,
"ipam": {
"type": "host-local",
"subnet": "10.32.0.0/24",
"routes": [
{
"dst": "0.0.0.0/0"
}
]
}
},
{
"type": "portmap",
"capabilities": {
"portMappings": true
}
}
]
}
# kube-proxy 데몬셋을 살펴봤다.
# 기본으로 iptables proxy mode를 쓰는건 알지만.. 안보여서 뒤적거려봤다...
❯ kubectl -n kube-system get daemonset kube-proxy -o yaml
apiVersion: apps/v1
kind: DaemonSet
...
spec:
containers:
- command:
- /bin/sh
- -c
- kube-proxy --cluster-cidr=10.32.0.0/14 --oom-score-adj=-998 --v=2 --feature-gates=InTreePluginAWSUnregister=true,InTreePluginAzureDiskUnregister=true,InTreePluginvSphereUnregister=true,DisableKubeletCloudCredentialProviders=true,UnauthenticatedHTTP2DOSMitigation=true,KMSv1=true
--iptables-sync-period=1m --iptables-min-sync-period=10s --ipvs-sync-period=1m
--ipvs-min-sync-period=10s --detect-local-mode=NodeCIDR 1>>/var/log/kube-proxy.log
2>&1
# 설정값을 어딘가에서 찾기는 애매해보여서 로그를 확인해봄
# 보면 주기적으로 iptables를 동기화 하는 것을 볼 수 있다.
eunyoung@gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-3dm6 ~ $ tail -5 /var/log/kube-proxy.log
I1102 18:30:37.833370 1 proxier.go:1494] "Reloading service iptables data" numServices=0 numEndpoints=0 numFilterChains=5 numFilterRules=2 numNATChains=4 numNATRules=5
I1102 18:30:37.844172 1 proxier.go:799] "SyncProxyRules complete" elapsed="15.285634ms"
I1102 18:32:05.617089 1 proxier.go:805] "Syncing iptables rules"
I1102 18:32:05.622239 1 proxier.go:1494] "Reloading service iptables data" numServices=7 numEndpoints=8 numFilterChains=6 numFilterRules=4 numNATChains=4 numNATRules=12
I1102 18:32:05.639250 1 proxier.go:799] "SyncProxyRules complete" elapsed="22.197176ms"
파드간 통신
❯ cat test-pod.yaml
apiVersion: v1
kind: Pod
metadata:
name: pod1
spec:
nodeName: gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-3dm6
containers:
- name: pod1
image: nicolaka/netshoot
command: ["tail"]
args: ["-f", "/dev/null"]
terminationGracePeriodSeconds: 0
---
apiVersion: v1
kind: Pod
metadata:
name: pod2
spec:
nodeName: gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj
containers:
- name: pod2
image: nicolaka/netshoot
command: ["tail"]
args: ["-f", "/dev/null"]
terminationGracePeriodSeconds: 0
❯ k apply -f test-pod.yaml
# pod1에 접속하여 pod2로 ping
❯ k exec -it pod1 -- /bin/sh
~ # ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
2: eth0@if7: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1460 qdisc noqueue state UP group default
link/ether 5a:ee:33:89:1f:6e brd ff:ff:ff:ff:ff:ff link-netnsid 0
inet 10.32.0.5/24 brd 10.32.0.255 scope global eth0
valid_lft forever preferred_lft forever
# pod2 ip = 10.32.1.13
~ # ping -c 3 10.32.1.13
PING 10.32.1.13 (10.32.1.13) 56(84) bytes of data.
64 bytes from 10.32.1.13: icmp_seq=1 ttl=62 time=0.746 ms
64 bytes from 10.32.1.13: icmp_seq=2 ttl=62 time=0.179 ms
64 bytes from 10.32.1.13: icmp_seq=3 ttl=62 time=0.192 ms
--- 10.32.1.13 ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 2054ms
rtt min/avg/max/mdev = 0.179/0.372/0.746/0.264 ms
# pod2에서 tcpdump로 확인해보면, 게이트웨이(10.32.1.1)가 arp로 pod2의 mac 주소를 알아낸 뒤 pod1 ip(10.32.0.5)를 src로 한 패킷이 그대로 pod2로 들어온다.
~ # tcpdump
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), snapshot length 262144 bytes
20:01:26.435245 ARP, Request who-has pod2 tell 10.32.1.1, length 28
20:01:26.435263 ARP, Reply pod2 is-at 02:82:6f:14:53:01 (oui Unknown), length 28
20:01:26.435267 IP 10.32.0.5 > pod2: ICMP echo request, id 15, seq 1, length 64
20:01:26.435284 IP pod2 > 10.32.0.5: ICMP echo reply, id 15, seq 1, length 64
# pod1이 있는 노드에서 떠봤을 때에도, 각 pod ip로 확인 가능하다.
root@gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-3dm6:~# tcpdump icmp
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), snapshot length 262144 bytes
20:01:26.434972 IP 10.32.0.5 > 10.32.1.13: ICMP echo request, id 15, seq 1, length 64
20:01:26.435619 IP 10.32.1.13 > 10.32.0.5: ICMP echo reply, id 15, seq 1, length 64
20:01:27.460871 IP 10.32.0.5 > 10.32.1.13: ICMP echo request, id 15, seq 2, length 64
20:01:27.461013 IP 10.32.1.13 > 10.32.0.5: ICMP echo reply, id 15, seq 2, length 64
외부에서 NodePort로 확인
❯ cat svc.yaml
...
spec:
ports:
- name: svc-webport
port: 9000
targetPort: 80
nodePort: 30007
selector:
app: webpod
type: NodePort
❯ k apply -f svc.yaml
❯ k get no -owide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-3dm6 Ready <none> 8h v1.30.5-gke.1355000 10.0.0.37 34.47.122.177 Container-Optimized OS from Google 6.1.100+ containerd://1.7.22
gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj Ready <none> 8h v1.30.5-gke.1355000 10.0.0.38 34.64.32.222 Container-Optimized OS from Google 6.1.100+ containerd://1.7.22
# 로컬에서 확인
❯ curl 34.47.122.177:30007
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
...
# webpod가 있는 노드에서 확인
root@gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-3dm6:~# tcpdump src 121.167.232.64 and port 30007
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), snapshot length 262144 bytes
00:24:28.800102 IP 121.167.232.64.58595 > gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-3dm6.asia-northeast3-a.c.dhha-team-20221221.internal.30007: Flags [S], seq 767436524, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 2639455605 ecr 0,sackOK,eol], length 0
00:24:28.804698 IP 121.167.232.64.58595 > gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-3dm6.asia-northeast3-a.c.dhha-team-20221221.internal.30007: Flags [.], ack 1151346211, win 2068, options [nop,nop,TS val 2639455617 ecr 3362636754], length 0
00:24:28.804698 IP 121.167.232.64.58595 > gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-3dm6.asia-northeast3-a.c.dhha-team-20221221.internal.30007: Flags [P.], seq 0:82, ack 1, win 2068, options [nop,nop,TS val 2639455617 ecr 3362636754], length 82
# webpod에서 tcpdump 확인
00:23:03.468025 IP 10.32.0.1.21526 > webpod1.80: Flags [S], seq 4287812138, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 4117526044 ecr 0,sackOK,eol], length 0
00:23:03.468045 IP webpod1.80 > 10.32.0.1.21526: Flags [S.], seq 2724611818, ack 4287812139, win 21120, options [mss 1420,sackOK,TS val 3362551422 ecr 4117526044,nop,wscale 7], length 0
00:23:03.474813 IP 10.32.0.1.21526 > webpod1.80: Flags [.], ack 1, win 2068, options [nop,nop,TS val 4117526051 ecr 3362551422], length 0
00:23:03.474815 IP 10.32.0.1.21526 > webpod1.80: Flags [P.], seq 1:83, ack 1, win 2068, options [nop,nop,TS val 4117526051 ecr 3362551422], length 82: HTTP: GET / HTTP/1.1
00:23:03.474850 IP webpod1.80 > 10.32.0.1.21526: Flags [.], ack 83, win 165, options [nop,nop,TS val 3362551429 ecr 4117526051], length 0
00:23:03.475059 IP webpod1.80 > 10.32.0.1.21526: Flags [P.], seq 1:239, ack 83, win 165, options [nop,nop,TS val 3362551429 ecr 4117526051], length 238: HTTP: HTTP/1.1 200 OK
00:23:03.475140 IP webpod1.80 > 10.32.0.1.21526: Flags [P.], seq 239:854, ack 83, win 165, options [nop,nop,TS val 3362551429 ecr 4117526051], length 615: HTTP
# 위에서 본 10.32.0.1 ip를 node에서 확인해보면, pod의 gateway ip이자 node에서 veth pair로 pod interface와 연결된 ip인 것을 확인할 수 있다.
eunyoung@gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-3dm6 ~ $ ip a
...
11: vethe73c1f42@if2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1460 qdisc noqueue state UP group default
link/ether 6e:be:4e:b1:b4:50 brd ff:ff:ff:ff:ff:ff link-netns cni-9323f0e6-2ca0-4b11-6fa6-e49b780e505f
inet 10.32.0.1/32 scope global vethe73c1f42
valid_lft forever preferred_lft forever
inet6 fe80::6cbe:4eff:feb1:b450/64 scope link
valid_lft forever preferred_lft forever
# webpod가 있는 노드가 아닌 다른 노드를 호출
❯ curl 34.64.32.222:30007
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
# 호출된 노드를 확인해보면, 클라이언트의 외부 ip를 src로 하여 들어온 것을 확인할 수 있다.
root@gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj:~# tcpdump src 121.167.232.64 and port 30007
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), snapshot length 262144 bytes
00:31:18.547435 IP 121.167.232.64.58637 > gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.30007: Flags [S], seq 3561575594, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 2109264739 ecr 0,sackOK,eol], length 0
00:31:18.554140 IP 121.167.232.64.58637 > gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.30007: Flags [.], ack 2337967013, win 2068, options [nop,nop,TS val 2109264745 ecr 1030989870], length 0
00:31:18.554595 IP 121.167.232.64.58637 > gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.30007: Flags [P.], seq 0:81, ack 1, win 2068, options [nop,nop,TS val 2109264745 ecr 1030989870], length 81
# 이제 들어온 노드의 ip로 nat가 되었을 텐데, 들어온 노드의 내부 ip를 src로 잡아서 보면.. (이 부분은 예전에 [service1](https://nyoung08.github.io/study/2024/09/29/service1/)에서도 쓴 적 있는 iptables를 따라가면 된다)
# 그 노드에서 바로 pod ip를 찍는 것을 볼 수 있다.
root@gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-3dm6:~# tcpdump src 10.0.0.38
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), snapshot length 262144 bytes
00:31:18.547814 IP gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974 > 10.32.0.9.http: Flags [S], seq 3561575594, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 2109264739 ecr 0,sackOK,eol], length 0
00:31:18.554172 IP gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974 > 10.32.0.9.http: Flags [.], ack 2337967013, win 2068, options [nop,nop,TS val 2109264745 ecr 1030989870], length 0
00:31:18.554614 IP gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974 > 10.32.0.9.http: Flags [P.], seq 0:81, ack 1, win 2068, options [nop,nop,TS val 2109264745 ecr 1030989870], length 81: HTTP: GET / HTTP/1.1
00:31:18.559327 IP gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974 > 10.32.0.9.http: Flags [.], ack 854, win 2054, options [nop,nop,TS val 2109264751 ecr 1030989877], length 0
00:31:18.559781 IP gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974 > 10.32.0.9.http: Flags [F.], seq 81, ack 854, win 2054, options [nop,nop,TS val 2109264751 ecr 1030989877], length 0
00:31:18.564458 IP gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974 > 10.32.0.9.http: Flags [.], ack 855, win 2054, options [nop,nop,TS val 2109264756 ecr 1030989882], length 0
# webpod에서 확인해보면 node에서 오는 것으로 보인다.
00:31:18.547856 IP gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974 > webpod1.80: Flags [S], seq 3561575594, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 2109264739 ecr 0,sackOK,eol], length 0
00:31:18.547878 IP webpod1.80 > gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974: Flags [S.], seq 2337967012, ack 3561575595, win 21120, options [mss 1420,sackOK,TS val 1030989870 ecr 2109264739,nop,wscale 7], length 0
00:31:18.554177 IP gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974 > webpod1.80: Flags [.], ack 1, win 2068, options [nop,nop,TS val 2109264745 ecr 1030989870], length 0
00:31:18.554619 IP gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974 > webpod1.80: Flags [P.], seq 1:82, ack 1, win 2068, options [nop,nop,TS val 2109264745 ecr 1030989870], length 81: HTTP: GET / HTTP/1.1
00:31:18.554631 IP webpod1.80 > gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974: Flags [.], ack 82, win 165, options [nop,nop,TS val 1030989876 ecr 2109264745], length 0
00:31:18.554735 IP webpod1.80 > gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974: Flags [P.], seq 1:239, ack 82, win 165, options [nop,nop,TS val 1030989876 ecr 2109264745], length 238: HTTP: HTTP/1.1 200 OK
00:31:18.554825 IP webpod1.80 > gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974: Flags [P.], seq 239:854, ack 82, win 165, options [nop,nop,TS val 1030989877 ecr 2109264745], length 615: HTTP
00:31:18.559341 IP gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974 > webpod1.80: Flags [.], ack 854, win 2054, options [nop,nop,TS val 2109264751 ecr 1030989877], length 0
00:31:18.559784 IP gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974 > webpod1.80: Flags [F.], seq 82, ack 854, win 2054, options [nop,nop,TS val 2109264751 ecr 1030989877], length 0
00:31:18.559848 IP webpod1.80 > gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974: Flags [F.], seq 854, ack 83, win 165, options [nop,nop,TS val 1030989882 ecr 2109264751], length 0
00:31:18.564463 IP gke-nyoung-cluster-1-nyoung-nodepool--d89bf7aa-prpj.asia-northeast3-a.c.dhha-team-20221221.internal.18974 > webpod1.80: Flags [.], ack 855, win 2054, options [nop,nop,TS val 2109264756 ecr 1030989882], length 0