The Kubernetes proxy (kube-proxy) runs on every node and proxies Service traffic. It works in one of two modes:
a userspace mode, where kube-proxy itself accepts and forwards connections in user space;
an iptables (kernel-space) mode, where forwarding is done entirely by iptables/netfilter rules (the mode is chosen with the kube-proxy --proxy-mode flag).
In iptables mode, kube-proxy uses a reflector to watch Service and Endpoints resources. When a Service is created or changed, the proxier stores the Service and re-synchronizes the iptables rules by calling proxier.syncProxyRules(); Endpoints updates are handled the same way, storing the Endpoints and then re-synchronizing the iptables rules.
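As a rough illustration of that update path (a minimal sketch, not the actual kube-proxy source: the handler bodies below are simplified and only the field names that also appear in syncProxyRules are taken from the real code), the iptables proxier registers update handlers that record the new state and then rebuild the rules:

// Simplified sketch of the watch callbacks; bodies are illustrative only.
func (proxier *Proxier) OnServiceUpdate(services []api.Service) {
    proxier.mu.Lock()
    defer proxier.mu.Unlock()
    proxier.haveReceivedServiceUpdate = true
    // rebuild proxier.serviceMap from the watched Service objects (details omitted)
    proxier.syncProxyRules() // re-sync iptables after every change
}

func (proxier *Proxier) OnEndpointsUpdate(allEndpoints []api.Endpoints) {
    proxier.mu.Lock()
    defer proxier.mu.Unlock()
    proxier.haveReceivedEndpointsUpdate = true
    // rebuild proxier.endpointsMap from the watched Endpoints objects (details omitted)
    proxier.syncProxyRules()
}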
After startup it also re-synchronizes the iptables rules periodically (every 30s by default), driven by the main loop:
s.Proxier.SyncLoop()
// SyncLoop runs periodic work. This is expected to run as a goroutine or as
// the main loop of the app. It does not return.
func (proxier *Proxier) SyncLoop() {
    t := time.NewTicker(proxier.syncPeriod)
    defer t.Stop()
    for {
        <-t.C
        glog.V(6).Infof("Periodic sync")
        proxier.Sync()
    }
}

func (proxier *Proxier) Sync() {
    proxier.mu.Lock()
    defer proxier.mu.Unlock()
    proxier.syncProxyRules()
}
// execConntrackTool executes conntrack tool using given parameters
func (proxier *Proxier) execConntrackTool(parameters ...string) error {
    conntrackPath, err := proxier.exec.LookPath("conntrack")
    if err != nil {
        return fmt.Errorf("Error looking for path of conntrack: %v", err)
    }
    output, err := proxier.exec.Command(conntrackPath, parameters...).CombinedOutput()
    if err != nil {
        return fmt.Errorf("Conntrack command returned: %q, error message: %s", string(output), err)
    }
    return nil
}

// This is where all of the iptables-save/restore calls happen.
// The only other iptables rules are those that are setup in iptablesInit()
// assumes proxier.mu is held
func (proxier *Proxier) syncProxyRules() {
    start := time.Now()
    defer func() {
        glog.V(4).Infof("syncProxyRules took %v", time.Since(start))
    }()
    // don't sync rules till we've received services and endpoints
    if !proxier.haveReceivedEndpointsUpdate || !proxier.haveReceivedServiceUpdate {
        glog.V(2).Info("Not syncing iptables until Services and Endpoints have been received from master")
        return
    }
    glog.V(3).Infof("Syncing iptables rules")

    // Create and link the kube services chain.
    {
        tablesNeedServicesChain := []utiliptables.Table{utiliptables.TableFilter, utiliptables.TableNAT}
        for _, table := range tablesNeedServicesChain {
            if _, err := proxier.iptables.EnsureChain(table, kubeServicesChain); err != nil {
                glog.Errorf("Failed to ensure that %s chain %s exists: %v", table, kubeServicesChain, err)
                return
            }
        }

        tableChainsNeedJumpServices := []struct {
            table utiliptables.Table
            chain utiliptables.Chain
        }{
            {utiliptables.TableFilter, utiliptables.ChainOutput},
            {utiliptables.TableNAT, utiliptables.ChainOutput},
            {utiliptables.TableNAT, utiliptables.ChainPrerouting},
        }
        comment := "kubernetes service portals"
        args := []string{"-m", "comment", "--comment", comment, "-j", string(kubeServicesChain)}
        for _, tc := range tableChainsNeedJumpServices {
            if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, tc.table, tc.chain, args...); err != nil {
                glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", tc.table, tc.chain, kubeServicesChain, err)
                return
            }
        }
    }

    // Create and link the kube postrouting chain.
    {
        if _, err := proxier.iptables.EnsureChain(utiliptables.TableNAT, kubePostroutingChain); err != nil {
            glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, kubePostroutingChain, err)
            return
        }

        comment := "kubernetes postrouting rules"
        args := []string{"-m", "comment", "--comment", comment, "-j", string(kubePostroutingChain)}
        if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
            glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, kubePostroutingChain, err)
            return
        }
    }

    // Get iptables-save output so we can check for existing chains and rules.
    // This will be a map of chain name to chain with rules as stored in iptables-save/iptables-restore
    existingFilterChains := make(map[utiliptables.Chain]string)
    iptablesSaveRaw, err := proxier.iptables.Save(utiliptables.TableFilter)
    if err != nil { // if we failed to get any rules
        glog.Errorf("Failed to execute iptables-save, syncing all rules: %v", err)
    } else { // otherwise parse the output
        existingFilterChains = utiliptables.GetChainLines(utiliptables.TableFilter, iptablesSaveRaw)
    }

    existingNATChains := make(map[utiliptables.Chain]string)
    iptablesSaveRaw, err = proxier.iptables.Save(utiliptables.TableNAT)
    if err != nil { // if we failed to get any rules
        glog.Errorf("Failed to execute iptables-save, syncing all rules: %v", err)
    } else { // otherwise parse the output
        existingNATChains = utiliptables.GetChainLines(utiliptables.TableNAT, iptablesSaveRaw)
    }

    filterChains := bytes.NewBuffer(nil)
    filterRules := bytes.NewBuffer(nil)
    natChains := bytes.NewBuffer(nil)
    natRules := bytes.NewBuffer(nil)

    // Write table headers.
    writeLine(filterChains, "*filter")
    writeLine(natChains, "*nat")

    // Make sure we keep stats for the top-level chains, if they existed
    // (which most should have because we created them above).
    if chain, ok := existingFilterChains[kubeServicesChain]; ok {
        writeLine(filterChains, chain)
    } else {
        writeLine(filterChains, utiliptables.MakeChainLine(kubeServicesChain))
    }
    if chain, ok := existingNATChains[kubeServicesChain]; ok {
        writeLine(natChains, chain)
    } else {
        writeLine(natChains, utiliptables.MakeChainLine(kubeServicesChain))
    }
    if chain, ok := existingNATChains[kubeNodePortsChain]; ok {
        writeLine(natChains, chain)
    } else {
        writeLine(natChains, utiliptables.MakeChainLine(kubeNodePortsChain))
    }
    if chain, ok := existingNATChains[kubePostroutingChain]; ok {
        writeLine(natChains, chain)
    } else {
        writeLine(natChains, utiliptables.MakeChainLine(kubePostroutingChain))
    }
    if chain, ok := existingNATChains[KubeMarkMasqChain]; ok {
        writeLine(natChains, chain)
    } else {
        writeLine(natChains, utiliptables.MakeChainLine(KubeMarkMasqChain))
    }

    // Install the kubernetes-specific postrouting rules. We use a whole chain for
    // this so that it is easier to flush and change, for example if the mark
    // value should ever change.
    writeLine(natRules, []string{
        "-A", string(kubePostroutingChain),
        "-m", "comment", "--comment", `"kubernetes service traffic requiring SNAT"`,
        "-m", "mark", "--mark", proxier.masqueradeMark,
        "-j", "MASQUERADE",
    }...)

    // Install the kubernetes-specific masquerade mark rule. We use a whole chain for
    // this so that it is easier to flush and change, for example if the mark
    // value should ever change.
    writeLine(natRules, []string{
        "-A", string(KubeMarkMasqChain),
        "-j", "MARK", "--set-xmark", proxier.masqueradeMark,
    }...)

    // Accumulate NAT chains to keep.
    activeNATChains := map[utiliptables.Chain]bool{} // use a map as a set

    // Accumulate the set of local ports that we will be holding open once this update is complete
    replacementPortsMap := map[localPort]closeable{}

    // Build rules for each service.
    for svcName, svcInfo := range proxier.serviceMap {
        protocol := strings.ToLower(string(svcInfo.protocol))

        // Create the per-service chain, retaining counters if possible.
        svcChain := servicePortChainName(svcName, protocol)
        if chain, ok := existingNATChains[svcChain]; ok {
            writeLine(natChains, chain)
        } else {
            writeLine(natChains, utiliptables.MakeChainLine(svcChain))
        }
        activeNATChains[svcChain] = true

        // Capture the clusterIP.
        args := []string{
            "-A", string(kubeServicesChain),
            "-m", "comment", "--comment", fmt.Sprintf(`"%s cluster IP"`, svcName.String()),
            "-m", protocol, "-p", protocol,
            "-d", fmt.Sprintf("%s/32", svcInfo.clusterIP.String()),
            "--dport", fmt.Sprintf("%d", svcInfo.port),
        }
        if proxier.masqueradeAll {
            writeLine(natRules, append(args, "-j", string(KubeMarkMasqChain))...)
        }
        if len(proxier.clusterCIDR) > 0 {
            writeLine(natRules, append(args, "! -s", proxier.clusterCIDR, "-j", string(KubeMarkMasqChain))...)
        }
        writeLine(natRules, append(args, "-j", string(svcChain))...)

        // Capture externalIPs.
        for _, externalIP := range svcInfo.externalIPs {
            // If the "external" IP happens to be an IP that is local to this
            // machine, hold the local port open so no other process can open it
            // (because the socket might open but it would never work).
            if local, err := isLocalIP(externalIP); err != nil {
                glog.Errorf("can't determine if IP is local, assuming not: %v", err)
            } else if local {
                lp := localPort{
                    desc:     "externalIP for " + svcName.String(),
                    ip:       externalIP,
                    port:     svcInfo.port,
                    protocol: protocol,
                }
                if proxier.portsMap[lp] != nil {
                    glog.V(4).Infof("Port %s was open before and is still needed", lp.String())
                    replacementPortsMap[lp] = proxier.portsMap[lp]
                } else {
                    socket, err := openLocalPort(&lp)
                    if err != nil {
                        glog.Errorf("can't open %s, skipping this externalIP: %v", lp.String(), err)
                        continue
                    }
                    replacementPortsMap[lp] = socket
                }
            } // We're holding the port, so it's OK to install iptables rules.
            args := []string{
                "-A", string(kubeServicesChain),
                "-m", "comment", "--comment", fmt.Sprintf(`"%s external IP"`, svcName.String()),
                "-m", protocol, "-p", protocol,
                "-d", fmt.Sprintf("%s/32", externalIP),
                "--dport", fmt.Sprintf("%d", svcInfo.port),
            }
            // We have to SNAT packets to external IPs.
            writeLine(natRules, append(args, "-j", string(KubeMarkMasqChain))...)

            // Allow traffic for external IPs that does not come from a bridge (i.e. not from a container)
            // nor from a local process to be forwarded to the service.
            // This rule roughly translates to "all traffic from off-machine".
            // This is imperfect in the face of network plugins that might not use a bridge, but we can revisit that later.
            externalTrafficOnlyArgs := append(args,
                "-m", "physdev", "!", "--physdev-is-in",
                "-m", "addrtype", "!", "--src-type", "LOCAL")
            writeLine(natRules, append(externalTrafficOnlyArgs, "-j", string(svcChain))...)
            dstLocalOnlyArgs := append(args, "-m", "addrtype", "--dst-type", "LOCAL")
            // Allow traffic bound for external IPs that happen to be recognized as local IPs to stay local.
            // This covers cases like GCE load-balancers which get added to the local routing table.
            writeLine(natRules, append(dstLocalOnlyArgs, "-j", string(svcChain))...)
        }

        // Capture load-balancer ingress.
        for _, ingress := range svcInfo.loadBalancerStatus.Ingress {
            if ingress.IP != "" {
                // create service firewall chain
                fwChain := serviceFirewallChainName(svcName, protocol)
                if chain, ok := existingNATChains[fwChain]; ok {
                    writeLine(natChains, chain)
                } else {
                    writeLine(natChains, utiliptables.MakeChainLine(fwChain))
                }
                activeNATChains[fwChain] = true

                // The service firewall rules are created based on ServiceSpec.loadBalancerSourceRanges field.
                // This currently works for loadbalancers that preserves source ips.
                // For loadbalancers which direct traffic to service NodePort, the firewall rules will not apply.
                args := []string{
                    "-A", string(kubeServicesChain),
                    "-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcName.String()),
                    "-m", protocol, "-p", protocol,
                    "-d", fmt.Sprintf("%s/32", ingress.IP),
                    "--dport", fmt.Sprintf("%d", svcInfo.port),
                }
                // jump to service firewall chain
                writeLine(natRules, append(args, "-j", string(fwChain))...)

                args = []string{
                    "-A", string(fwChain),
                    "-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcName.String()),
                }

                // We have to SNAT packets from external IPs.
                writeLine(natRules, append(args, "-j", string(KubeMarkMasqChain))...)

                if len(svcInfo.loadBalancerSourceRanges) == 0 {
                    // allow all sources, so jump directly to KUBE-SVC chain
                    writeLine(natRules, append(args, "-j", string(svcChain))...)
                } else {
                    // firewall filter based on each source range
                    allowFromNode := false
                    for _, src := range svcInfo.loadBalancerSourceRanges {
                        writeLine(natRules, append(args, "-s", src, "-j", string(svcChain))...)
                        // ignore error because it has been validated
                        _, cidr, _ := net.ParseCIDR(src)
                        if cidr.Contains(proxier.nodeIP) {
                            allowFromNode = true
                        }
                    }
                    // generally, ip route rule was added to intercept request to loadbalancer vip from the
                    // loadbalancer's backend hosts. In this case, request will not hit the loadbalancer but loop back directly.
                    // Need to add the following rule to allow request on host.
                    if allowFromNode {
                        writeLine(natRules, append(args, "-s", fmt.Sprintf("%s/32", ingress.IP), "-j", string(svcChain))...)
                    }
                }

                // If the packet was able to reach the end of firewall chain, then it did not get DNATed.
                // It means the packet cannot go thru the firewall, then mark it for DROP
                writeLine(natRules, append(args, "-j", string(KubeMarkDropChain))...)
            }
        }

        // Capture nodeports. If we had more than 2 rules it might be
        // worthwhile to make a new per-service chain for nodeport rules, but
        // with just 2 rules it ends up being a waste and a cognitive burden.
        if svcInfo.nodePort != 0 {
            // Hold the local port open so no other process can open it
            // (because the socket might open but it would never work).
            lp := localPort{
                desc:     "nodePort for " + svcName.String(),
                ip:       "",
                port:     svcInfo.nodePort,
                protocol: protocol,
            }
            if proxier.portsMap[lp] != nil {
                glog.V(4).Infof("Port %s was open before and is still needed", lp.String())
                replacementPortsMap[lp] = proxier.portsMap[lp]
            } else {
                socket, err := openLocalPort(&lp)
                if err != nil {
                    glog.Errorf("can't open %s, skipping this nodePort: %v", lp.String(), err)
                    continue
                }
                replacementPortsMap[lp] = socket
            } // We're holding the port, so it's OK to install iptables rules.

            args := []string{
                "-A", string(kubeNodePortsChain),
                "-m", "comment", "--comment", svcName.String(),
                "-m", protocol, "-p", protocol,
                "--dport", fmt.Sprintf("%d", svcInfo.nodePort),
            }
            // Nodeports need SNAT.
            writeLine(natRules, append(args, "-j", string(KubeMarkMasqChain))...)
            // Jump to the service chain.
            writeLine(natRules, append(args, "-j", string(svcChain))...)
        }

        // If the service has no endpoints then reject packets.
        if len(proxier.endpointsMap[svcName]) == 0 {
            writeLine(filterRules,
                "-A", string(kubeServicesChain),
                "-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcName.String()),
                "-m", protocol, "-p", protocol,
                "-d", fmt.Sprintf("%s/32", svcInfo.clusterIP.String()),
                "--dport", fmt.Sprintf("%d", svcInfo.port),
                "-j", "REJECT",
            )
            continue
        }

        // Generate the per-endpoint chains. We do this in multiple passes so we
        // can group rules together.
        endpoints := make([]string, 0)
        endpointChains := make([]utiliptables.Chain, 0)
        for _, ep := range proxier.endpointsMap[svcName] {
            endpoints = append(endpoints, ep)
            endpointChain := servicePortEndpointChainName(svcName, protocol, ep)
            endpointChains = append(endpointChains, endpointChain)

            // Create the endpoint chain, retaining counters if possible.
            if chain, ok := existingNATChains[utiliptables.Chain(endpointChain)]; ok {
                writeLine(natChains, chain)
            } else {
                writeLine(natChains, utiliptables.MakeChainLine(endpointChain))
            }
            activeNATChains[endpointChain] = true
        }

        // First write session affinity rules, if applicable.
        if svcInfo.sessionAffinityType == api.ServiceAffinityClientIP {
            for _, endpointChain := range endpointChains {
                writeLine(natRules,
                    "-A", string(svcChain),
                    "-m", "comment", "--comment", svcName.String(),
                    "-m", "recent", "--name", string(endpointChain),
                    "--rcheck", "--seconds", fmt.Sprintf("%d", svcInfo.stickyMaxAgeSeconds), "--reap",
                    "-j", string(endpointChain))
            }
        }

        // Now write loadbalancing & DNAT rules.
        n := len(endpointChains)
        for i, endpointChain := range endpointChains {
            // Balancing rules in the per-service chain.
            args := []string{
                "-A", string(svcChain),
                "-m", "comment", "--comment", svcName.String(),
            }
            if i < (n - 1) {
                // Each rule is a probabilistic match.
                args = append(args,
                    "-m", "statistic",
                    "--mode", "random",
                    "--probability", fmt.Sprintf("%0.5f", 1.0/float64(n-i)))
            }
            // The final (or only if n == 1) rule is a guaranteed match.
            args = append(args, "-j", string(endpointChain))
            writeLine(natRules, args...)

            // Rules in the per-endpoint chain.
            args = []string{
                "-A", string(endpointChain),
                "-m", "comment", "--comment", svcName.String(),
            }
            // Handle traffic that loops back to the originator with SNAT.
            // Technically we only need to do this if the endpoint is on this
            // host, but we don't have that information, so we just do this for
            // all endpoints.
            // TODO: if we grow logic to get this node's pod CIDR, we can use it.
            writeLine(natRules, append(args,
                "-s", fmt.Sprintf("%s/32", strings.Split(endpoints[i], ":")[0]),
                "-j", string(KubeMarkMasqChain))...)

            // Update client-affinity lists.
            if svcInfo.sessionAffinityType == api.ServiceAffinityClientIP {
                args = append(args, "-m", "recent", "--name", string(endpointChain), "--set")
            }
            // DNAT to final destination.
            args = append(args,
                "-m", protocol, "-p", protocol,
                "-j", "DNAT", "--to-destination", endpoints[i])
            writeLine(natRules, args...)
        }
    }

    // Delete chains no longer in use.
    for chain := range existingNATChains {
        if !activeNATChains[chain] {
            chainString := string(chain)
            if !strings.HasPrefix(chainString, "KUBE-SVC-") && !strings.HasPrefix(chainString, "KUBE-SEP-") && !strings.HasPrefix(chainString, "KUBE-FW-") {
                // Ignore chains that aren't ours.
                continue
            }
            // We must (as per iptables) write a chain-line for it, which has
            // the nice effect of flushing the chain. Then we can remove the
            // chain.
            writeLine(natChains, existingNATChains[chain])
            writeLine(natRules, "-X", chainString)
        }
    }

    // Finally, tail-call to the nodeports chain. This needs to be after all
    // other service portal rules.
    writeLine(natRules,
        "-A", string(kubeServicesChain),
        "-m", "comment", "--comment", `"kubernetes service nodeports; NOTE: this must be the last rule in this chain"`,
        "-m", "addrtype", "--dst-type", "LOCAL",
        "-j", string(kubeNodePortsChain))

    // Write the end-of-table markers.
    writeLine(filterRules, "COMMIT")
    writeLine(natRules, "COMMIT")

    // Sync rules.
    // NOTE: NoFlushTables is used so we don't flush non-kubernetes chains in the table.
    filterLines := append(filterChains.Bytes(), filterRules.Bytes()...)
    natLines := append(natChains.Bytes(), natRules.Bytes()...)
    lines := append(filterLines, natLines...)

    glog.V(3).Infof("Restoring iptables rules: %s", lines)
    err = proxier.iptables.RestoreAll(lines, utiliptables.NoFlushTables, utiliptables.RestoreCounters)
    if err != nil {
        glog.Errorf("Failed to execute iptables-restore: %v", err)
        // Revert new local ports.
        revertPorts(replacementPortsMap, proxier.portsMap)
        return
    }

    // Close old local ports and save new ones.
    for k, v := range proxier.portsMap {
        if replacementPortsMap[k] == nil {
            v.Close()
        }
    }
    proxier.portsMap = replacementPortsMap

    // Clean up the older SNAT rule which was directly in POSTROUTING.
    // TODO(thockin): Remove this for v1.3 or v1.4.
    args := []string{
        "-m", "comment", "--comment", "kubernetes service traffic requiring SNAT",
        "-m", "mark", "--mark", oldIptablesMasqueradeMark,
        "-j", "MASQUERADE",
    }
    if err := proxier.iptables.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
        if !utiliptables.IsNotFoundError(err) {
            glog.Errorf("Error removing old-style SNAT rule: %v", err)
        }
    }
}
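One detail worth calling out from the per-service chain above: with n endpoints, rule i (0-based) uses -m statistic --mode random --probability 1/(n-i), and the last rule matches unconditionally. Since each rule only sees the traffic that fell through the previous rules, every endpoint ends up with the same overall 1/n share. A small standalone sketch of that arithmetic (not part of kube-proxy; n is an arbitrary example value):

package main

import "fmt"

// Reproduces the selection probability implied by the rules generated above:
// rule i matches its remaining traffic with probability 1/(n-i), and the final
// rule is a guaranteed match.
func main() {
    n := 3           // number of endpoints (example value)
    remaining := 1.0 // fraction of traffic that reaches the current rule
    for i := 0; i < n; i++ {
        p := 1.0 / float64(n-i) // per-rule match probability, as in syncProxyRules
        if i == n-1 {
            p = 1.0 // last rule has no statistic match
        }
        fmt.Printf("endpoint %d: overall share = %.3f\n", i, remaining*p)
        remaining *= 1 - p
    }
    // Each endpoint ends up with an overall share of 1/3.
}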