Resolving Large Numbers of TIME_WAIT Connections on Linux

Original post: http://www.linuxext.com/bbs/simple/index.php?t14822.html


[root@web02 ~]# vi /etc/sysctl.conf
Add the following lines:
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_syncookies = 1
Make the kernel parameters take effect:
[root@web02 ~]# sysctl -p
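To confirm the symptom before and after the change, you can count sockets per TCP state with a quick one-liner (a generic example, assuming the iproute2 ss tool is installed; it is not part of the original post). A very large TIME-WAIT figure here is exactly what these settings are meant to reduce:

[root@web02 ~]# ss -ant | awk 'NR>1 {++state[$1]} END {for (s in state) print s, state[s]}'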
Notes:
net.ipv4.tcp_syncookies = 1      Enables SYN cookies, which protect the server when the SYN backlog overflows (e.g. under a SYN flood).
net.ipv4.tcp_tw_reuse = 1        Allows sockets in TIME-WAIT state to be reused for new outbound connections; very effective on web servers handling large numbers of short-lived connections.
net.ipv4.tcp_tw_recycle = 1      Enables fast recycling of TIME-WAIT sockets. (Caution: this is unsafe for clients behind NAT, and the option was removed in Linux 4.12.)
net.ipv4.tcp_fin_timeout = 30    Shortens the time a connection spends in FIN-WAIT-2, so the system can handle more connections.
net.ipv4.tcp_keepalive_time = 1800   Lowers the idle time before TCP keepalive probes are sent (default is 7200 seconds).
net.ipv4.tcp_max_syn_backlog = 8192  Increases the TCP SYN queue length, so the system can handle more concurrent connection attempts.
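If you want to try a single parameter at runtime before persisting it in /etc/sysctl.conf, sysctl -w can set it directly (a sketch; the value reverts at reboot unless it is also written to the file):

[root@web02 ~]# sysctl -w net.ipv4.tcp_fin_timeout=30
net.ipv4.tcp_fin_timeout = 30
[root@web02 ~]# sysctl net.ipv4.tcp_fin_timeout
net.ipv4.tcp_fin_timeout = 30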

Reprinted from: 红联Linux (Honglian Linux)