etherCAT igh-master接收超时问题

此博客为一次记录

现象:
        在应用层调用receive时,偶发有接收时间超过1ms的问题(工作频率是1000HZ,所以接收时间一般在100us以内才正常),甚至偶有4,5ms的时候。

环境

        rtlinux+igh1.5.2

排查:

        1 首先在驱动和lib库里增加计时操作,确定超时确实是由etherCAT导致。

        2 细化计时范围,缩小至由于锁导致的receive的ioctl返回时间慢。

        3 改写igh驱动,将锁由原来的inline改写为带计时功能的宏,并且将上次加锁的代码行号记录下来。

原本的锁 master/lock.h

static inline void ec_lock_init(ec_lock_t *sem) { sema_init(sem, 1); }

static inline void ec_lock_down(ec_lock_t *sem) { down(sem); }

static inline int ec_lock_down_interruptible(ec_lock_t *sem) { return down_interruptible(sem); }

static inline void ec_lock_up(ec_lock_t *sem) { up(sem);}

改写为

extern ktime_t start_time_ns_test;

extern ktime_t end_time_ns_test;

extern s64 elapsed_time_ns_test;

extern ktime_t start_time_ns_test2;

extern ktime_t end_time_ns_test2;

extern s64 elapsed_time_ns_test2;

extern volatile ktime_t start_time_ns_test3;

extern volatile ktime_t end_time_ns_test3;

extern volatile ktime_t lock_time[1024];

extern s64 elapsed_time_ns_test3;

extern int temporaryRet;

extern volatile int lock_array[1024];

extern volatile int master_fsm;

extern volatile int master_fsm_reason;

extern volatile s64 schedule_elapsed_time;

#define OVERTIME 300000

//#define ec_lock_init(sem) do { start_time_ns_test=ktime_get(); sema_init((sem), 1); end_time_ns_test=ktime_get();if((elapsed_time_ns_test=ktime_to_ns(ktime_sub(end_time_ns_test, start_time_ns_test)))>OVERTIME){printk("xiuwen%s.c: %d %s too long cost %lld\n",__FILE__,__LINE__,__FUNCTION__,elapsed_time_ns_test);}} while (0)

#define ec_lock_init(sem) do{ sema_init(sem, 1); printk("xiuwen%s.c: %d %s idx %d\n",__FILE__,__LINE__,__FUNCTION__,(uintptr_t)sem%1024);} while(0)

#define ec_lock_down(sem) do \

{\

start_time_ns_test2=ktime_get(); down(sem);end_time_ns_test2=ktime_get();\

if((elapsed_time_ns_test2=ktime_to_ns(ktime_sub(end_time_ns_test2, start_time_ns_test2)))>OVERTIME)\

{\

printk("xiuwen%s: %d %s lock down too long cost %lld lastLine %d idx %ld pid %d tid %d master %d reason %dn",\

__FILE__,__LINE__,__FUNCTION__,elapsed_time_ns_test2,lock_array[(uintptr_t)sem%1024],(uintptr_t)sem%1024,current->pid,current->tgid,master_fsm,master_fsm_reason);\

}\

lock_array[(uintptr_t)sem%1024]=__LINE__;\

lock_time[(uintptr_t)sem%1024]=end_time_ns_test2;\

} while (0)

#define ec_lock_down_interruptible(sem) ((((start_time_ns_test=ktime_get())&&(0==(temporaryRet=down_interruptible((sem)))))&&(end_time_ns_test=ktime_get()))&&\

((elapsed_time_ns_test=ktime_to_ns(ktime_sub(end_time_ns_test, start_time_ns_test)))>OVERTIME?\

(NULL!=(1+(printk("xiuwen%s: %d %s lock_down_interrupt too long %lld lastLine %d idx %ld pid %d tid %d master %d reason %d\n",\

__FILE__,__LINE__,__FUNCTION__,elapsed_time_ns_test,lock_array[(uintptr_t)sem%1024],(uintptr_t)sem%1024,current->pid,current->tgid,master_fsm,master_fsm_reason)\

))):1),(lock_array[(uintptr_t)sem%1024]=__LINE__),(lock_time[(uintptr_t)sem%1024]=end_time_ns_test),temporaryRet)

#define ec_lock_up(sem) do \

{ start_time_ns_test3=ktime_get(); \

up(sem);\

end_time_ns_test3=ktime_get();\

if((elapsed_time_ns_test3=ktime_to_ns(ktime_sub(end_time_ns_test3, start_time_ns_test3)))>OVERTIME)\

{\

printk("xiuwen%s: %d %s lock up too long cost %lld lastLine %d idx %ld pid %d tid %d master %d reason %d\n",\

__FILE__,__LINE__,__FUNCTION__,elapsed_time_ns_test3,lock_array[(uintptr_t)sem%1024],(uintptr_t)sem%1024,current->pid,current->tgid ,master_fsm,master_fsm_reason);\

}\

if((elapsed_time_ns_test3=ktime_to_ns(ktime_sub(end_time_ns_test3, lock_time[(uintptr_t)sem%1024])))>OVERTIME)\

{\

printk("xiuwen%s: %d %s down lock spend to long time %lld,",__FILE__,__LINE__,__FUNCTION__,elapsed_time_ns_test3);\

}\

lock_array[(uintptr_t)sem%1024]=__LINE__;\

} while (0)

4 运行测试发现,锁被etherCAT驱动的operation_pthread线程锁持有时间过长

master/master.c

5 读代码发现锁是因为执行slave状态机偶尔会出现过慢的情况,导致锁未被释放,进而导致上层接收超时,想追状态机函数,由于每次超时的子状态机不是同一个,且比较复杂就放弃了,想着从其他方向入手。

6 观察到应用层的调度策略是fifo,优先级是最高,而master的op线程却是普通调度策略,感觉像是状态机执行的次数少,然后出了某些意外,在下次执行状态机时去处理这个意外才导致此次时间过长。所以把OP线程同样改为FIFO,且优先级最高。

master/master.c : ec_master_operation_thread

static int ec_master_operation_thread(void *priv_data)

{

ec_master_t *master = (ec_master_t *) priv_data;

// struct cpumask mask;

struct sched_param param;

param.sched_priority = 99; // 设置优先级为99

if (sched_setscheduler(current, SCHED_FIFO, &param))

{

pr_err("Failed to set scheduler\n");

return -1;

}

EC_MASTER_DBG(master, 1, "Operation thread running"

" with fsm interval = %u us, max data size=%zu\n",

master->send_interval, master->max_queue_size);

while (!kthread_should_stop()) {

ec_datagram_output_stats(&master->fsm_datagram);

if (master->injection_seq_rt == master->injection_seq_fsm) {

// output statistics

ec_master_output_stats(master);

// execute master & slave state machines

if (ec_lock_down_interruptible(&master->master_sem)) {

break;

}

if (ec_fsm_master_exec(&master->fsm)) {

// Inject datagrams (let the RT thread queue them, see

// ecrt_master_send())

master->injection_seq_fsm++;

}

// if rt_slave_requests is true and the slaves are available

// this will be handled by the app explicitly calling

// ecrt_master_exec_slave_request()

if (!master->rt_slave_requests || !master->rt_slaves_available) {

ec_master_exec_slave_fsms(master);

}

ec_lock_up(&master->master_sem);

}

#ifdef EC_USE_HRTIMER

// the op thread should not work faster than the sending RT thread

ec_master_nanosleep(master->send_interval * 1000);

#else

if (ec_fsm_master_idle(&master->fsm)) {

set_current_state(TASK_INTERRUPTIBLE);

schedule_timeout(1);

}

else {

schedule();

}

#endif

}

EC_MASTER_DBG(master, 1, "Master OP thread exiting...\n");

return 0;

}

7 改完之后再次运行,未再出现过超时问题,使用stress-ng进行mem和cpu加压满载测试同样正常运行超过24H。

结论:

etherCAT协议还是比较复杂,igh的状态机不好排查,所以目前只排查到是进程调度策略导致的master的op线程被调度少或者抢占了才导致的(但是igh的OP线程是自旋锁,按理来说不应该会被抢占才对)。如果后续继续排查,有两个方向
1 OP线程循环执行时关中断,等此次循环结束再开中断来验证是否是被抢占导致的。
2 追slave状态机的代码

PS :

如果有大佬知道具体原因,麻烦留言说下哈。

1.基于Igh-master1.5.2的多种Ethercat IO 模块及伺服电机的操作例子 2.基于Ethercat-Ighmaster二次开发库简化你的开发流程 所有源码及文档托管在码云平台,附件中有代码仓库地址 3.含有IghMaster1.5.2最新版本代码,编译脚本及安装到 最新的代码托管在: https://gitee.com/wllw7176/MyEthercat-IGH-1.5.2.git EtherCAT 1.5.2 编译及使用说明 1.交叉编译源码 源码中有内核模块编译,指令定模块目录 根据实际情况修改build_ethercat-1.5.2.sh中以下几个变量: output_dir='output' #编译输出目录 module_install_dir='module_install' #内核模块安装目录 kernel_source_dir='/mnt/fs_ext/imx6/linux-3.0.35' #内核源码目录 lib_modules_kernel_promt='3.0.35-2666-gbdde708' #内核版本号 host=arm-fsl-linux-gnueabi #交叉编译链前缀 注意要先编译内核,然后编译Ethrecat因为Ethercat依赖于内核通用网卡模块 以root用户运行./build_ethercat-1.5.2.sh #编译具体根据提示选择y/n就可以了 ---------------------------------------------------------------------------------------------------------------------------------------------- 手动安装Ethercat到ARM开发板: 2.内核模块安装 2.1 将output文件下通lib_modules_kernel_promt='3.0.35-2666-gbdde708'(build_ethercat-1.5.2.sh 中定义)放入开发板/lib/modules下 2.2 开发板中执行"depmod"命令 3. output里面的其它文件放入开发板对应位置 4. 设置参数 修改/etc/sysconfig/ethercat MASTER0_DEVICE="e4:f3:f5:c6:41:b6" #与ethercat绑定的Mac地址 DEVICE_MODULES="generic" #通用网卡就填generic,其余支持网卡换成模块名字就行 5.添加udev规则 echo KERNEL==\"EtherCAT[0-9]*\", MODE=\"0664\" > /etc/udev/rules.d/99-EtherCAT.rules 6. 启动服务 /etc/init.d/ethercat restart 出现以下信息表明移植成功 Shutting down EtherCAT master 1.5.2 done Starting EtherCAT master 1.5.2 ec_generic: Binding socket to interface 3 (eth0). done 7. 应用层测试 root@linaro-ubuntu-desktop:~# ethercat Please specify a command! Usage: ethercat [OPTIONS] [ARGUMENTS] Commands (can be abbreviated): alias Write alias addresses. config Show slave configurations. cstruct Generate slave PDO information in C language. data Output binary domain process data. debug Set the master's debug level. domains Show configured domains. download Write an SDO entry to a slave. eoe Display Ethernet over EtherCAT statictics. foe_read Read a file from a slave via FoE. foe_write Store a file on a slave via FoE. graph Output the bus topology as a graph. master Show master and Ethernet device information. pdos List Sync managers, PDO assignment and mapping. reg_read Output a slave's register contents. reg_write Write data to a slave's registers. rescan Rescan the bus. sdos List SDO dictionaries. sii_read Output a slave's SII contents. sii_write Write SII contents to a slave. slaves Display slaves on the bus. soe_read Read an SoE IDN from a slave. soe_write Write an SoE IDN to a slave. states Request application-layer states. upload Read an SDO entry from a slave. version Show version information. xml Generate slave information XML. Global options: --master -m Comma separated list of masters to select, ranges are allowed. Examples: '1,3', '5-7,9', '-3'. Default: '-' (all). --force -f Force a command. --quiet -q Output less information. --verbose -v Output more information. --help -h Show this help. Numerical values can be specified either with decimal (no prefix), octal (prefix '0') or hexadecimal (prefix '0x') base. Call 'ethercat --help' for command-specific help. 以上内容为基本的使用,进一步使用要结合ethercat说明文档和电机说明。 ---------------------------------------------------------------------------------------------------------------------------------------------- 自动安装Ethercat到ARM开发板: 将output目录复制到开发板然后运行output目录下install_to_arm.sh脚本 执行6,7步骤测试自动安装是否成功 ---------------------------------------------------------------------------------------------------------------------------------------------- 问题汇总: (1)Starting EtherCAT master 1.5.2 EtherCAT ERROR: MAC address may not be empty. FATAL: Error inserting ec_master (/lib/modules/3.0.35-2666-gbdde708-gbdbf2583/mnt/hgfs/win_linux_share/linux/download/ethercat-1.5.2/module_install/master/ec_master.ko): Invalid argument failed Mac地址为空,将对应的mac地址MASTER0_DEVICE="e4:f3:f5:c6:41:b6" (2)编译Ethercat 一定要保证所用编译器与内核与硬件平台一致,否则会出现内核模块不能使用或者其它未知问题 (3)内核源码要先编译通过,否则Ethercat模块将不能编译通过,因为Ethercat模块依赖于内核本身驱动模块。
以下是使用EtherCAT通信协议的雷塞(Leisai)556步进驱动器的示例程序: ```c #include <stdio.h> #include <stdlib.h> #include <string.h> #include <math.h> #include "ecrt.h" #define REFRESH_RATE 1000 // 刷新率(单位:微秒) #define PI 3.14159265358979323846 // EtherCAT配置 #define ECAT_TIMEOUT 5000 // 超时时间(单位:毫秒) #define ECAT_SLAVE 0, 0 // 从站地址 // 雷塞556参数 #define MAX_SPEED 3000 // 最速度(单位:rpm) #define MAX_ACC 1000 // 最加速度(单位:rpm/s) // 定义PDO映射索引 #define PDO_INPUT_INDEX 0x1601 #define PDO_OUTPUT_INDEX 0x1A02 // 定义PDO映射子索引 #define PDO_INPUT_SUBINDEX_POSITION 0x00 #define PDO_INPUT_SUBINDEX_VELOCITY 0x02 #define PDO_INPUT_SUBINDEX_TORQUE 0x03 #define PDO_OUTPUT_SUBINDEX_CONTROL 0x00 // 定义命令码 #define CMD_SET_POSITION 0x02 #define CMD_SET_VELOCITY 0x03 #define CMD_SET_TORQUE 0x04 #define CMD_SET_BIT 0x06 // 定义状态码 #define STATE_NOT_READY_TO_SWITCH_ON 0x0000 #define STATE_SWITCH_ON_DISABLED 0x0040 #define STATE_READY_TO_SWITCH_ON 0x0021 #define STATE_SWITCHED_ON 0x0023 #define STATE_OPERATION_ENABLED 0x0027 #define STATE_QUICK_STOP_ACTIVE 0x0007 #define STATE_FAULT 0x000F // 定义常量 #define POSITION_FACTOR (2 * PI / 65536.0) // 位置因子(单位:弧度) #define VELOCITY_FACTOR (2 * PI / 60.0 / 65536.0) // 速度因子(单位:弧度/秒) #define TORQUE_FACTOR (2 * PI / 65536.0 / 0.001) // 力矩因子(单位:牛米) // 定义全局变量 static ec_master_t *master = NULL; static ec_master_state_t master_state = {}; static ec_domain_t *domain = NULL; static ec_domain_state_t domain_state = {}; static ec_slave_config_t *slave = NULL; static ec_slave_config_state_t slave_state = {}; static uint8_t *domain_pd = NULL; static ec_pdo_entry_reg_t domain_regs[] = { {ECAT_SLAVE, PDO_OUTPUT_INDEX, PDO_OUTPUT_SUBINDEX_CONTROL, 16, &control_word}, {ECAT_SLAVE, PDO_INPUT_INDEX, PDO_INPUT_SUBINDEX_POSITION, 32, &position}, {ECAT_SLAVE, PDO_INPUT_INDEX, PDO_INPUT_SUBINDEX_VELOCITY, 32, &velocity}, {ECAT_SLAVE, PDO_INPUT_INDEX, PDO_INPUT_SUBINDEX_TORQUE, 16, &torque}, {} }; static uint16_t control_word = 0x0000; static int32_t target_position = 0; static int32_t actual_position = 0; static int32_t actual_velocity = 0; static int16_t actual_torque = 0; // 定义函数 static void check_domain_state(void); static void check_slave_config_states(void); int main(int argc, char *argv[]) { // 初始化EtherCAT主站 if (ecrt_init() != 0) { printf("Failed to initialize EtherCAT master!\n"); return -1; } // 获取EtherCAT主站 master = ecrt_request_master(0); if (master == NULL) { printf("Failed to get EtherCAT master!\n"); return -1; } // 发现从站 if (ecrt_master_scan(master, 0) <= 0) { printf("No EtherCAT slaves found!\n"); return -1; } // 获取从站配置 slave = ecrt_master_slave_config(master, ECAT_SLAVE); if (slave == NULL) { printf("Failed to get slave configuration!\n"); return -1; } // 创建EtherCAT域 domain = ecrt_master_create_domain(master); if (domain == NULL) { printf("Failed to create EtherCAT domain!\n"); return -1; } // 注册PDO映射 if (ecrt_domain_reg_pdo_entry_list(domain, domain_regs) < 0) { printf("Failed to register PDO entry list!\n"); return -1; } // 计算PDO映射 if (ecrt_master_application_time(master, REFRESH_RATE) != 0) { printf("Failed to set application time!\n"); return -1; } if (ecrt_master_sync_reference_clock(master) != 0) { printf("Failed to sync reference clock!\n"); return -1; } if (ecrt_domain_size(domain) > 0x1000) { printf("Domain size exceeds 4KB!\n"); return -1; } domain_pd = ecrt_domain_data(domain); // 配置从站 if (ecrt_slave_config_pdos(slave, EC_RT_MODE_3, 1, NULL, NULL) != EC_SUCCESS) { printf("Failed to configure PDOs!\n"); return -1; } // 配置从站状态 if (ecrt_slave_config_map(slave) < 0) { printf("Failed to configure slave state!\n"); return -1; } // 检查主站状态 ecrt_master_receive(master); check_domain_state(); if (master_state.al_states != 0) { printf("EtherCAT master is not in the AL state!\n"); return -1; } // 检查从站状态 ecrt_master_receive(master); check_slave_config_states(); if (slave_state.al_state != 0) { printf("EtherCAT slave is not in the AL state!\n"); return -1; } // 初始化从站 control_word = 0x0006; // 将控制字设置为“Switch On + Enable Operation” ecrt_domain_queue(domain); ecrt_master_send(master); ecrt_master_receive(master); check_domain_state(); if (actual_position != 0) { printf("Failed to initialize the drive!\n"); return -1; } // 设置驱动器参数 control_word = 0x0007; // 将控制字设置为“Switch On + Enable Operation + Quick Stop” ecrt_domain_queue(domain); ecrt_master_send(master); ecrt_master_receive(master); check_domain_state(); if (actual_position != 0) { printf("Failed to set drive parameters!\n"); return -1; } // 启动驱动器 control_word = 0x000F; // 将控制字设置为“Switch On + Enable Operation + Quick Stop + Enable Interpolated Position Mode” ecrt_domain_queue(domain); ecrt_master_send(master); ecrt_master_receive(master); check_domain_state(); if (actual_position != 0) { printf("Failed to start the drive!\n"); return -1; } // 循环运行 while (1) { // 读取当前位置、速度和力矩 ecrt_master_receive(master); actual_position = *((int32_t *)(domain_pd + ecrt_slave_config_reg_pdo_entry_offset(slave, PDO_INPUT_INDEX, PDO_INPUT_SUBINDEX_POSITION))); actual_velocity = *((int32_t *)(domain_pd + ecrt_slave_config_reg_pdo_entry_offset(slave, PDO_INPUT_INDEX, PDO_INPUT_SUBINDEX_VELOCITY))); actual_torque = *((int16_t *)(domain_pd + ecrt_slave_config_reg_pdo_entry_offset(slave, PDO_INPUT_INDEX, PDO_INPUT_SUBINDEX_TORQUE))); // 显示当前状态 switch (control_word & 0x006F) { case STATE_NOT_READY_TO_SWITCH_ON: printf("Drive is in the Not Ready to Switch On state.\n"); break; case STATE_SWITCH_ON_DISABLED: printf("Drive is in the Switch On Disabled state.\n"); break; case STATE_READY_TO_SWITCH_ON: printf("Drive is in the Ready to Switch On state.\n"); break; case STATE_SWITCHED_ON: printf("Drive is in the Switched On state.\n"); break; case STATE_OPERATION_ENABLED: printf("Drive is in the Operation Enabled state.\n"); break; case STATE_QUICK_STOP_ACTIVE: printf("Drive is in the Quick Stop Active state.\n"); break; case STATE_FAULT: printf("Drive is in the Fault state.\n"); break; default: printf("Drive is in an unknown state.\n"); break; } // 设置目标位置 if (control_word & 0x001F == STATE_OPERATION_ENABLED) { target_position += (int32_t)(MAX_SPEED * REFRESH_RATE / 1000000.0); if (target_position > 65536) { target_position -= 65536; } *((int32_t *)(domain_pd + ecrt_slave_config_reg_pdo_entry_offset(slave, PDO_OUTPUT_INDEX, PDO_OUTPUT_SUBINDEX_CONTROL))) = CMD_SET_POSITION | (target_position << 8); } // 刷新PDO ecrt_domain_queue(domain); ecrt_master_send(master); usleep(REFRESH_RATE); } // 停止驱动器 control_word = 0x0007; // 将控制字设置为“Switch On + Enable Operation + Quick Stop” ecrt_domain_queue(domain); ecrt_master_send(master); ecrt_master_receive(master); check_domain_state(); if (actual_position != 0) { printf("Failed to stop the drive!\n"); return -1; } // 释放EtherCAT主站 ecrt_release_master(master); master = NULL; // 结束程序 return 0; } static void check_domain_state(void) { ecrt_master_state(master, &master_state); ecrt_domain_state(domain, &domain_state); } static void check_slave_config_states(void) { ecrt_slave_config_state(slave, &slave_state); } ``` 这个示例程序使用了EtherCAT通信协议来控制雷塞(Leisai)556步进驱动器。程序中定义了一些常量和全局变量,包括刷新率、PDO映射、命令码、状态码等。程序首先初始化EtherCAT主站,并发现从站。然后,程序创建一个EtherCAT域,并注册PDO映射。接下来,程序配置从站和从站状态,并初始化驱动器。程序循环运行,读取当前位置、速度和力矩,并根据控制字设置目标位置。最后,程序停止驱动器并释放EtherCAT主站。 请注意,这只是一个示例程序,具体实现可能因驱动器型号和应用场景而异。在实际应用中,请根据实际情况进行调整。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值