RFC是SAP提供的一种标准接口,用于实现SAP系统与其他系统之间的远程函数调用。通过RFC接口,一个系统可以调用另一个系统中的功能模块,实现数据交换和业务流程的协同。RFC接口具有实时性高、调用简单直接等优点,但通常只能实现单向调用,且对函数的语言和参数有一定限制。
本章用Python实现RFC调用进行数据抽取,废话少说,直接上代码。
一、SAPDB数据实现类:SAPDB.py
##############################################################
#脚本: SAPDB.py
#日期: 2025-01-14 17:18:44
#描述: SAP链接数据
#参数:
##############################################################
from pyrfc import Connection
from SqlParse import LmSqlParse
from Etlutils import ENVIRON
from SysConstant import SysCons
from typing import Any, Dict, NoReturn, Optional, Tuple, Type, Union
# Registry of available SAP driver classes.
# key: driver code passed as "classdirver"; value: class name resolved
# by sapsql.connector in this module's namespace.
RegistClass={
#code : class name
"ZBI_FM001":"ZBI_FM001",
"RFC_READ_TABLE2":"RFC_READ_TABLE2"
}
#初始化类对象
class sapsql():
    """Factory for SAP RFC driver instances.

    Looks up the requested driver class in ``RegistClass`` and
    instantiates it with the SAP function name and optional environment.
    """

    @classmethod
    def connector(cls, **kwargs: Any):
        """Create and return a driver instance.

        Keyword Args:
            classdirver (str): key in RegistClass naming the driver class.
            sapfunc (str): SAP RFC function module name for the driver.
            env (str, optional): environment key for the connection config.

        Returns:
            An instance of the registered driver class.

        Raises:
            Exception: if classdirver/sapfunc is missing or unregistered.
        """
        if "classdirver" not in kwargs:
            raise Exception(
                "classdirver not found: " + ", ".join(RegistClass.keys())
            )
        if "sapfunc" not in kwargs:
            raise Exception("sapfunc not found")
        classdirver = kwargs["classdirver"]
        sapfunc = kwargs["sapfunc"]
        env = kwargs.get("env")
        if classdirver.upper() not in RegistClass:
            raise Exception(
                "classdirver:" + classdirver + " not found: "
                + ", ".join(RegistClass.keys())
            )
        # Resolve the class by name instead of eval() on a concatenated
        # string -- avoids code injection through sapfunc/env and breakage
        # when either value contains a quote character.
        classdv = RegistClass[classdirver.upper()]
        driver_cls = globals()[classdv]
        if env is None:
            return driver_cls(sapfunc)
        return driver_cls(sapfunc, env)
# clasNm=RegistClass.get(classnm)
class BaseSAP():
    """Base class for SAP RFC drivers: owns the pyrfc connection.

    Subclasses implement ``query_batchBysql`` (table reads) and/or
    ``querRfcParse`` (result parsing for generic RFC calls).
    """
    SAPDFSEP = SysCons.SAPDFSEP   # default field separator constant
    SAPDTENV = SysCons.SAPDTENV   # default environment key
    RunRowCnt = 0                 # rows returned by the last query

    def __init__(self, sapFunc, env=None):
        """Open a pyrfc Connection for the given environment.

        Args:
            sapFunc: name of the RFC function module to call.
            env: environment key for ENVIRON.getEnv; when omitted,
                falls back to SAPDTENV.
        """
        self.sapFunc = sapFunc
        # was: self.configENV stayed undefined when env is None, so
        # ENVIRON.getEnv raised AttributeError. SAPDTENV is presumably
        # the default environment key -- NOTE(review): confirm SysCons.
        self.configENV = env if env is not None else self.SAPDTENV
        config = ENVIRON.getEnv(self.configENV)
        self.conn = Connection(**config)

    def setDefaultOp(self, DefaultOp):
        """Set the default field delimiter."""
        self.DefaultOp = DefaultOp

    def ping(self):
        """Return True when the RFC connection is alive.

        Raises:
            ValueError: wrapping the underlying connection error.
        """
        try:
            self.conn.ping()
            return True
        except Exception as e:
            # chain the cause so the original pyrfc error stays visible
            raise ValueError(e) from e

    def close(self):
        """Close the underlying RFC connection."""
        self.conn.close()

    def query_batchBysql(self, sql: str, header=False):
        """Query SAP by SQL text; implemented by table-reading subclasses
        (default path is /SAPDS/RFC_READ_TABLE2)."""
        pass

    def query_batchByRfc(self, parm, header=False):
        """Call the configured RFC function and parse its result.

        Args:
            parm: dict of keyword parameters for the RFC call.
            header: when True, prepend the header row to the data.

        Returns:
            list of row tuples (optionally with the header as row 0).
        """
        result = self.__querRfcbaseQuery(self.sapFunc, parm)
        rethead, retdata = self.querRfcParse(result)
        if header:
            retdata.insert(0, rethead)
        return retdata

    def __querRfcbaseQuery(self, funName, paraVal: dict):
        # thin wrapper over pyrfc Connection.call
        return self.conn.call(funName, **paraVal)

    def querRfcParse(self, result):
        """Parse an RFC result into (header, rows); overridden by subclasses."""
        return [], []
#通过SAPRFC_READ_TABLE2链接
class RFC_READ_TABLE2(BaseSAP):
    """Table reader built on the /SAPDS/RFC_READ_TABLE2 RFC module."""
    DefaultOp = "|"   # delimiter used in the RFC's flattened result rows
    RunRowCnt = 0     # rows returned by the last query
    RFCName = "/SAPDS/RFC_READ_TABLE2"

    def query_batchBysql(self, sql: str, header=False):
        """Run a simple SELECT against SAP and return rows as tuples.

        Args:
            sql: "select cols from table [where ...]" statement.
            header: when True, prepend the column-name list as row 0.
        """
        # '/' occurs in SAP namespace identifiers (e.g. /DMBE/...) and
        # confuses the SQL parser, so encode it before parsing
        sql = sql.replace("/", "@")
        runtabs, sqlCols, runwheres = LmSqlParse.simpleSqlAnaly(sql)
        # decode the '@' placeholders back to '/' for the RFC call
        retcols = [col.replace("@", "/") for col in sqlCols]
        runtab = runtabs[0] if runtabs else ""
        # was not decoded: namespace tables would be sent to SAP with
        # '@' still embedded in the name
        runtab = runtab.replace("@", "/")
        runwhere = runwheres[0].strip() if runwheres else ""
        if runwhere and runwhere.upper().startswith("WHERE"):
            # strip the leading "WHERE " and pad '=' for the SAP parser
            # NOTE(review): '@' placeholders inside the where clause are
            # not decoded back to '/' -- confirm whether callers rely on it
            runwhere = runwhere[6:].replace("=", " = ")
        fields, headers, rowcount = self.qry(retcols, runtab, runwhere)
        self.RunRowCnt = rowcount
        if header:
            fields.insert(0, headers)
        return fields

    def qry(self, Fields, SQLTable, where, SplitOp=DefaultOp, MaxRows=None):
        """Call the RFC and return (rows, headers, rowcount).

        Args:
            Fields: column names, or ["*"] / [] for all columns.
            SQLTable: SAP table name.
            where: where-clause text (without the WHERE keyword).
            SplitOp: field delimiter for the flattened result rows.
            MaxRows: optional row limit passed as ROWCOUNT.
        """
        # guard the empty list too -- Fields[0] used to raise IndexError
        if not Fields or Fields[0] == "*":
            Fields = []
        else:
            # the RFC expects a table of {'FIELDNAME': ...} entries
            Fields = [{'FIELDNAME': x} for x in Fields]
        options = [{'TEXT': where}]
        call_args = dict(QUERY_TABLE=SQLTable, DELIMITER=SplitOp,
                         FIELDS=Fields, OPTIONS=options)
        if MaxRows is not None:   # was: MaxRows == None
            call_args['ROWCOUNT'] = MaxRows
        tables = self.conn.call(self.RFCName, **call_args)
        out_key = tables["OUT_TABLE"]
        headers = [x['FIELDNAME'] for x in tables["FIELDS"]]
        data_fields = tables[out_key]
        # removed the debug print(headers) that polluted stdout
        rows = []
        for dataRow in data_fields:
            cells = [cell.strip() for cell in dataRow['WA'].split(SplitOp)]
            rows.append(tuple(cells))
        return rows, headers, len(data_fields)

    def getTabDes(self, SQLTable):
        """Return the FIELDS metadata (column descriptors) for a table."""
        tables = self.conn.call(self.RFCName, QUERY_TABLE=SQLTable,
                                FIELDS=[], ROWCOUNT=1)
        return tables["FIELDS"]

    @classmethod
    def sap_test(cls):   # was: first parameter of a classmethod named self
        """Smoke test: query one column through the registered driver."""
        selectsql = "select ARKTX from VBAP"
        sapcfg = {
            "env": "SAPDEV",
            "classdirver": "RFC_READ_TABLE2",
            "sapfunc": "RFC_READ_TABLE2",
        }
        dbcon = sapsql.connector(**sapcfg)
        dbcon.query_batchBysql(selectsql)
dbcon.query_batchBysql(selectsql)
#通用SAPRFC调用
class ZBI_FM001(BaseSAP):
    """Generic RFC driver: parses the ET_DATA structure returned by the
    custom ZBI_FM001 function module."""
    RunRowCnt = 0   # rows returned by the last parse

    def querRfcParse(self, result):
        """Convert the RFC result into (header, rows).

        Args:
            result: RFC response dict with an "ET_DATA" table whose rows
                carry a "DATA" list of {FIELDNAME, FIELDVALUE} cells.

        Returns:
            (header, rows): header is the FIELDNAME list of the last row;
            rows is a list of tuples of stripped FIELDVALUEs.
        """
        datas = result["ET_DATA"]
        retdata = []
        rethead = []
        for rows in datas:
            lmheader = []
            lmdata = []
            for cols in rows.get("DATA"):
                lmheader.append(cols.get("FIELDNAME"))
                lmdata.append(cols.get("FIELDVALUE").strip())
            retdata.append(tuple(lmdata))
            rethead = lmheader
        # was cumulative across calls (+=1 per row, never reset);
        # report a per-call count like RFC_READ_TABLE2 does
        self.RunRowCnt = len(retdata)
        return rethead, retdata
if __name__ == '__main__':
    # Example kept for reference: read a table via RFC_READ_TABLE2
    # cfg={
    # "classdirver" :"RFC_READ_TABLE2",
    # "sapfunc": "RFC_READ_TABLE2",
    # "env": "SAPDEV"
    # }
    # d=sapsql.connector(**cfg)
    # print(d.query_batch("select ARKTX from VBAP"))
    #
    # # 1. Call a custom RFC query function
    # cfg={
    # "classdirver" :"ZBI_FM001",
    # "sapfunc": "ZBI_FM001",
    # "env": "SAPDEV"
    # }
    #
    # par = {
    # "IV_TYPE": "006"
    # }
    # d=sapsql.connector(**cfg)
    # print(d.query_batchByRfc(par,header=False))
    # print(d.RunRowCnt)
    # 2. Query a table through the RFC_READ_TABLE2 driver
    selectsql = "select MANDT from MARA"
    sapcfg = {}
    sapcfg.update({"env": "SAPDEV"})
    sapcfg.update({"classdirver": "RFC_READ_TABLE2"})
    sapcfg.update({"sapfunc": "RFC_READ_TABLE2"})
    dbcon = sapsql.connector(**sapcfg)
    ret=dbcon.query_batchBysql(selectsql)
二、SQL解析 SqlParse.py
import sqlparse
class LmSqlParse:
    """Minimal SQL analyzer built on sqlparse for simple SELECTs."""

    @classmethod
    def simpleSqlAnaly(cls, sql):
        """Split a simple SELECT into (from_tables, columns, where_clauses).

        Args:
            sql: a flat "select cols from tbl [where ...]" statement.

        Returns:
            (froms, cols, wheres): lists of table names, column names and
            raw where-clause strings.
        """
        stmts = sqlparse.parse(sql)[0].tokens
        cols = []
        tbls = []
        froms = []
        wheres = []
        last_key = ''
        for stmt in stmts:
            # normalize case: "SELECT c FROM t" previously produced empty
            # cols/froms because last_key kept the token's original case
            value = stmt.value.lower()
            if value in ('insert', 'select', 'from'):
                last_key = value
            # skip whitespace tokens
            if stmt.ttype is sqlparse.tokens.Text.Whitespace:
                continue
            # DML keyword (select/insert/...)
            elif stmt.ttype is sqlparse.tokens.Keyword.DML:
                last_key = value
            # comma-separated identifier list -- meaning depends on the
            # section we are currently in
            elif isinstance(stmt, sqlparse.sql.IdentifierList):
                if last_key == 'select':
                    for identifier in stmt.get_identifiers():
                        cols.append(identifier.value)
                elif last_key == 'from':
                    for identifier in stmt.get_identifiers():
                        froms.append(identifier.value)
                else:
                    for identifier in stmt.get_identifiers():
                        tbls.append(identifier.value)
            # single identifier
            elif isinstance(stmt, sqlparse.sql.Identifier):
                if last_key == 'select':
                    cols.append(stmt.value)
                elif last_key == 'from':
                    froms.append(stmt.value)
                else:
                    tbls.append(stmt.value)
            # full where clause, including the WHERE keyword
            elif isinstance(stmt, sqlparse.sql.Where):
                wheres.append(stmt.value)
        return froms, cols, wheres
# cols: ['id', 'name', 'score']
# tbls: ['inser_tbl']
# froms: ['tbl']
# wheres: ['where id > 10 ']
三、SAP DB链接封装
import mysql.connector
import pymysql
import pymssql
from SAPDB import sapsql
class BaseDb:
    """Common cursor-based helpers shared by the concrete DB wrappers.

    Subclasses are expected to set ``self.conn`` to a DB-API style
    connection in their ``__init__``.
    """
    # default number of parameter rows per executemany() chunk
    batch_size = 200

    def execute_fetchone(self, sql):
        """Execute a single statement and commit.

        Despite the name, no rows are fetched -- this is used for
        DDL/maintenance statements (truncate, pre/post SQL).
        """
        cursor = self.conn.cursor()
        cursor.execute(sql)
        cursor.close()
        self.conn.commit()

    def execute_batch(self, sql, params=None, batch_size=None):
        """Run ``executemany`` over ``params`` in chunks, committing per chunk.

        Args:
            sql: parameterized INSERT/UPDATE statement.
            params: sequence of parameter tuples; None/empty is a no-op.
            batch_size: overrides the instance batch size when given.
        """
        # was: len(params) raised TypeError when params is None
        if not params:
            return
        cursor = self.conn.cursor()
        if batch_size is not None:
            self.batch_size = batch_size
        for start in range(0, len(params), self.batch_size):
            chunk = params[start:start + self.batch_size]
            cursor.executemany(sql, chunk)
            self.conn.commit()
        cursor.close()
        self.conn.commit()

    def query_fetchone(self, sql):
        """Return the first row of ``sql``, or None when there is no result set.

        NOTE(review): ``with_rows`` is mysql-connector specific; other
        drivers (pymysql/pymssql) may not expose it -- confirm callers.
        """
        cursor = self.conn.cursor()
        cursor.execute(sql)
        # was: result stayed undefined (NameError) when with_rows is falsy
        result = cursor.fetchone() if cursor.with_rows else None
        cursor.close()
        return result

    def query_batch(self, sql, head=False):
        """Fetch all rows of ``sql``.

        Args:
            head: when True, prepend a tuple of column names.
        """
        cursor = self.conn.cursor()
        cursor.execute(sql)
        results = list(cursor.fetchall())
        if head:
            # read description before close() -- some drivers clear it
            fields = [tuple(field[0] for field in cursor.description)]
            results = fields + results
        cursor.close()
        return results

    def ping(self):
        """Check the connection is alive."""
        self.conn.ping()

    def close(self):
        """Close the underlying connection."""
        self.conn.close()
class MySap(BaseDb):
    """BaseDb-compatible wrapper that reads from SAP via a sapsql driver."""

    def __init__(self, **config):
        # sapsql.connector picks the registered driver from **config
        self.conn = sapsql.connector(**config)

    def execute_batch(self, sql, params=None, batch_size=None):
        # SAP is a read-only source in this pipeline; writes are a no-op
        pass

    def query_fetchone(self, sql, header=False):
        """Run a SELECT through the SAP driver and mirror its row count."""
        rows = self.conn.query_batchBysql(sql, header)
        self.RunRowCnt = self.conn.RunRowCnt
        return rows

    def query_batchByRfc(self, header=False, **parm):
        """Call the configured RFC function and mirror its row count."""
        rows = self.conn.query_batchByRfc(parm, header)
        self.RunRowCnt = self.conn.RunRowCnt
        return rows
class EtlDB():
    """Factory returning a BaseDb wrapper for the requested database type."""

    @classmethod
    def getDBCon(cls, dbType: str, **config) -> BaseDb:
        """Create a connection wrapper for ``dbType``.

        Args:
            dbType: one of SQLSERVER/SAP (MYSQL/GBASE/SQLITE not yet wired).
            **config: passed through to the wrapper's constructor.

        Raises:
            ValueError: for unsupported or unknown dbType values.
        """
        kind = dbType.upper()
        if kind == 'SQLSERVER':
            return MySQLServer(**config)
        if kind == 'SAP':
            return MySap(**config)
        # MYSQL/GBASE/SQLITE branches existed but were empty:
        # return MySql(**config) / MyGbase(**config) once implemented.
        # was: silently returned None, which surfaced later as a
        # confusing AttributeError on the first method call
        raise ValueError("unsupported dbType: " + dbType)
四、执行程序封装
#脚本: saptodb.py
#日期: 2025-01-21 17:18:44
#描述: SAP通过RFC获取数据导入DB
#参数:
##############################################################
import os,sys,argparse
from datetime import datetime
sys.path.append(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
from kyctool import EtlDB,EtlLog,StringUtil,ENVIRON
def parse_args(variables):
    """Parse ``KEY=VALUE`` strings into a dict.

    Args:
        variables: iterable of "K=V" strings (from argparse nargs="+"),
            or None when the optional --variable flag was not supplied.

    Returns:
        dict mapping stripped keys to stripped values; entries without
        an '=' are ignored.
    """
    ret = {}
    # was: iterating None raised TypeError when --variable was omitted
    for variable in variables or []:
        # split on the first '=' only, so values may themselves contain '='
        key, sep, value = variable.partition("=")
        if sep:
            ret[key.strip()] = value.strip()
    return ret
# Fallback defaults applied when the corresponding CLI argument is None
deftargetType="SQLSERVER"  # default target DB type
defsourceType="SAP"        # default source DB type
defbatchsize=1000          # default insert batch size
def noTostr(valStr):
    """Return valStr unchanged, or the empty string when it is None."""
    if valStr is None:
        return ""
    return valStr
if __name__ == '__main__':
    # Production entry point: extract from SAP via RFC and load into a DB.
    # Exit codes: 0 = success, 1 = startup failure, 2 = runtime error.
    code=1
    # NOTE(review): rebinds the imported 'argparse' module name to the
    # parser instance -- works, but shadows the module from here on
    argparse=argparse.ArgumentParser()
    argparse.add_argument("--sourceType",type=str,required=True,help="SAP")
    argparse.add_argument("--sourcedb",type=str,required=True,help="SAPDEV")
    argparse.add_argument("--targetType",type=str,required=True,help="SQLSERVER")
    argparse.add_argument("--targetdb",type=str,required=True,help="SQLSERV")
    argparse.add_argument("--insertsql",type=str,required=True,help="insert into ")
    argparse.add_argument("--batchsize",type=int,required=False,help="batchsize")
    argparse.add_argument("--targetBefSQL",type=str,required=False,help="执行插入前SQL")
    argparse.add_argument("--targetAftSQL",type=str,required=False,help="执行插入后SQL")
    argparse.add_argument("--variable",nargs="+",required=False,help="SQL KYC variable example:[--variable V_END_DATE='203401' V_START_DATE='203403']")
    argparse.add_argument("--logfile",type=str,required=True,help="LOG FILE")
    args=argparse.parse_args()
    # initialise logging target
    #print(sqlFile)
    logFile=args.logfile
    # source connection settings
    sourcedb=args.sourcedb
    sourceType=args.sourceType
    # target connection settings
    targetdb=args.targetdb
    insertsql=args.insertsql
    targetType=args.targetType
    batchsize=args.batchsize
    targetBefSQL=args.targetBefSQL
    targetAftSQL=args.targetAftSQL
    # collect KEY=VALUE substitution variables
    # NOTE(review): args.variable is None when --variable is omitted and
    # parse_args iterates it -- confirm the flag is mandatory in practice
    variable=parse_args(args.variable)
    if sourceType is None:
        sourceType=defsourceType
    sourcecfg=ENVIRON.getEnv(sourcedb)
    etllog = EtlLog.getLog(logFile)
    if targetType is None:
        targetType=deftargetType
    if batchsize is None:
        batchsize=defbatchsize
    targetcfg=ENVIRON.getEnv(targetdb)
    # log the run parameters, masking passwords in connection configs
    etllog.info("Run SQL :\t"+logFile)
    etllog.info("=================================parameter=================================")
    etllog.info("sourceType:\t"+ sourceType)
    etllog.info("sourcedb :\t"+ "\t".join([k+"="+StringUtil.mask_password({k:v}) for k,v in sourcecfg.items()]))
    etllog.info("targetType:\t"+ targetType)
    etllog.info("targetdb :\t"+ "\t".join([k+"="+StringUtil.mask_password({k:v}) for k,v in targetcfg.items()]))
    etllog.info("targetBefSQL :\t"+ noTostr(targetBefSQL))
    etllog.info("insertsql :\t"+ insertsql)
    etllog.info("targetAftSQL :\t"+ noTostr(targetAftSQL))
    etllog.info("variable:\t"+ "\t".join([k+"="+v for k,v in variable.items()]))
    etllog.info("===========================================================================")
    start_date = datetime.now()
    sourCon=None
    targetCon=None
    # synchronise the data: extract via RFC, then bulk-insert
    try:
        etllog.info("抽取["+sourcedb+"]数据")
        # SAP connection settings: this job always uses the ZBI_FM001 driver
        sapcfg={}
        sapcfg.update({"env":sourcedb})
        sapcfg.update({"classdirver":"ZBI_FM001"})
        sapcfg.update({"sapfunc":"ZBI_FM001"})
        sourCon=EtlDB.getDBCon(sourceType,**sapcfg)
        sourCon.ping()
        result=sourCon.query_batchByRfc(header=False,**variable)
        runRowCnt=sourCon.RunRowCnt
        etllog.info("抽取"+str(runRowCnt)+"成功")
        targetCon=EtlDB.getDBCon(targetType,**targetcfg)
        # optional pre-load SQL (e.g. truncate)
        if targetBefSQL is not None:
            etllog.info("执行导入前逻辑:" + targetBefSQL + "")
            targetCon.execute_fetchone(targetBefSQL)
        etllog.info("导入数据至["+targetdb+"]")
        targetCon.execute_batch(insertsql,result)
        etllog.info("导入数据成功")
        # optional post-load SQL
        if targetAftSQL is not None:
            etllog.info("执行导入后逻辑:" + targetAftSQL + "")
            targetCon.execute_fetchone(targetAftSQL)
        code=0
        etllog.info("QUERY_CNT:" + str(runRowCnt) + " 条")
    except Exception as e:
        etllog.error(e)
        code = 2
    finally:
        # always release both connections, even on failure
        if not sourCon is None :
            sourCon.close()
        if not targetCon is None :
            targetCon.close()
    end_date = datetime.now()
    dfdate=round((end_date-start_date).total_seconds()/60,2)
    etllog.info("Run Time:"+str(dfdate)+" min")
    exit(code)
五、SH脚本编写
#!/bin/bash
##############################################################
#脚本:
#日期: 2025-01-14 17:18:44
#开发: system
#描述: SAP数据同步
#参数: date
##############################################################
function getParm(){
    # Look up a configuration value via the lmtool helper (prints to stdout).
    # Quote the path and the key so spaces cannot word-split them.
    lmpython "${ETL_HOME}/bin/lmtool.py" --type env --param "$1"
}
function loginfo(){
    # Echo a timestamped message and append it to the job log.
    dateForm="%Y-%m-%d %T"
    # was ${logFile}: shell variables are case-sensitive and the script
    # defines ${logfile}, so log lines never reached the file
    echo "[`date +"${dateForm}"`]: $1" |tee -a "${logfile}"
}
# Argument check: exactly one run date (yyyymmdd) is required.
if [ $# -ne 1 ] ; then
    echo "Param error !"
    echo "Usage:$0 20230619"
    echo "Example:$0 20230619"
    exit 1
fi
# input parameters
par_date=$1
inParam="$0 $*"
# derive the log file name from the script name (basename, no extension)
moduleName="lmdbw_ods/ods_f_sap"
script_sh=$0
logfileName=`echo ${script_sh##*/}|awk -F '.' '{print $1}'`
# configuration
#databaseDir=`getParm ETL_ENV.dataBaseDir`
logbaseDir=`getParm ETL_ENV.logBaseDir`
sourceType="SAP"
sourceDB="SAPDEV"
selectsql="select VBELN,POSNR from LIPS "
targetType="SQLSERVER"
targetDB="LMBDW"
targetBefSQL="truncate table ODS_SAP_LE_LIPS_S_D"
insertsql="insert into ODS_SAP_LE_LIPS_S_D(DATA_DT,VBELN,POSNR ) VALUES (${par_date},%s,%s )"
batchsize=1000
logdir=${logbaseDir}/${par_date}/${moduleName}
[ -d ${logdir} ] || mkdir -p ${logdir}
logfile=${logdir}/${logfileName}.log
# was ${logFile} (undefined -- case mismatch with ${logfile}), so the
# banner line never reached the log file
echo "=========================================================================================================" |tee -a ${logfile}
loginfo "Begin to run job!"
loginfo "${inParam} "
loginfo "log in ${logfile}"
# NOTE(review): saptodb.py defines no --selectsql option and expects
# --variable for its RFC parameters -- confirm this invocation matches
# the python CLI before running
python ${ETL_HOME}/bin/saptodb.py --sourceType "${sourceType}" --sourcedb "${sourceDB}" --selectsql "${selectsql}" --targetType "${targetType}" --targetdb "${targetDB}" --targetBefSQL "${targetBefSQL}" --insertsql "${insertsql}" --batchsize ${batchsize} --logfile "${logfile}"
ret=$?
if [ ${ret} -eq 0 ]
then
    loginfo "Run job succeeded!"
else
    loginfo "Run job Failed!"
    exit ${ret}
fi
exit ${ret}