检查C++函数是否超过400行-CSDN博客

本文链接：https://blog.csdn.net/sssssssssnake/article/details/140461957

analyzeNloc.py

import argparse
import gitlab
from lizardAnalysis import *
from prepareInfomation import *

# Base URL of the GitLab instance. Used for the API client and to recognise
# HTTP(S) clone URLs that belong to this instance.
gitlab_url = 'https://gitlab.test.com'
# SSH "user@host" prefix used when rewriting an HTTP(S) URL into SSH form.
gitlab_prefix = "git@gitlab.test.com"
# Private access token for the GitLab API (left blank here).
gitlab_token = ''
# GitLab project IDs for which the function-length check is enforced.
check_repo = []


def convert_http_to_git(url):
    """Normalise a repository URL to scp-style SSH form where possible.

    * Surrounding whitespace and trailing ``/`` characters are removed.
    * ``ssh://host/path`` becomes ``host:path``.
    * ``.git`` is appended unless the URL already *ends* with it.
    * A URL of the form ``<gitlab_url>/<namespace>/<repo>.git`` is rewritten
      to ``<gitlab_prefix>:<namespace>/<repo>.git``.

    Args:
        url: Repository URL as supplied by the caller.

    Returns:
        The rewritten SSH URL, or the normalised input when it does not
        point at ``gitlab_url``.
    """
    url = url.strip()
    if url.startswith('ssh://'):
        host, slash, path = url[len('ssh://'):].partition('/')
        url = f'{host}:{path}' if slash else host

    # A trailing slash would otherwise yield ".../repo/.git".
    url = url.rstrip('/')

    # Test the suffix, not mere containment: a path such as "group/my.github"
    # contains ".git" but still needs the suffix appended.
    if not url.endswith('.git'):
        url += '.git'

    # Escape the base URL so its dots match literally, and anchor the ".git"
    # suffix at the end so an inner ".git" cannot truncate the repository name.
    pattern = re.escape(gitlab_url) + r'/(.*)/(.*)\.git$'
    match = re.search(pattern, url)
    if match:
        return f'{gitlab_prefix}:{match.group(1)}/{match.group(2)}.git'
    return url
def get_issue_lines(repo_path,commit_hash_before):
    """Collect over-long C++ functions touched since ``commit_hash_before``.

    Asks ``prepareInfomation`` for the changed lines of every file in the
    diff, keeps the files whose name ends in a C/C++ header or source
    suffix, and runs ``LizardAnalysis.get_line_issues_list`` on each with a
    limit of 400.

    Args:
        repo_path: Path of the checked-out repository.
        commit_hash_before: Revision that HEAD is diffed against.

    Returns:
        A list holding the concatenated per-file results, in diff order.
    """
    cpp_suffixes = ('.h', '.h++', '.hh', '.hpp', '.hxx', '.cc', '.c++',
                    '.cpp', '.cxx')
    analyzer = LizardAnalysis()
    collected = []
    changed_by_file = prepareInfomation(repo_path, commit_hash_before)
    for rel_path, change_info in changed_by_file.items():
        # Every suffix starts with its only dot, so an endswith() test picks
        # exactly the names whose final ".ext" is one of the suffixes.
        if not rel_path.endswith(cpp_suffixes):
            continue
        absolute_path = repo_path + "/" + rel_path
        collected.extend(
            analyzer.get_line_issues_list(
                absolute_path, change_info.changedLineNumbers, 400, rel_path
            )
        )
    return collected


if __name__ == '__main__':
    # Command line: -p <repo path>, -b <base revision, default HEAD~1>,
    # -r <repository URL, required>.
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', type=str, help='repo path')
    parser.add_argument(
        '-b', '--commit_hash_before',
        type=str,
        metavar='',
        required=False,
        default='HEAD~1',
        help="ef740178c96fbad6410e0b846de86610ab9c5ba8",
    )
    parser.add_argument(
        '-r', '--repo_url',
        type=str,
        metavar='',
        required=True,
    )
    args = parser.parse_args()

    repo_path = args.p
    repo_url = convert_http_to_git(args.repo_url)

    # Open the GitLab connection and authenticate with the configured token.
    gl = gitlab.Gitlab(url=gitlab_url, private_token=gitlab_token)
    gl.auth()

    # Look the project up by its full path: the text after the last ':' of
    # the SSH URL, up to the first ".git".
    try:
        repository_path = repo_url.rsplit(':', 1)[-1].partition('.git')[0]
        project = gl.projects.get(repository_path)
        print(f"仓库ID: {project.id}")
    except Exception as e:
        # A failed lookup is reported but ends the script with status 0.
        print(f"无法获取仓库信息: {e}")
        exit(0)

    # Only projects listed in check_repo are checked; findings are appended
    # to error.txt in the current working directory.
    try:
        if project.id in check_repo:
            line_issues_list = get_issue_lines(repo_path, args.commit_hash_before)
            if line_issues_list:
                with open('error.txt', 'a') as f:
                    f.write("ERROR: 当前C++代码函数行数超过最大限制400行\n")
                    for func in line_issues_list:
                        f.write(f'{func["file_name"]} {func["long_name"]} {func["nloc"]}\n')
    except Exception as e:
        # Any failure during the check is printed and the script still exits 0.
        print(e)
        exit(0)

lizardAnalysis.py

import re
import sys
import lizard
import subprocess


class LizardAnalysis:
    """Find over-long functions in a source file with lizard.

    ``get_line_issues_list`` is the entry point. It analyses a file through
    the ``lizard`` Python API and re-checks every candidate through the
    ``lizard`` command line (``analyze_code``).
    """

    def __init__(self):
        pass

    def analyze_code(self, file_path, function_name, func, count):
        """Re-check one function by running ``lizard <file_path> -Ecpre``.

        Args:
            file_path: Path of the file handed to the lizard command.
            function_name: Function name, optionally followed by a
                parenthesised parameter list (which is stripped).
            func: Object exposing ``start_line``; used to pick the report
                row that belongs to this exact function.
            count: Threshold the reported number must exceed.

        Returns:
            ``True`` when a report row for ``function_name`` starts at
            ``func.start_line`` and the integer preceding its
            ``name@start-end`` field is greater than ``count``; ``False``
            otherwise.  When lizard cannot be started or writes to stderr,
            an ``"Error occurred: ..."`` string is returned instead.
            NOTE(review): that string is truthy, so ``get_line_issues_list``
            treats a failed re-check as a violation -- confirm this is the
            intended fallback.
        """
        # Pass an argument list and avoid the shell: file paths come from
        # repository content and must not be interpreted as shell syntax.
        try:
            completed = subprocess.run(
                ["lizard", file_path, "-Ecpre"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except OSError as exc:
            # With a shell a missing executable surfaced as stderr text;
            # keep reporting it through the same return value.
            return f"Error occurred: {exc}"

        if completed.stderr:
            stderr_text = completed.stderr.decode('utf-8', errors='replace')
            return f"Error occurred: {stderr_text}"

        output_str = completed.stdout.decode('utf-8', errors='replace')
        function_name = self.remove_parentheses(function_name)
        results = self.extract_numbers_from_line(output_str, function_name)
        if not results:
            print("未匹配到指定函数名")
            return False

        print(f"匹配到函数名: {function_name}")
        for i, (line_count, start_line, end_line) in enumerate(results, 1):
            print(f"匹配项{i}: 行数: {line_count}, 函数起止行数: {start_line}, {end_line}")
            # Several functions can share a name; the start line tells
            # them apart.
            if int(start_line) == func.start_line:
                print("匹配成功")
                if int(line_count) > count:
                    return True
        return False

    def remove_parentheses(self,input_string):
        """Return ``input_string`` without any ``(...)`` groups, stripped."""
        return re.sub(r'\([^)]*\)', '', input_string).strip()

    def extract_numbers_from_line(self,line, function_name):
        """Find the report entries for ``function_name`` in lizard output.

        Args:
            line: Text to search (the whole command output is accepted).
            function_name: Name to look for; matched literally.

        Returns:
            A list of ``(count, start, end)`` string tuples, one for every
            occurrence of ``<count> <function_name>@<start>-<end>``.
        """
        # Escape the name: C++ identifiers such as "operator[]" or
        # "operator+" contain regex metacharacters that would otherwise
        # raise re.error or change what the pattern matches.
        pattern = rf'(\d+)\s+{re.escape(function_name)}@(\d+)-(\d+)'
        return [match.groups() for match in re.finditer(pattern, line)]

    def get_line_issues_list(self, file: str, lines, count, file_name):
        """List functions in ``file`` that are too long and were modified.

        A function is reported when its ``nloc`` exceeds ``count``, at least
        one entry of ``lines`` lies within its start/end line range, and
        ``analyze_code`` returns a truthy value for it.

        Args:
            file: Path of the file to analyse.
            lines: Iterable of changed line numbers in that file.
            count: Maximum allowed size.
            file_name: Name stored on each reported function as
                ``file_name``.

        Returns:
            A list with the ``__dict__`` of every reported function object.
        """
        issues_list = []
        for func in lizard.analyze_file(file).function_list:
            if func is None or not func.nloc > count:
                continue
            touched = any(
                func.start_line <= line <= func.end_line for line in lines
            )
            if touched and self.analyze_code(file, func.long_name, func, count):
                func.file_name = file_name
                issues_list.append(func.__dict__)
        return issues_list

prepareInfomation.py

import os
import re
from typing import *
import subprocess

class ChangedLineInfo:
    """Set of line numbers that were changed in one file."""

    def __init__(self):
        # Line numbers recorded so far.
        self.changedLineNumbers = set()

    def addChangeInfo(self, startLine: int, lineCount: int):
        """Record ``lineCount`` consecutive changed lines.

        startLine : first changed line.
        lineCount : number of changed lines beginning at ``startLine``;
                    zero or a negative value records nothing.
        """
        if lineCount > 0:
            self.changedLineNumbers.update(
                range(startLine, startLine + lineCount)
            )

    def isLineChanged(self, line: int) -> bool:
        """Tell whether ``line`` has been recorded as changed."""
        return line in self.changedLineNumbers

def getChangedLineInfoFromDiffLines(lines: List[str]) -> ChangedLineInfo:
    """
    Parse the hunk headers of one file's git-diff output.

    Parameter : lines, the git-diff output lines describing a single file.
    Returns   : a ChangedLineInfo holding the new-side line numbers named by
                every "@@ -a[,b] +c[,d] @@" header found in ``lines``.
    """
    # Raw string: "\+" in a normal literal is an invalid escape sequence.
    # Groups: 1 = ",<old count>", 2 = first new-side line, 3 = ",<new count>".
    hunk_header = re.compile(r"^@@ -[0-9]+(,[0-9]+)? \+([0-9]+)(,[0-9]+)? @@")
    changedLineInfo = ChangedLineInfo()
    for line in lines:
        header = hunk_header.match(line)
        if header is None:
            continue
        new_count = header.group(3)
        # A header without ",<count>" describes exactly one line.
        changedLineCount = int(new_count[1:]) if new_count else 1
        changedLineInfo.addChangeInfo(int(header.group(2)), changedLineCount)
    return changedLineInfo



def prepareInfomation(repo_path: str,commit_hash_before):
    """
    Run ``git diff`` between ``commit_hash_before`` and HEAD and collect the
    changed lines of each file.

    :param repo_path: directory in which git is run.
    :param commit_hash_before: revision that HEAD is compared against.
    :return: a dict; key is the path (relative to the repository) of a file
             that was not deleted by the change, value is the
             ChangedLineInfo parsed from that file's hunks.
    :raises subprocess.CalledProcessError: when git exits with a non-zero
             status.

    File names containing whitespace are not supported by the header regex.
    """
    # --unified=0 removes context lines, so hunk headers describe changed
    # lines only; --diff-filter=d drops deleted files.
    # core.quotePath=false keeps non-ASCII paths unquoted; a quoted header
    # would not match the regex below and its hunks would be attributed to
    # the previous file.
    gitCmd = ["git", "-c", "core.quotePath=false", "diff", "--unified=0",
              "--diff-filter=d", commit_hash_before, "HEAD"]
    gitDiffOutputRaw = subprocess.check_output(gitCmd, cwd=repo_path)
    # Diffed sources are not guaranteed to be UTF-8; only the header lines
    # matter here, so undecodable bytes are replaced instead of raising.
    diffOutputLines = gitDiffOutputRaw.decode('utf-8', errors='replace').splitlines()

    # A matching line starts the diff of a new file; group 1 is its path.
    separateLineReg = re.compile(r"^diff --git a/\S+ b/(\S+)")
    changedByFile = {}
    currentCheckFileName = ""
    diffLinesForCurrentCheckFile = []
    for diffLine in diffOutputLines:
        header = separateLineReg.match(diffLine)
        if header is None:
            diffLinesForCurrentCheckFile.append(diffLine)
            continue
        # New file header: store what was gathered for the previous file,
        # then start collecting for the new one.
        if diffLinesForCurrentCheckFile:
            changedByFile[currentCheckFileName] = getChangedLineInfoFromDiffLines(
                diffLinesForCurrentCheckFile)
            diffLinesForCurrentCheckFile = []
        currentCheckFileName = header.group(1)

    # Store the final file once the output is exhausted.
    if diffOutputLines:
        changedByFile[currentCheckFileName] = getChangedLineInfoFromDiffLines(
            diffLinesForCurrentCheckFile)
    return changedByFile