excel-xls-data_handling

本文介绍了一种基于Python的Excel数据处理方法,通过定义Data类继承WorkBook,实现了数据查找、标题获取、列表转换等功能。文章详细解释了如何在Excel中定位特定数据,包括行和列的匹配,以及如何转换数据列表以提高数据处理效率。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

from xls import WorkBook
from print import printdebug as p

class Data(WorkBook):
    title = dict()
    find_bool = True
    find_result = list()  # this is the result of fund

    def __init__(self):
        super().__init__()

    def find_data(self,value):
        if self.find_bool:
            fr = list()
        else:
            fr = self.find_result       #有修改,最后再传回修改后的值
        lc = self.list_content   #只判断不修改

        n = 0
        for i in range(self.cols):
            for j in range(self.rows):
                if lc[i][j] == value:
                    n += 1
                    fr.append([i+1,j+1])
        else:
            self.find_result = fr
            return n

    def __find_count(self,val1,val2):
        self.find_bool =False

        if self.find_data(val1) ==1:
            if self.find_data(val2) == 1:
                result = True
            else:
                result = False
        else:
            result = False

        self.find_bool = True
        return result

    def get_title(self,val1 = '',val2 = '',val3 = ''):
        #替换对象属性的方法的条件是只增加内部计算的效率
        #如果要保存数据还是要存到对象里边去

        title = self.title
        if val1 == '':
            val1 = self.val1
        if val2 == '':
            val2 = self.val2
        if val3 == '':
            val3 = self.val3

        if self.__find_count(val1,val2):
            fr = self.find_result
            if fr[0][0] == fr[1][0]:
                title['same'] = 'row'
                title['row']=fr[0][0]
                title[val1] = fr[0][1]
                title[val2] = fr[1][1]
            elif fr[0][1] == fr[1][1]:
                title['same'] = 'col'
                title['col']=fr[0][1]
                title[val1] = fr[0][0]
                title[val2] = fr[1][0]
        self.title = title
        print(title)

        print('the '+title['same']+' is the same: '+str(title['row']))

    def list_converte(self):
        #原内容列表
        content = self.list_content
        # the finally list of content by col
        list_col =list()
        for i in range(self.cols):
            list_col.append(list())
            for j in range(self.rows):
                list_col[i].append(content[j][i])
        p(list_col,'this is list of col')

    def get_title_dict(self):
        # title 字典 key to index of col
        d = dict()
        if self.title['same'] == 'row':
            list_key = self.list_content[self.title['row']-1]
            p(list_key,'listkey')
            for i in range(self.cols):
                d[list_key[i]] = i
            p(d,'this is title dict')


val1 = 'Total'
val2 = 'Total+directive'
val3 = 'name'


#xlapp.open_new(path)
'''
第一步,获得文件名 path  创建excel对象 xlapp = WorkBook()
每二步,打开文件 xlapp.get_workbooks(path)  这里已经有了 xlBook xlbk_name filename rows cols
第三步,执行获得使用区域   xlapp.get_usedrange()) 返回的是区域   ,保存的是 区域内的数据 list_content
第四步,确定标题栏的位置,是同行还是同列,查找类型,字符串(分别代表数据,和数据所对应的关键字)
第五步,读取数据list_to_data,确定数据位置,依据关键字,区分方法
第六步,锁定,目标工作表位置,插入数据位置,byrows,or,bycolumn,依据是关键字
第七步,设置数据的值data_to_list
第八步,要实现行列转换的思想,还要确定匹配度的问题

'''
# step 1
path = 'C:\\Users\\wangcc\\Desktop\\工作簿1.csv'

xlapp = Data()
# step 2
xlapp.get_workbooks(path)
# step 3
xlapp.get_usedrange()
# step 4
xlapp.get_title(val1,val2,val3)
# step 5
xlapp.list_converte()
xlapp.get_title_dict()



D:\ruanjian\anaconda\envs\project\python.exe D:/znjt/project/1DCNN_Fault_Detection-master/data/data_preprocess.py Traceback (most recent call last): File "D:\ruanjian\anaconda\envs\project\lib\site-packages\pandas\compat\_optional.py", line 138, in import_optional_dependency module = importlib.import_module(name) File "D:\ruanjian\anaconda\envs\project\lib\importlib\__init__.py", line 127, in import_module return _bootstrap._gcd_import(name[level:], package, level) File "<frozen importlib._bootstrap>", line 1014, in _gcd_import File "<frozen importlib._bootstrap>", line 991, in _find_and_load File "<frozen importlib._bootstrap>", line 973, in _find_and_load_unlocked ModuleNotFoundError: No module named 'openpyxl' During handling of the above exception, another exception occurred: Traceback (most recent call last): File "D:/znjt/project/1DCNN_Fault_Detection-master/data/data_preprocess.py", line 95, in <module> X_train, y_train, X_test, y_test = dp.process(window_size=opt.dim) File "D:/znjt/project/1DCNN_Fault_Detection-master/data/data_preprocess.py", line 59, in process df = self.load_data() File "D:/znjt/project/1DCNN_Fault_Detection-master/data/data_preprocess.py", line 27, in load_data df = pd.read_excel(file_path, sheet_name=self.sheet_name) File "D:\ruanjian\anaconda\envs\project\lib\site-packages\pandas\util\_decorators.py", line 311, in wrapper return func(*args, **kwargs) File "D:\ruanjian\anaconda\envs\project\lib\site-packages\pandas\io\excel\_base.py", line 457, in read_excel io = ExcelFile(io, storage_options=storage_options, engine=engine) File "D:\ruanjian\anaconda\envs\project\lib\site-packages\pandas\io\excel\_base.py", line 1419, in __init__ self._reader = self._engines[engine](self._io, storage_options=storage_options) File "D:\ruanjian\anaconda\envs\project\lib\site-packages\pandas\io\excel\_openpyxl.py", line 524, in __init__ import_optional_dependency("openpyxl") File "D:\ruanjian\anaconda\envs\project\lib\site-packages\pandas\compat\_optional.py", line 141, in import_optional_dependency raise ImportError(msg) ImportError: Missing optional dependency 'openpyxl'. Use pip or conda to install openpyxl. 进程已结束,退出代码1
06-13
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值