/* csv typing */

#include "string.h"
#include "stdlib.h"
#include "stdio.h"






/* Shorthand aliases for the basic character and integer types used by the CSV helpers. */
typedef char CHAR;          /* text characters (CSV lines, items, delimiters) */
typedef unsigned char BYTE; /* small unsigned quantity; used for item buffer sizes and loop counters */
typedef int INT;            /* plain signed integer */
typedef unsigned int UINT;  /* plain unsigned integer; not referenced in the visible part of this file */







/* Upper bound of 2 * 1024 for an item length; not referenced in the visible part of this file. */
#define Item_len_max  (2 * 1024)




/* Status codes returned by the CSV helper routines in this file. */
typedef enum {
    CSV_SUCCESS = 0,  /* operation completed */
    INVALID_CHAR,     /* first character is neither printable (Isgraph) nor white space (Isspace) */
    INVALID_DELIM,    /* delimiter string is NULL or empty */
    DELIM_NOT_FOUND,  /* delimiter was not found inside the searched span */
    EMPTY_ITEM,       /* delimiter found immediately, so the item has no characters */
} ErrCSV;








char *Strstr(char *str1, char *str2);
/*
Description:
The Strstr function locates the first occurrence, in the string pointed to by str1, of the
sequence of characters (excluding the terminating null character) in the string pointed to
by str2.
Returns:
The Strstr function returns a pointer to the located string, or a null pointer if the string
is not found. If str2 points to a string with zero length, the function returns str1.
*/








int Isgraph(int c); //tests for any printing character except space (' ')
int Isspace(int c); //tests for a white-space character: ' ', '\t', '\v', '\n', '\f' or '\r'
char *StrstrSpan(char *str1, char *str2, int span); //returns NULL if str2 is not matched within the span of str1
//copies the text that precedes the next delim in *str into item and moves *str past that delim
ErrCSV ExtractItem(CHAR *item, BYTE size, CHAR **str, CHAR *delim); 
//ErrCSV CntItemLen(const CHAR *item, CHAR *delim);
ErrCSV CntLineLen(CHAR *line, int *len); //length including the line delimiter '\r' '\n'
int ReadLine(FILE* file, char *Buf); //reads characters up to and including '\n' (or EOF) into Buf and returns the count
ErrCSV CntItemNumPerLine(CHAR *line, int *num); //stores the number of ','-separated items of line in *num
char* ItoC(INT i, char *str, INT radix); //converts an integer to a string




/* Size in bytes of the global line buffer Buf. */
#define BUF_SIZE 1024




/* Zero-initialised global line buffer; main() clears it and fills it through ReadLine(). */
char Buf[BUF_SIZE] = {0};
/*
 * Extracts two consecutive ','-terminated items from the text at *table and
 * stores them back to back (no separator between them) starting at line.
 *
 * table : in/out cursor into the source text; ExtractItem() advances it past
 *         every delimiter it consumes.
 * line  : destination buffer.
 * size  : number of bytes available at line.
 *
 * Returns the number of characters stored at line.
 *
 * Extraction stops early when no room is left or when ExtractItem() reports
 * anything other than CSV_SUCCESS / EMPTY_ITEM (an empty item is legal and
 * simply contributes no characters).
 */
int SplitIntoSubLine(char **table, char *line, INT size)
{
    const BYTE itemNumPerLine = 2;
    /* ExtractItem() takes its capacity as a BYTE, so larger values must be
       clamped instead of being silently truncated by the conversion. */
    const INT maxChunk = (BYTE)-1;
    char *delim = ",";
    char *pos = line;
    BYTE i;

    for (i = 0; i < itemNumPerLine; i++)
    {
        INT room = size;
        ErrCSV rtn;
        const char *nul;
        size_t written;

        if (room <= 0)
        {
            break;                      /* destination exhausted */
        }
        if (room > maxChunk)
        {
            room = maxChunk;
        }

        rtn = ExtractItem(pos, (BYTE)room, table, delim);
        if (rtn != CSV_SUCCESS && rtn != EMPTY_ITEM)
        {
            break;                      /* nothing was extracted; retrying cannot succeed */
        }

        /* Measure what was stored without ever reading past the chunk. */
        nul = memchr(pos, '\0', (size_t)room);
        written = (nul != NULL) ? (size_t)(nul - pos) : (size_t)room;

        size -= (INT)written;
        pos += written;
    }
    return (int)(pos - line);
}




/* Size of a sub-line scratch buffer; only referenced from commented-out code in WriteLine(). */
#define SubLineSize  100




/* Line count used by the commented-out loop in WriteLine(). */
#define WAVE_LINE_NUM  33




/*
 * Debug helper: counts the items of the line at *table with
 * CntItemNumPerLine() and, when that succeeds, prints the status code and the
 * item count.
 *
 * table    : address of the pointer to the line to inspect.
 * lineSize : currently unused.
 *
 * NOTE: a disabled draft of this routine built an output line by iterating
 * WAVE_LINE_NUM - 1 indices, writing each index with ItoC() followed by the
 * result of SplitIntoSubLine(), separated by commas.
 */
void WriteLine(char **table, int lineSize)
{
    int itemCount;
    ErrCSV status = CntItemNumPerLine(*table, &itemCount);

    (void)lineSize;

    if (status != CSV_SUCCESS)
    {
        return;
    }

    printf("the rtn is %x \n", status);
    printf("the len is %d \n", itemCount);
}




/*
 * Test driver: opens the input CSV and the output file, reads the first line
 * of the input into the global Buf and prints its length, its contents, the
 * line length reported by CntLineLen() and the item count reported by
 * CntItemNumPerLine(). Waits for a key press before exiting.
 *
 * Returns 0 on success, -1 when either file cannot be opened.
 * Both streams are closed on every exit path.
 */
int main(void)
{
    FILE *fInput = NULL;
    FILE *fOutput = NULL;
    int len = 0;
    int ItemNum = 0;
    int i = 0;          /* line index shown in the banner; only one line is processed */
    int lineLen = 0;
    int status = -1;

    fInput = fopen("PPRD_06R10_cuted.csv", "r");
    if (fInput == NULL)
    {
        printf("Open input file failed!!!\n");
        goto cleanup;
    }

    fOutput = fopen("OutputFile.txt", "w+");
    if (fOutput == NULL)
    {
        printf("Open the output file failed!!!");
        goto cleanup;
    }

    printf("-----------line :%d -------\n", i);
    memset(Buf, '\0', sizeof(Buf));
    len = ReadLine(fInput, (char*)Buf);
    printf("the len is %d \n", len);
    printf("the Buf is %s \n", Buf);
    printf("------------------the Item contents :\n");
    CntLineLen(Buf, &lineLen);
    printf("the lineLen is %d \n", lineLen);
    CntItemNumPerLine(Buf, &ItemNum);
    printf("the ItemNum is %d \n", ItemNum);

    getchar();          /* keep the console window open until a key is pressed */
    status = 0;

cleanup:
    /* fclose(NULL) is undefined, so only close what was actually opened. */
    if (fOutput != NULL)
    {
        fclose(fOutput);
    }
    if (fInput != NULL)
    {
        fclose(fInput);
    }
    return status;
}

/* Kept from the original source, where it was defined inside main(); not
   referenced by the visible code. */
#define  buf_size  50
















/*
 * Searches for str2 inside the first span characters of str1.
 *
 * A match only counts when it lies completely inside the span; the search
 * also stops at the terminating null character of str1.
 *
 * str1 : string to search.
 * str2 : string to look for.
 * span : number of leading characters of str1 that may be examined.
 *
 * Returns a pointer to the first match, or NULL when there is none, when
 * either pointer is NULL, or when span is not positive.
 * The input address is printed as a debug trace on every call.
 */
char *StrstrSpan(char *str1, char *str2, int span)
{
    /* %p with a void* prints the full address; the earlier (int) cast
       truncated it on platforms where pointers are wider than int. */
    printf("the input str is %p \n", (void *)str1);

    if (str1 == NULL || str2 == NULL || span <= 0)
    {
        return NULL;
    }

    for (; *str1 != '\0' && span > 0; str1++, span--)
    {
        int matched = 0;

        while (str2[matched] != '\0' && matched < span &&
               str1[matched] != '\0' && str1[matched] == str2[matched])
        {
            matched++;
        }
        if (str2[matched] == '\0')
        {
            return str1;        /* all of str2 matched inside the span */
        }
        if (matched >= span || str1[matched] == '\0')
        {
            /* The partial match ran into the end of the span or of str1;
               later start positions have even less room. */
            return NULL;
        }
    }
    return NULL;
}








/*
 * Locates the first occurrence of the string str2 (without its terminating
 * null character) inside the string str1.
 *
 * Returns a pointer to the located substring, or NULL if it is not found or
 * if either argument is NULL. If str2 is the empty string, str1 is returned,
 * including when str1 is itself empty.
 */
char *Strstr(char *str1, char *str2)
{
    char *start;

    if (str1 == NULL || str2 == NULL)
    {
        return NULL;
    }
    if (*str2 == '\0')
    {
        return str1;    /* an empty needle matches at the very beginning */
    }

    for (start = str1; *start != '\0'; start++)
    {
        char *hay = start;
        char *needle = str2;

        while (*needle != '\0' && *hay == *needle)
        {
            hay++;
            needle++;
        }
        if (*needle == '\0')
        {
            return start;
        }
        if (*hay == '\0')
        {
            /* The remaining text is shorter than str2: no later match is possible. */
            return NULL;
        }
    }
    return NULL;
}








/*
 * Tests for a printing character other than space.
 * Returns 1 when c lies in the range 0x21..0x7e, otherwise 0.
 */
int Isgraph(int c)
{
    if (c < 0x21)
    {
        return 0;
    }
    if (c > 0x7e)
    {
        return 0;
    }
    return 1;
}








/*
 * Tests for a white-space character.
 * Returns 1 for space, horizontal tab, vertical tab, line feed, form feed and
 * carriage return; returns 0 for everything else.
 */
int Isspace(int c)
{
    switch (c)
    {
    case ' ':
    case '\t':
    case '\v':
    case '\n':
    case '\f':
    case '\r':
        return 1;
    default:
        return 0;
    }
}




/*
 * Converts the integer i to its textual representation in the given radix
 * and stores the null-terminated result in str.
 *
 * i     : value to convert; a negative value is written with a leading '-'.
 * str   : destination buffer. It must be large enough for the sign, every
 *         digit and the terminator (34 bytes cover a 32-bit int in radix 2).
 * radix : number base, 2..36. Digits above 9 are written as 'a'..'z'.
 *
 * Returns str, or NULL when str is NULL or radix is outside 2..36.
 *
 * The parameters are spelled int, which is what this file's INT alias
 * stands for.
 */
char *ItoC(int i, char *str, int radix)
{
    static const char digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
    unsigned int magnitude;
    unsigned int base;
    char *out;
    char *first;
    char *last;

    if (str == NULL || radix < 2 || radix > 36)
    {
        return NULL;
    }

    base = (unsigned int)radix;
    /* Negate in unsigned arithmetic so that the most negative int does not overflow. */
    magnitude = (i < 0) ? 0u - (unsigned int)i : (unsigned int)i;

    out = str;
    if (i < 0)
    {
        *out++ = '-';
    }
    first = out;

    /* Digits come out least significant first ... */
    do
    {
        *out++ = digits[magnitude % base];
        magnitude /= base;
    } while (magnitude != 0);
    *out = '\0';

    /* ... so reverse them in place, leaving the sign where it is. */
    for (last = out - 1; first < last; first++, last--)
    {
        char tmp = *first;
        *first = *last;
        *last = tmp;
    }
    return str;
}
/*
 * Copies the item that precedes the next delimiter in *str into item and
 * advances *str to the character following that delimiter.
 *
 * item  : destination buffer of size bytes; it is zero-filled first, so the
 *         stored item is always null-terminated.
 * size  : capacity of item in bytes. At most size - 1 item characters are
 *         accepted so that the terminator always fits.
 * str   : in/out cursor into the source text. Must not be NULL and must
 *         point to a valid string.
 * delim : delimiter string; must be non-NULL and non-empty.
 *
 * Returns
 *   INVALID_DELIM   - delim is NULL or empty;
 *   INVALID_CHAR    - the first source character is neither printable nor
 *                     white space (this includes an empty source string);
 *   DELIM_NOT_FOUND - no delimiter terminates an item that fits into item
 *                     (*str is left unchanged);
 *   EMPTY_ITEM      - the delimiter is the very first thing in the source
 *                     (*str is still advanced past it);
 *   CSV_SUCCESS     - the item was copied and *str advanced.
 */
ErrCSV ExtractItem(CHAR *item, BYTE size, CHAR **str, CHAR *delim)
{
    CHAR *s = *str;
    CHAR *pos = NULL;
    size_t delimLen;
    size_t maxItemLen;

    memset(item, '\0', size);
    if (!delim || !*delim)
    {
        return INVALID_DELIM;
    }
    if (!(Isgraph(*s) || Isspace(*s)))
    {
        return INVALID_CHAR;
    }

    delimLen = strlen(delim);
    /* Reserve one byte for the terminator: an item of exactly size
       characters would otherwise fill the buffer and leave it unterminated. */
    maxItemLen = (size > 0) ? (size_t)size - 1 : 0;

    pos = StrstrSpan(s, delim, (int)(maxItemLen + delimLen));
    if (pos == NULL)
    {
        return DELIM_NOT_FOUND;
    }

    *str = pos + delimLen;

    if (pos == s)
    {
        return EMPTY_ITEM;
    }
    memcpy(item, s, (size_t)(pos - s));

    return CSV_SUCCESS;
}




/*
 * Counts the ','-separated items of the first line found at line.
 *
 * line : text starting at the beginning of a line that is terminated by
 *        "\r\n". Must not be NULL.
 * num  : receives the item count (number of commas before the line
 *        terminator, plus one for the last item). Set to 0 when the line
 *        terminator cannot be found.
 *
 * Returns INVALID_CHAR when the first character is neither printable nor
 * white space (*num is left untouched), the error reported by CntLineLen()
 * when the line length cannot be determined, otherwise CSV_SUCCESS.
 */
ErrCSV CntItemNumPerLine(CHAR *line, int *num)
{
    char ItemDelim[] = ",";
    char LineDelim[] = "\r\n";
    char *cursor = line;
    char *contentEnd;
    char *hit;
    int ctr = 0;
    int LineLen = 0;
    ErrCSV rtn;

    if (!(Isgraph(*line) || Isspace(*line)))
    {
        return INVALID_CHAR;
    }

    rtn = CntLineLen(line, &LineLen);
    if (rtn != CSV_SUCCESS)
    {
        /* Without a line length the search span below would be negative. */
        *num = 0;
        return rtn;
    }

    /* First character of the line terminator; commas are only searched
       before it. The bound is fixed here so that it does not move along
       with the cursor. */
    contentEnd = line + LineLen - strlen(LineDelim);

    while ((hit = StrstrSpan(cursor, ItemDelim, (int)(contentEnd - cursor))) != NULL)
    {
        cursor = hit + strlen(ItemDelim);
        ctr++;
    }

    *num = ctr + 1; /* the last item is delimited by the line terminator */
    return CSV_SUCCESS;
}




/*
 * Measures the length of the first line at line, including its "\r\n"
 * terminator.
 *
 * line : text to measure. Must not be NULL.
 * len  : receives the length in characters, terminator included; set to 0
 *        on every error path.
 *
 * Returns INVALID_CHAR when the first character is neither printable nor
 * white space, DELIM_NOT_FOUND when no "\r\n" occurs within the first 1024
 * characters, otherwise CSV_SUCCESS. Diagnostic messages are printed on the
 * error paths and the match address is printed on success.
 */
ErrCSV CntLineLen(CHAR *line, int *len)
{
    char *pos = NULL;
    const int span = 1024;      /* same value as BUF_SIZE, the size of the line buffer */
    /* The original notes label "\r\n" as the ARM-platform terminator and
       mention "\n" as the alternative for PC/Windows builds. */
    char *delim = "\r\n";

    if (!(Isgraph(*line) || Isspace(*line)))
    {
        printf("CntLineLen(): INVALID_CHAR\n");
        *len = 0;
        return INVALID_CHAR;
    }

    pos = StrstrSpan(line, delim, span);
    if (pos == NULL)
    {
        printf("the string delimiter is not find\n");
        *len = 0;
        return DELIM_NOT_FOUND;
    }

    /* %p with a void* prints the whole address; an (int) cast would truncate
       it where pointers are wider than int. */
    printf("pos %p \n", (void *)pos);
    *len = (int)(pos - line) + (int)strlen(delim);
    return CSV_SUCCESS;
}




/************************ For PC Platform ********************/
/* Fallback for builds where the line-buffer size is not already defined. */
#ifndef BUF_SIZE
#define BUF_SIZE 1024
#endif

/*
 * Reads one line from file into buf.
 *
 * Characters are copied up to and including the first '\n', until end of
 * file, or until BUF_SIZE - 1 characters have been stored, whichever comes
 * first. The stored text is always null-terminated. When a line is longer
 * than the buffer, the rest of it stays in the stream for the next call.
 *
 * file : stream to read from.
 * buf  : destination buffer of at least BUF_SIZE bytes.
 *
 * Returns the number of characters stored (terminator not counted), or 0
 * when file or buf is NULL or when a character that is neither printable
 * nor white space is read.
 */
int ReadLine(FILE* file, char *buf)
{
    int ch;             /* int, not char: fgetc() returns EOF outside the char range */
    size_t used = 0;

    if (!file || !buf)
    {
        return 0;
    }

    while (used < (size_t)BUF_SIZE - 1 && (ch = fgetc(file)) != EOF)
    {
        if (!(Isgraph(ch) || Isspace(ch))) /* the input character is invalid */
        {
            buf[used] = '\0';
            return 0;
        }
        buf[used++] = (char)ch;
        if (ch == '\n')
        {
            break;
        }
    }

    buf[used] = '\0';
    return (int)used;
}
### 使用Python将CSV文件数据导入并插入到数据库 #### 导入库 为了实现这一目标,首先需要确保已安装必要的库。对于MySQL数据库操作,`pandas`用于读取CSV文件而`PyMySQL`作为连接MySQL的接口被广泛采用。 ```bash pip install pymysql pandas ``` 如果遇到模块未找到错误,可以通过上述命令来解决相应依赖问题[^2]。 #### 连接配置与建立链接 创建一个函数用来初始化同MySQL服务器之间的通信链路: ```python import pymysql.cursors from typing import Tuple, Any def create_connection() -> Tuple[Any, Any]: connection = pymysql.connect( host='localhost', user='root', # 替换成自己的用户名 password='password', # 替换成自己的密码 database='test_db', # 数据库名称 cursorclass=pymysql.cursors.DictCursor, local_infile=1 # 启用LOAD DATA LOCAL INFILE功能 ) return connection, connection.cursor() ``` 这里启用了`local_infile`选项以便后续能够利用高效的批量加载语句[^5]。 #### CSV解析与预处理 借助于Pandas简化CSV文件的操作流程: ```python import pandas as pd def load_csv(file_path: str) -> pd.DataFrame: df = pd.read_csv(file_path) return df ``` 此部分负责把CSV文档转换成DataFrame对象形式,方便进一步加工处理。 #### 插入记录至表内 提供两种不同的策略向指定表格添加新条目——逐行提交或是整体迁移。 ##### 方法一:单次事务化插入 适合中小规模的数据集,在每次迭代过程中构建SQL指令并通过游标执行之。 ```python def insert_data_one_by_one(cursor, dataframe): for index, row in dataframe.iterrows(): sql_query = "INSERT INTO `table_name` (`column_1`, ...) VALUES (%s, ...)" values_tuple = tuple(row[col] for col in dataframe.columns) try: cursor.execute(sql_query, values_tuple) except Exception as e: print(f"Failed to execute query {e}") cursor.connection.commit() ``` 这种方法虽然直观易懂,但对于大型数据集效率较低。 ##### 方法二:使用LOAD DATA快速载入 针对海量数据场景推荐采取这种方式,它允许一次性完成整个CSV文件的内容传输工作。 ```sql LOAD DATA LOCAL INFILE '/path/to/csv' INTO TABLE table_name FIELDS TERMINATED BY ',' LINES TERMINATED BY '\r\n' IGNORE 1 ROWS; ``` 这段SQL脚本定义了分隔符以及跳过的头部信息等内容,具体路径需替换为实际存储位置。 最后调用如下代码片段实施以上逻辑: ```python if __name__ == "__main__": conn, cur = create_connection() csv_df = load_csv('data.csv') # Choose one of the following two lines based on your needs. 
# For small datasets or when you need more control over each record insertion process: # insert_data_one_by_one(cur, csv_df) # Or use this line for large-scale bulk loading operations with better performance: cur.execute(""" LOAD DATA LOCAL INFILE %s INTO TABLE table_name FIELDS TERMINATED BY ',' LINES TERMINATED BY '\\r\\n' IGNORE 1 LINES""", ('/absolute/path/data.csv',)) conn.close() ```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值