未做buffer的程序代码:
1
#include <stdio.h>
2
#include <stdlib.h>
3
#include <sys/types.h>
4
#include <sys/stat.h>
5
#include <string.h>
6
#include <unistd.h>
7
#include <fcntl.h>
8
9
/* Not referenced by any code shown in this listing. */
#define CHAR_BUFFER 1
/* Bytes requested per read() call while position_rw_pntr scans the file. */
#define BUFFER_SIZE 1024
/* Bytes get_next_line allocates for a line buffer. */
#define LINE_BUFFER 128
/* Entries position_rw_pntr allocates for its table of line-start offsets. */
#define MAX_FILE_LINE_NUM 1000000

int rw_ptr;  // read-write pointer: file offset of the next byte to read
int lineNum; // number of lines in the file ('\n' bytes counted by position_rw_pntr)

/* Scans the file, sets lineNum and points rw_ptr at the first line to print.
 * Returns -1 on failure. */
int position_rw_pntr(int fd, int num_lines);
/* Returns the line starting at rw_ptr as a malloc'd string (caller frees),
 * or NULL if the file cannot be positioned. */
char* get_next_line(int fd);
/* Returns the byte at rw_ptr and advances rw_ptr, or EOF. */
int get_next_char(int fd);
20
21
int main(int argc, char *argv[])
22

{
23
int midLines;
24
int i = 0;
25
char* lineString;
26
int succ;
27
int fd;
28
if (argc != 3)
{
29
printf("Usage: lab2 <lines> <file>\n");
30
return -1;
31
}
32
33
midLines = atoi(argv[1]);
34
if ((fd = open(argv[2], O_RDONLY)) == -1)
35
{
36
perror("Open file error");
37
return EXIT_FAILURE;
38
}
39
40
if (position_rw_pntr(fd, midLines) == -1)
41
{
42
perror("Position_rw_pntr Error");
43
return EXIT_FAILURE;
44
}
45
46
while (i != midLines && i != lineNum)
47
{
48
lineString = get_next_line(fd);
49
printf("%s\n", lineString);
50
i++;
51
}
52
53
free(lineString);
54
close(fd);
55
56
return EXIT_SUCCESS;
57
}
58
59
int position_rw_pntr(int fd, int num_lines)
60

{
61
int start;
62
int i, n;
63
char buf[BUFFER_SIZE];
64
int* lineCount = (int *)malloc(MAX_FILE_LINE_NUM * sizeof(int));
65
int byteNum = 0;
66
67
if (lseek(fd, 0, SEEK_SET) == -1)
68
{
69
return -1;
70
}
71
72
lineNum = 0;
73
lineCount[lineNum] = 0;
74
while ((n = read(fd, buf, BUFFER_SIZE)) != 0 )
75
{
76
for (i = 0; i < n; i++)
77
{
78
byteNum++;
79
if (buf[i] == '\n')
80
{
81
lineCount[++lineNum] = byteNum;
82
}
83
}
84
}
85
86
if (lineNum < num_lines)
87
{
88
rw_ptr = 0;
89
}
90
else
91
{
92
if (lineNum % 2)
93
{
94
rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines + 1) % 2 - 1];
95
}
96
else
97
{
98
rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines) % 2 - 1];
99
}
100
}
101
102
return 1;
103
}
104
105
char* get_next_line(int fd)
106

{
107
int n, i;
108
char byteChar;
109
char* buf;
110
111
if (lseek(fd, rw_ptr, SEEK_SET) == -1)
112
{
113
return NULL;
114
}
115
116
buf = (char *)malloc(LINE_BUFFER * sizeof(char));
117
118
for (i = 0; i < LINE_BUFFER; i++)
119
{
120
byteChar = (char)get_next_char(fd);
121
if (byteChar == EOF || byteChar == '\n')
122
{
123
buf[i] = '\0';
124
break;
125
}
126
else
127
{
128
buf[i] = byteChar;
129
}
130
}
131
132
return buf;
133
}
134
135
int get_next_char(int fd)
136

{
137
char charBuf[1];
138
if (lseek(fd, rw_ptr, SEEK_SET) == -1)
139
{
140
return EOF;
141
}
142
143
rw_ptr++;
144
if (read(fd, charBuf, 1) == 0)
145
{
146
return EOF;
147
}
148
return charBuf[0];
149
}
150

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22



23

24

25

26

27

28



29

30

31

32

33

34

35



36

37

38

39

40

41



42

43

44

45

46

47



48

49

50

51

52

53

54

55

56

57

58

59

60



61

62

63

64

65

66

67

68



69

70

71

72

73

74

75



76

77



78

79

80



81

82

83

84

85

86

87



88

89

90

91



92

93



94

95

96

97



98

99

100

101

102

103

104

105

106



107

108

109

110

111

112



113

114

115

116

117

118

119



120

121

122



123

124

125

126

127



128

129

130

131

132

133

134

135

136



137

138

139



140

141

142

143

144

145



146

147

148

149

150

做了buffer的程序代码:
1
#include <stdio.h>
2
#include <stdlib.h>
3
#include <sys/types.h>
4
#include <sys/stat.h>
5
#include <string.h>
6
#include <unistd.h>
7
#include <fcntl.h>
8
9
/* Bytes requested per read() call while position_rw_pntr scans the file. */
#define BUFFER_SIZE 1024
/* Size of lineBuf; also the number of bytes get_next_char requests when it
 * fills the buffer at the start of a line. */
#define INIT_BUFF_SIZE 64
/* Bytes get_next_char requests when it refills an exhausted lineBuf. */
#define INC_BUFF_SIZE 8
/* Bytes get_next_line allocates for a line buffer. */
#define LINE_BUFFER 128
/* Entries position_rw_pntr allocates for its table of line-start offsets. */
#define MAX_FILE_LINE_NUM 1000000

int rw_ptr;  // read-write pointer: file offset of the next byte to hand out
int lineNum; // number of lines in the file ('\n' bytes counted by position_rw_pntr)
char lineBuf[INIT_BUFF_SIZE]; // bytes most recently read from the file by get_next_char
int linePtr;    // index of the next unread byte in lineBuf; get_next_line resets it to 0
int curBufSize; // result of the last read() into lineBuf

/* Scans the file, sets lineNum and points rw_ptr at the first line to print.
 * Returns -1 on failure. */
int position_rw_pntr(int fd, int num_lines);
/* Returns the line starting at rw_ptr as a malloc'd string (caller frees),
 * or NULL if the file cannot be positioned. */
char* get_next_line(int fd);
/* Returns the next byte from lineBuf (refilling it from the file when
 * needed) and advances rw_ptr, or EOF. */
int get_next_char(int fd);
24
25
int main(int argc, char *argv[])
26

{
27
int midLines;
28
int i = 0;
29
char* lineString;
30
int succ;
31
int fd;
32
if (argc != 3)
{
33
printf("Usage: lab2 <lines> <file>\n");
34
return -1;
35
}
36
37
midLines = atoi(argv[1]);
38
if ((fd = open(argv[2], O_RDONLY)) == -1)
39
{
40
perror("Open file error");
41
return EXIT_FAILURE;
42
}
43
44
if (position_rw_pntr(fd, midLines) == -1)
45
{
46
perror("Position_rw_pntr Error");
47
return EXIT_FAILURE;
48
}
49
50
while (i != midLines && i != lineNum)
51
{
52
lineString = get_next_line(fd);
53
printf("%s\n", lineString);
54
i++;
55
}
56
57
free(lineString);
58
close(fd);
59
60
return EXIT_SUCCESS;
61
}
62
63
int position_rw_pntr(int fd, int num_lines)
64

{
65
int start;
66
int i, n;
67
char buf[BUFFER_SIZE];
68
int* lineCount = (int *)malloc(MAX_FILE_LINE_NUM * sizeof(int));
69
int byteNum = 0;
70
71
if (lseek(fd, 0, SEEK_SET) == -1)
72
{
73
return -1;
74
}
75
76
lineNum = 0;
77
lineCount[lineNum] = 0;
78
while ((n = read(fd, buf, BUFFER_SIZE)) != 0 )
79
{
80
for (i = 0; i < n; i++)
81
{
82
byteNum++;
83
if (buf[i] == '\n')
84
{
85
lineCount[++lineNum] = byteNum;
86
}
87
}
88
}
89
90
if (lineNum < num_lines)
91
{
92
rw_ptr = 0;
93
}
94
else
95
{
96
if (lineNum % 2)
97
{
98
rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines + 1) % 2 - 1];
99
}
100
else
101
{
102
rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines) % 2 - 1];
103
}
104
}
105
106
return 1;
107
}
108
109
char* get_next_line(int fd)
110

{
111
int n, i;
112
char byteChar;
113
char* buf;
114
115
if (lseek(fd, rw_ptr, SEEK_SET) == -1)
116
{
117
return NULL;
118
}
119
120
linePtr = 0;
121
buf = (char *)malloc(LINE_BUFFER * sizeof(char));
122
123
for (i = 0; i < LINE_BUFFER; i++)
124
{
125
byteChar = (char)get_next_char(fd);
126
if (byteChar == EOF || byteChar == '\n')
127
{
128
buf[i] = '\0';
129
break;
130
}
131
else
132
{
133
buf[i] = byteChar;
134
}
135
}
136
137
return buf;
138
}
139
140
int get_next_char(int fd)
141

{
142
int n;
143
if (lseek(fd, rw_ptr, SEEK_SET) == -1)
144
{
145
return EOF;
146
}
147
148
if (linePtr == 0)
149
{
150
if ((curBufSize = read(fd, lineBuf, INIT_BUFF_SIZE)) == 0)
151
{
152
return EOF;
153
}
154
linePtr = 0;
155
}
156
157
if (linePtr < curBufSize)
158
{
159
rw_ptr++;
160
return lineBuf[linePtr++];
161
}
162
else
163
{
164
if ((curBufSize = read(fd, lineBuf, INC_BUFF_SIZE)) == 0)
165
{
166
return EOF;
167
}
168
else
169
{
170
rw_ptr++;
171
linePtr = 0;
172
return lineBuf[linePtr++];
173
}
174
}
175
}
176
177

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26



27

28

29

30

31

32



33

34

35

36

37

38

39



40

41

42

43

44

45



46

47

48

49

50

51



52

53

54

55

56

57

58

59

60

61

62

63

64



65

66

67

68

69

70

71

72



73

74

75

76

77

78

79



80

81



82

83

84



85

86

87

88

89

90

91



92

93

94

95



96

97



98

99

100

101



102

103

104

105

106

107

108

109

110



111

112

113

114

115

116



117

118

119

120

121

122

123

124



125

126

127



128

129

130

131

132



133

134

135

136

137

138

139

140

141



142

143

144



145

146

147

148

149



150

151



152

153

154

155

156

157

158



159

160

161

162

163



164

165



166

167

168

169



170

171

172

173

174

175

176

177

最后用了一个shell脚本来测试两个的运行时间,比较其优劣(其中的BigFile.txt是一个很大的文件):
#!/bin/bash
# Runs the unbuffered (part1) and buffered (part2) builds twice each on
# BigFile.txt and prints the wall-clock time before and after every run so
# the two can be compared.

# run_timed LABEL EXE OUTFILE
# Prints the start time, runs EXE on BigFile.txt with its output redirected
# to OUTFILE, then prints the finish time.
run_timed() {
    local label=$1
    local exe=$2
    local out=$3

    # `date +%T` prints HH:MM:SS directly, so the result does not depend on
    # which field of the locale-specific default `date` output holds the time.
    echo "start test $label at $(date +%T)"
    "$exe" 300000 BigFile.txt > "$out"
    echo "finish test $label at $(date +%T)"
}

run_timed part1 ../part1/lab2.exe part1.bt
run_timed part2 ../part2/lab2.exe part2.bt
run_timed part1 ../part1/lab2.exe part1.bt
run_timed part2 ../part2/lab2.exe part2.bt
两个程序跑下来,前者要比后者慢一倍。可见buffer的好处。如果调高buffer的size,效果将更明显。