// Reposted from: http://www.cppblog.com/coreBugZJ/archive/2012/06/05/177654.html
// Building a decision tree with the ID3 algorithm
/*

Building a decision tree with the ID3 algorithm


---- Problem description:

Suppose we want ID3 to decide whether the weather is amenable to playing baseball. Over the course of 2 weeks, data is collected to help ID3 build a decision tree (see Table 1).

The target classification is "should we play baseball?", which can be yes or no.

The weather attributes are outlook, temperature, humidity, and wind speed. They can have the following values:

  • outlook     = { sunny, overcast, rain }
  • temperature = { hot, mild, cool }
  • humidity    = { high, normal }
  • wind        = { weak, strong }

Examples of set S are:

Table 1

Day  Outlook   Temperature  Humidity  Wind    Play ball

D1   Sunny     Hot          High      Weak    No
D2   Sunny     Hot          High      Strong  No
D3   Overcast  Hot          High      Weak    Yes
D4   Rain      Mild         High      Weak    Yes
D5   Rain      Cool         Normal    Weak    Yes
D6   Rain      Cool         Normal    Strong  No
D7   Overcast  Cool         Normal    Strong  Yes
D8   Sunny     Mild         High      Weak    No
D9   Sunny     Cool         Normal    Weak    Yes
D10  Rain      Mild         Normal    Weak    Yes
D11  Sunny     Mild         Normal    Strong  Yes
D12  Overcast  Mild         High      Strong  Yes
D13  Overcast  Hot          Normal    Weak    Yes
D14  Rain      Mild         High      Strong  No


---- Input:

Several lines, each consisting of 5 strings giving

Outlook Temperature Humidity Wind Play ball

as in the table above.


---- Output:

The decision tree.


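---- Analysis:

The classic ID3 algorithm.

The code assumes the training set is consistent.

A very classic algorithm; this was my first implementation, written in such a hurry that it produced code this ugly!
The ugliest code I have ever written! I really want to delete it and write it again.

*/
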
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>

using namespace std;

// Presumably an upper bound on the number of training examples; it is not
// referenced anywhere in the code visible in this file.
const int EXAMPLE_NUM = 1009;

// Number of weather attributes that describe one example.
const int PROP_NUM = 4;

// Number of distinct values each attribute can take, indexed by attribute id
// (0 = Outlook, 1 = Temperature, 2 = Humidity, 3 = Wind).
const int MAX_PROP[ PROP_NUM ] = { 3, 3, 2, 2 };

struct
Example
82
{
83
int prop[ PROP_NUM ];
84
bool ignp[ PROP_NUM ];
85
bool play;
86
87
int node;
88
Example *link;
89
}
;
90
91
struct Node;

// One outgoing branch of a decision node. The branches of a node form a
// singly linked list.
struct Link
{
    int   prop;  // attribute value code that selects this branch
    Node *node;  // subtree reached through this branch
    Link *link;  // next branch of the same parent, NULL at the end
};

// One node of the decision tree.
struct Node
{
    int   pid;   // id of the attribute tested here; negative marks a leaf
    Link *link;  // head of the branch list (NULL for a leaf)
    bool  play;  // decision stored in a leaf
};

map
<
string
,
int
>
Str2Val;
108
map
<
int
,
string
>
Val2Str[ PROP_NUM ];
109
string
Pro2Str[ PROP_NUM ]
=
{ "Outlook", "Temperature", "Humidity", "Wind" }
;
110
111
112
// Entropy of the play / don't-play label over the examples currently assigned
// to tree node nd. The natural logarithm is used, so the value is in nats.
//   s  : head of the example list (may be NULL)
//   nd : id of the node whose examples are measured
//   ts : out - number of examples assigned to nd
// Returns 0 when no example belongs to nd.
double entropy( Example *s, int nd, int &ts )
{
    int yes = 0;
    int no  = 0;

    for ( Example *cur = s; NULL != cur; cur = cur->link ) {
        if ( cur->node != nd ) {
            continue;
        }
        if ( cur->play ) {
            ++yes;
        }
        else {
            ++no;
        }
    }

    const int total = yes + no;
    ts = total;
    if ( 0 == total ) {
        return 0;
    }

    double h = 0;
    // A class with no members contributes nothing (and log(0) must be avoided).
    if ( 0 != yes ) {
        const double p = ((double)(yes)) / total;
        h += -p * log( p );
    }
    if ( 0 != no ) {
        const double p = ((double)(no)) / total;
        h += -p * log( p );
    }
    return h;
}

//
pid 合法
140
double
gain( Example
*
s,
int
nd,
int
pid )
{
141
Example *e;
142
double es, ev;
143
int ts, tv, i;
144
145
es = entropy( s, nd, ts );
146
147
if ( 0 == ts ) {
148
return 0;
149
}
150
151
for ( i = 0; i < MAX_PROP[ pid ]; ++i ) {
152
for ( e = s; NULL != e; e = e->link ) {
153
if ( (nd == e->node) && (i == e->prop[pid]) && (! e->ignp[pid]) ) {
154
e->node = nd + 1;
155
}
156
}
157
158
ev = entropy( s, nd+1, tv );
159
160
for ( e = s; NULL != e; e = e->link ) {
161
if ( nd+1 == e->node ) {
162
e->node = nd;
163
}
164
}
165
166
es -= ev * ( ((double)(tv)) / ts );
167
}
168
169
return es;
170
}
171
172
// Chooses the attribute with the largest information gain for node nd.
// An attribute is skipped when any example of nd has it flagged in ignp.
// On equal gain the lowest attribute id wins. Returns -1 when every attribute
// is skipped.
int gainMaxId( Example *s, int nd )
{
    bool used[ PROP_NUM ] = { false };

    // Collect the attributes already consumed by the examples of this node.
    for ( Example *cur = s; NULL != cur; cur = cur->link ) {
        if ( cur->node != nd ) {
            continue;
        }
        for ( int a = 0; a < PROP_NUM; ++a ) {
            if ( cur->ignp[ a ] ) {
                used[ a ] = true;
            }
        }
    }

    int    best     = -1;
    double bestGain = -1e100;
    for ( int a = 0; a < PROP_NUM; ++a ) {
        if ( used[ a ] ) {
            continue;
        }
        const double g = gain( s, nd, a );
        if ( g > bestGain ) {
            bestGain = g;
            best     = a;
        }
    }

    return best;
}

//
s 非空
202
bool
sameDecd( Example
*
s,
int
nd,
bool
&
decd )
{
203
bool set = false;
204
bool play;
205
while ( NULL != s ) {
206
if ( s->node == nd ) {
207
play = s->play;
208
set = true;
209
break;
210
}
211
s = s->link;
212
}
213
if ( ! set ) {
214
return false; ////
215
}
216
while ( NULL != s ) {
217
if ( (s->node == nd) && (s->play != play) ) {
218
return false;
219
}
220
s = s->link;
221
}
222
decd = play;
223
return true;
224
}
225
//
用完所有属性
226
bool
isLeaf( Example
*
s,
int
nd,
bool
&
decd )
{
227
int i;
228
while ( NULL != s ) {
229
if ( s->node == nd ) {
230
for ( i = 0; (i < PROP_NUM)&&(s->ignp[i]); ++i ) {
231
}
232
if ( i < PROP_NUM ) {
233
return false;
234
}
235
decd = s->play;
236
}
237
s = s->link;
238
}
239
return true;
240
}
241
242
int
node;
243
Node
*
createTreeSub( Example
*
example )
{
244
Node *res = new Node;
245
res->link = NULL;
246
res->pid = -1;
247
if ( sameDecd( example, node, res->play ) ) {
248
return res;
249
}
250
if ( isLeaf( example, node, res->play ) ) {
251
return res;
252
}
253
254
res->pid = gainMaxId( example, node );
255
256
int i, c, nd = node;
257
Example *e;
258
for ( i = 0; i < MAX_PROP[ res->pid ]; ++i ) {
259
e = example;
260
c = 0;
261
++node;
262
while ( NULL != e ) {
263
if ( (nd == e->node) && (i == e->prop[ res->pid ]) ) {
264
e->ignp[ res->pid ] = true;
265
e->node = node;
266
++c;
267
}
268
e = e->link;
269
}
270
if ( 0 < c ) {
271
Link *link = new Link;
272
link->node = createTreeSub( example );
273
link->prop = i;
274
link->link = res->link;
275
res->link = link;
276
}
277
}
278
279
return res;
280
}
281
282
// Builds a decision tree from the example list.
// Every example is first reset (no attribute used, assigned to node 0) and
// the global node counter is reset to 0. Returns NULL for an empty list;
// otherwise the caller releases the result with destroyTree().
Node *createTree( Example *example )
{
    if ( NULL == example ) {
        return NULL;
    }

    Example *cur = example;
    while ( NULL != cur ) {
        for ( int a = 0; a < PROP_NUM; ++a ) {
            cur->ignp[ a ] = false;
        }
        cur->node = 0;
        cur = cur->link;
    }

    node = 0;
    return createTreeSub( example );
}

void
outputTreeSub( Node
*
tree,
int
dep );
301
302
void
outputLink( Link
*
link,
int
dep,
int
pid )
{
303
if ( (NULL == link) || (0 > dep) ) {
304
return;
305
}
306
307
for ( int i = 0; i < dep; ++i ) {
308
cout << setw(16) << " ";
309
}
310
cout << left << setw(16) << Val2Str[pid][link->prop];
311
outputTreeSub( link->node, dep+1 );
312
313
outputLink( link->link, dep, pid );
314
}
315
316
// Prints the subtree rooted at `tree`, continuing on the current output line.
// A leaf prints its decision and ends the line. An internal node prints the
// name of the attribute it tests in a left-aligned 16-wide column, ends the
// line, and then prints its branches at depth dep + 1.
// Does nothing when tree is NULL or dep is negative.
void outputTreeSub( Node *tree, int dep )
{
    if ( (NULL == tree) || (0 > dep) ) {
        return;
    }

    if ( 0 > tree->pid ) {
        const char *decision = tree->play ? "Yes *" : "No *";
        cout << decision << endl;
    }
    else {
        cout << left << setw(16) << Pro2Str[tree->pid] << endl;
        outputLink( tree->link, dep+1, tree->pid );
    }
}

// Prints the whole decision tree to standard output, starting at depth 0.
void outputTree( Node *tree )
{
    const int rootDepth = 0;
    outputTreeSub( tree, rootDepth );
}

// Releases a whole decision tree and sets *pTree to NULL.
// Frees every subtree, every Link branch record and finally the node itself.
// (Previously the Link records were never deleted and leaked.)
// Does nothing when pTree or *pTree is NULL.
void destroyTree( Node **pTree )
{
    if ( (NULL == pTree) || (NULL == *pTree) ) {
        return;
    }

    Link *branch = (*pTree)->link;
    while ( NULL != branch ) {
        Link *next = branch->link;   // read before the branch record is freed
        destroyTree( &(branch->node) );
        delete branch;
        branch = next;
    }
    (*pTree)->link = NULL;

    delete *pTree;
    *pTree = NULL;
}

// Deletes every example in the list and sets *pExample to NULL.
// Does nothing when pExample or *pExample is NULL.
void destroyExample( Example **pExample )
{
    if ( NULL == pExample ) {
        return;
    }

    Example *cur = *pExample;
    if ( NULL == cur ) {
        return;
    }

    while ( NULL != cur ) {
        Example *next = cur->link;   // read before the example is freed
        delete cur;
        cur = next;
    }
    *pExample = NULL;
}

void
init()
{
362
Val2Str[ 0 ][ 0 ] = "Sunny";
363
Val2Str[ 0 ][ 1 ] = "Overcast";
364
Val2Str[ 0 ][ 2 ] = "Rain";
365
Str2Val[ "Sunny" ] = 0;
366
Str2Val[ "Overcast" ] = 1;
367
Str2Val[ "Rain" ] = 2;
368
369
Val2Str[ 1 ][ 0 ] = "Hot";
370
Val2Str[ 1 ][ 1 ] = "Mild";
371
Val2Str[ 1 ][ 2 ] = "Cool";
372
Str2Val[ "Hot" ] = 0;
373
Str2Val[ "Mild" ] = 1;
374
Str2Val[ "Cool" ] = 2;
375
376
Val2Str[ 2 ][ 0 ] = "High";
377
Val2Str[ 2 ][ 1 ] = "Normal";
378
Str2Val[ "High" ] = 0;
379
Str2Val[ "Normal" ] = 1;
380
381
Val2Str[ 3 ][ 0 ] = "Weak";
382
Val2Str[ 3 ][ 1 ] = "Strong";
383
Str2Val[ "Weak" ] = 0;
384
Str2Val[ "Strong" ] = 1;
385
}
386
387
Example
*
inputExample()
{
388
Example *preHead = new Example;
389
Example *ptr;
390
string Outlook, Temperature, Humidity, Wind, Play;
391
392
preHead->link = NULL;
393
ptr = preHead;
394
395
while ( cin >> Outlook >> Temperature >> Humidity >> Wind >> Play ) {
396
ptr->link = new Example;
397
ptr = ptr->link;
398
ptr->link = NULL;
399
400
if ( (Str2Val.find( Outlook ) == Str2Val.end()) ||
401
(Str2Val.find( Temperature ) == Str2Val.end()) ||
402
(Str2Val.find( Humidity ) == Str2Val.end()) ||
403
(Str2Val.find( Wind ) == Str2Val.end())
404
) {
405
destroyExample( &preHead );
406
return NULL;
407
}
408
ptr->prop[ 0 ] = Str2Val[ Outlook ];
409
ptr->prop[ 1 ] = Str2Val[ Temperature ];
410
ptr->prop[ 2 ] = Str2Val[ Humidity ];
411
ptr->prop[ 3 ] = Str2Val[ Wind ];
412
413
if ( "Yes" == Play ) {
414
ptr->play = true;
415
}
416
else if ( "No" == Play ) {
417
ptr->play = false;
418
}
419
else {
420
destroyExample( &preHead );
421
return NULL;
422
}
423
}
424
425
ptr = preHead->link;
426
delete preHead;
427
return ptr;
428
}
429
430
int
main()
{
431
init();
432
433
Example *example = inputExample();
434
if ( NULL == example ) {
435
cout << "输入不合法" << endl;
436
return 0;
437
}
438
439
Node *tree = createTree( example );
440
441
outputTree( tree );
442
443
destroyTree( &tree );
444
destroyExample( &example );
445
return 0;
446
}
447
448
449
/*

Input:

Sunny Hot High Weak No
Sunny Hot High Strong No
Overcast Hot High Weak Yes
Rain Mild High Weak Yes
Rain Cool Normal Weak Yes
Rain Cool Normal Strong No
Overcast Cool Normal Strong Yes
Sunny Mild High Weak No
Sunny Cool Normal Weak Yes
Rain Mild Normal Weak Yes
Sunny Mild Normal Strong Yes
Overcast Mild High Strong Yes
Overcast Hot Normal Weak Yes
Rain Mild High Strong No

Output:

Outlook
                Rain            Wind
                                                Strong          No *
                                                Weak            Yes *
                Overcast        Yes *
                Sunny           Humidity
                                                Normal          Yes *
                                                High            No *

*/