splitsize=2
maxbranch=3
outtree=tree;
input x y;
target c / level=nominal;
score out=out outfit=fit;
run;
/* List the fit statistics data set (FIT) using variable labels as   */
/* column headings and suppressing the observation-number column.    */
proc print data=fit
           label
           noobs;
   title3 'Fit Statistics for the Training Data';
run;
/* Cross-tabulate F_C (formatted target value) against I_C          */
/* (predicted category) from the scored data set OUT.                */
proc freq data=out;
   title3 'Misclassification Table';
   tables f_c * i_c;
run;
/* Scatter plot of Y against X from the scored data set OUT, with    */
/* one plotting symbol per predicted category (I_C).                 */
proc gplot data=out;
   title3 'Classification Results';

   /* One marker definition per I_C level; no interpolation lines. */
   symbol1 c=black i=none v=dot;
   symbol2 c=red   i=none v=square;
   symbol3 c=green i=none v=triangle;

   /* AXIS1 is used for the horizontal axis, AXIS2 for the vertical. */
   axis1 c=black width=2.5 order=(0 to 30 by 5);
   axis2 c=black width=2.5 minor=none order=(0 to 20 by 2);

   plot y*x=i_c / haxis=axis1
                  vaxis=axis2;
run;
/* Read the tree from the TREE data set (INTREE=) and score          */
/* SAMPSIO.DMSRING, writing the scored observations to GRIDOUT.      */
proc split intree=tree;
   score data=sampsio.dmsring
         nodmdb
         role=score
         out=gridout;
run;
/* Contour plots of the posterior probabilities P_C1, P_C2 and P_C3  */
/* over the (X, Y) grid in GRIDOUT, one plot per target level.       */
proc gcontour data=gridout;
   /* Only TITLE3 is set here. The original also issued              */
   /* TITLE2 'Posterior Probabilities', which duplicated this text   */
   /* and replaced the existing second title line for this step and  */
   /* every later step, because TITLE statements are global.         */
   title3 'Posterior Probabilities';

   /* Solid fill for the contour levels. */
   pattern v=msolid;

   /* A LEGEND definition is only used when a PLOT statement         */
   /* references it, hence LEGEND=LEGEND1 on each plot below.        */
   legend1 frame;

   plot y*x=p_c1 / pattern ctext=black coutline=gray legend=legend1;
   plot y*x=p_c2 / pattern ctext=black coutline=gray legend=legend1;
   plot y*x=p_c3 / pattern ctext=black coutline=gray legend=legend1;
run;
/* Scatter plot of Y against X from GRIDOUT with one plotting symbol */
/* per value of _NODE_.                                              */
proc gplot data=gridout;
   title3 'Leaf Nodes';

   /* Ten marker definitions, one per _NODE_ value plotted;          */
   /* no interpolation lines.                                        */
   symbol1  c=blue   i=none v=dot;
   symbol2  c=red    i=none v=square;
   symbol3  c=green  i=none v=triangle;
   symbol4  c=black  i=none v=star;
   symbol5  c=orange i=none v=plus;
   symbol6  c=brown  i=none v=circle;
   symbol7  c=cyan   i=none v==;
   symbol8  c=black  i=none v=hash;
   symbol9  c=gold   i=none v=:;
   symbol10 c=yellow i=none v=x;

   /* Stray second semicolon removed from the PLOT statement. */
   plot y*x=_node_;
run;
/* GPLOT supports RUN-group processing, so end it explicitly. */
quit;
Output
Scatter Plot of the Rings Training Data
Notice that the target levels are not linearly separable.
PROC PRINT Report of the Training Data Fit Statistics
SPLIT Example: RINGS Data
Entropy Criterion
Fit Statistics for the Training Data
Train: Sum of Frequencies                  180
Train: Sum of Case Weights Times Freq      540
Train: Frequency of Classified Cases       180
Train: Frequency of Unclassified Cases       0
Train: Misclassification Rate                0
Train: Maximum Absolute Error                0
Train: Sum of Squared Errors                 0
Train: Average Squared Error                 0
Train: Root Average Squared Error            0
Train: Divisor for VASE                    540
Train: Total Degrees of Freedom            360
PROC FREQ Misclassification Table for the Training Data
All target cases are correctly classified by the tree.
SPLIT Example: RINGS Data
Entropy Criterion
Misclassification Table
TABLE OF F_C BY I_C
F_C(Formatted Target Value) I_C(Predicted Category)
Frequency |
Percent |
Row Pct |
Col Pct | 1| 2| 3| Total
| | | |
-------------+--------+--------+--------+
1 | 8 | 0 | 0 | 8
| 4.44 | 0.00 | 0.00 | 4.44
| 100.00 | 0.00 | 0.00 |
| 100.00 | 0.00 | 0.00 |
-------------+--------+--------+--------+
2 | 0 | 59 | 0 | 59
| 0.00 | 32.78 | 0.00 | 32.78
| 0.00 | 100.00 | 0.00 |
| 0.00 | 95.16 | 0.00 |
-------------+--------+--------+--------+
3 | 0 | 3 | 110 | 113
| 0.00 | 1.67 | 61.11 | 62.78
| 0.00 | 2.65 | 97.35 |
| 0.00 | 4.84 | 100.00 |
-------------+--------+--------+--------+
Total 8 62 110 180
4.44 34.44 61.11 100.00
PROC GPLOT of the Classification Results
PROC GCONTOUR of the Posterior Probabilities
Note that in each of the contour plots, the contour with the largest posterior probabilities captures the actual distribution of the
target level.
PROC GPLOT of the Leaf Nodes
Copyright 2000 by SAS Institute Inc., Cary, NC, USA. All rights reserved.
Dostları ilə paylaş: |