4 7 4
C H A P T E R 1 5
|
W R I T I N G N EW L E A R N I N G S C H E M E S
/**
* Builds Id3 decision tree classifier.
*
* @param data the training data
* @exception Exception if classifier can't be built successfully
*/
public void buildClassifier(Instances data) throws Exception {
if (!data.classAttribute().isNominal()) {
throw new UnsupportedClassTypeException("Id3: nominal class, please.");
}
Enumeration enumAtt = data.enumerateAttributes();
while (enumAtt.hasMoreElements()) {
if (!((Attribute) enumAtt.nextElement()).isNominal()) {
throw new UnsupportedAttributeTypeException("Id3: only nominal " +
"attributes, please.");
}
}
Enumeration enum = data.enumerateInstances();
while (enum.hasMoreElements()) {
if (((Instance) enum.nextElement()).hasMissingValue()) {
throw new NoSupportForMissingValuesException("Id3: no missing values, "
+ "please.");
}
}
data = new Instances(data);
data.deleteWithMissingClass();
makeTree(data);
}
/**
* Method for building an Id3 tree.
*
* @param data the training data
* @exception Exception if decision tree can't be built successfully
*/
private void makeTree(Instances data) throws Exception {
// Check if no instances have reached this node.
if (data.numInstances() == 0) {
m_Attribute = null;
Figure 15.1 (continued)
P088407-Ch015.qxd 4/30/05 10:58 AM Page 474
1 5 . 1
A N E X A M P L E C L A S S I F I E R
4 7 5
m_ClassValue = Instance.missingValue();
m_Distribution = new double[data.numClasses()];
return;
}
// Compute attribute with maximum information gain.
double[] infoGains = new double[data.numAttributes()];
Enumeration attEnum = data.enumerateAttributes();
while (attEnum.hasMoreElements()) {
Attribute att = (Attribute) attEnum.nextElement();
infoGains[att.index()] = computeInfoGain(data, att);
}
m_Attribute = data.attribute(Utils.maxIndex(infoGains));
// Make leaf if information gain is zero.
// Otherwise create successors.
if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
m_Attribute = null;
m_Distribution = new double[data.numClasses()];
Enumeration instEnum = data.enumerateInstances();
while (instEnum.hasMoreElements()) {
Instance inst = (Instance) instEnum.nextElement();
m_Distribution[(int) inst.classValue()]++;
}
Utils.normalize(m_Distribution);
m_ClassValue = Utils.maxIndex(m_Distribution);
m_ClassAttribute = data.classAttribute();
} else {
Instances[] splitData = splitData(data, m_Attribute);
m_Successors = new Id3[m_Attribute.numValues()];
for (int j = 0; j < m_Attribute.numValues(); j++) {
m_Successors[j] = new Id3();
m_Successors[j].makeTree(splitData[j]);
}
}
}
/**
* Classifies a given test instance using the decision tree.
*
* @param instance the instance to be classified
Figure 15.1 (continued)
P088407-Ch015.qxd 4/30/05 10:58 AM Page 475
4 7 6
C H A P T E R 1 5
|
W R I T I N G N EW L E A R N I N G S C H E M E S
if (instance.hasMissingValue()) {
throw new NoSupportForMissingValuesException("Id3: no missing values, "
+ "please.");
}
if (m_Attribute == null) {
return m_ClassValue;
} else {
return m_Successors[(int) instance.value(m_Attribute)].
classifyInstance(instance);
}
}
/**
* Computes class distribution for instance using decision tree.
*
* @param instance the instance for which distribution is to be computed
* @return the class distribution for the given instance
*/
public double[] distributionForInstance(Instance instance)
throws NoSupportForMissingValuesException {
if (instance.hasMissingValue()) {
throw new NoSupportForMissingValuesException("Id3: no missing values, "
+ "please.");
}
if (m_Attribute == null) {
return m_Distribution;
} else {
return m_Successors[(int) instance.value(m_Attribute)].
distributionForInstance(instance);
}
}
* @return the classification
*/
public double classifyInstance(Instance instance)
throws NoSupportForMissingValuesException {
/**
* Prints the decision tree using the private toString method from below.
*
Figure 15.1 (continued)
P088407-Ch015.qxd 4/30/05 10:58 AM Page 476
1 5 . 1
A N E X A M P L E C L A S S I F I E R
4 7 7
* @return a textual description of the classifier
*/
public String toString() {
if ((m_Distribution == null) && (m_Successors == null)) {
return "Id3: No model built yet.";
}
return "Id3\n\n" + toString(0);
}
/**
* Computes information gain for an attribute.
*
* @param data the data for which info gain is to be computed
* @param att the attribute
* @return the information gain for the given attribute and data
*/
private double computeInfoGain(Instances data, Attribute att)
throws Exception {
double infoGain = computeEntropy(data);
Instances[] splitData = splitData(data, att);
for (int j = 0; j < att.numValues(); j++) {
if (splitData[j].numInstances() > 0) {
infoGain -= ((double) splitData[j].numInstances() /
(double) data.numInstances()) *
computeEntropy(splitData[j]);
}
}
return infoGain;
}
/**
* Computes the entropy of a dataset.
*
* @param data the data for which entropy is to be computed
* @return the entropy of the data's class distribution
*/
private double computeEntropy(Instances data) throws Exception {
double [] classCounts = new double[data.numClasses()];
Figure 15.1 (continued)
P088407-Ch015.qxd 4/30/05 10:58 AM Page 477
Dostları ilə paylaş: |