Data Mining: Practical Machine Learning Tools and Techniques, Second Edition



Yüklə 4,3 Mb.
Pdf görüntüsü
səhifə203/219
tarix08.10.2017
ölçüsü4,3 Mb.
#3816
1   ...   199   200   201   202   203   204   205   206   ...   219

4 7 4

CHAPTER 15 | WRITING NEW LEARNING SCHEMES



  /**

   * Builds Id3 decision tree classifier.

   * 

   * @param data the training data



   * @exception Exception if classifier can't be built successfully

   */


  public void buildClassifier(Instances data) throws Exception { 

    if (!data.classAttribute().isNominal()) { 

      throw new UnsupportedClassTypeException("Id3: nominal class, please.");

    } 


    Enumeration enumAtt = data.enumerateAttributes();

    while (enumAtt.hasMoreElements()) { 

      if (!((Attribute) enumAtt.nextElement()).isNominal()) { 

        throw new UnsupportedAttributeTypeException("Id3: only nominal " + 

                                                    "attributes, please.");

      } 


    } 

    Enumeration enum = data.enumerateInstances();

    while (enum.hasMoreElements()) { 

      if (((Instance) enum.nextElement()).hasMissingValue()) { 

        throw new NoSupportForMissingValuesException("Id3: no missing values, " 

                                                     + "please.");

      } 

    } 


    data = new Instances(data);

    data.deleteWithMissingClass(); 

    makeTree(data);

  } 


  /**

   * Method for building an Id3 tree.

   * 

   * @param data the training data



   * @exception Exception if decision tree can't be built successfully

   */


  private void makeTree(Instances data) throws Exception { 

    // Check if no instances have reached this node.

    if (data.numInstances() == 0) { 

      m_Attribute = null;



Figure 15.1 (continued)

P088407-Ch015.qxd  4/30/05  10:58 AM  Page 474




1 5 . 1

A N   E X A M P L E   C L A S S I F I E R

4 7 5

      m_ClassValue = Instance.missingValue();



      m_Distribution = new double[data.numClasses()];

      return;

    } 

    // Compute attribute with maximum information gain.



    double[] infoGains = new double[data.numAttributes()];

    Enumeration attEnum = data.enumerateAttributes();

    while (attEnum.hasMoreElements()) { 

      Attribute att = (Attribute) attEnum.nextElement();

      infoGains[att.index()] = computeInfoGain(data, att);

    } 


    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    // Make leaf if information gain is zero. 

    // Otherwise create successors.

    if (Utils.eq(infoGains[m_Attribute.index()], 0)) { 

      m_Attribute = null;

      m_Distribution = new double[data.numClasses()];

      Enumeration instEnum = data.enumerateInstances();

      while (instEnum.hasMoreElements()) { 

        Instance inst = (Instance) instEnum.nextElement();

        m_Distribution[(int) inst.classValue()]++;

      } 

      Utils.normalize(m_Distribution);

      m_ClassValue = Utils.maxIndex(m_Distribution);

      m_ClassAttribute = data.classAttribute();

    } else { 

      Instances[] splitData = splitData(data, m_Attribute);

      m_Successors = new Id3[m_Attribute.numValues()];

      for (int j = 0; j < m_Attribute.numValues(); j++) { 

        m_Successors[j] = new Id3();

        m_Successors[j].makeTree(splitData[j]);

      } 

    } 


  } 

  /**


   * Classifies a given test instance using the decision tree.

   * 


   * @param instance the instance to be classified

Figure 15.1 (continued)

P088407-Ch015.qxd  4/30/05  10:58 AM  Page 475




4 7 6

C H A P T E R   1 5

|

W R I T I N G   N EW   L E A R N I N G   S C H E M E S



    if (instance.hasMissingValue()) { 

      throw new NoSupportForMissingValuesException("Id3: no missing values, " 

                                                   + "please.");

    } 


    if (m_Attribute == null) { 

      return m_ClassValue;

    } else { 

      return m_Successors[(int) instance.value(m_Attribute)].

        classifyInstance(instance);

    } 


  } 

  /**


   * Computes class distribution for instance using decision tree.

   * 


   * @param instance the instance for which distribution is to be computed

   * @return the class distribution for the given instance

   */

  public double[] distributionForInstance(Instance instance) 



    throws NoSupportForMissingValuesException { 

    if (instance.hasMissingValue()) { 

      throw new NoSupportForMissingValuesException("Id3: no missing values, " 

                                                   + "please.");

    } 

    if (m_Attribute == null) { 



      return m_Distribution;

    } else { 

      return m_Successors[(int) instance.value(m_Attribute)].

        distributionForInstance(instance);

    } 

  } 


   * @return the classification

   */


  public double classifyInstance(Instance instance) 

    throws NoSupportForMissingValuesException { 

  /**

   * Prints the decision tree using the private toString method from below.



   * 

Figure 15.1 (continued)

P088407-Ch015.qxd  4/30/05  10:58 AM  Page 476




1 5 . 1

A N   E X A M P L E   C L A S S I F I E R

4 7 7

   * @return a textual description of the classifier



   */

  public String toString() { 

    if ((m_Distribution == null) && (m_Successors == null)) { 

      return "Id3: No model built yet.";

    } 

    return "Id3\n\n" + toString(0);



  } 

  /**


   * Computes information gain for an attribute.

   * 


   * @param data the data for which info gain is to be computed

   * @param att the attribute

   * @return the information gain for the given attribute and data

   */


  private double computeInfoGain(Instances data, Attribute att) 

    throws Exception { 

    double infoGain = computeEntropy(data);

    Instances[] splitData = splitData(data, att);

    for (int j = 0; j < att.numValues(); j++) { 

      if (splitData[j].numInstances() > 0) { 

        infoGain -= ((double) splitData[j].numInstances() / 

                     (double) data.numInstances()) * 

          computeEntropy(splitData[j]);

      } 


    } 

    return infoGain;

  } 

  /**


   * Computes the entropy of a dataset.

   * 


   * @param data the data for which entropy is to be computed

   * @return the entropy of the data's class distribution

   */

  private double computeEntropy(Instances data) throws Exception { 



    double [] classCounts = new double[data.numClasses()];

Figure 15.1 (continued)

P088407-Ch015.qxd  4/30/05  10:58 AM  Page 477




Yüklə 4,3 Mb.

Dostları ilə paylaş:
1   ...   199   200   201   202   203   204   205   206   ...   219




Verilənlər bazası müəlliflik hüququ ilə müdafiə olunur ©genderi.org 2024
rəhbərliyinə müraciət

    Ana səhifə