Data Mining: Practical Machine Learning Tools and Techniques, Second Edition



Yüklə 4,3 Mb.
Pdf görüntüsü
səhifə203/219
tarix08.10.2017
ölçüsü4,3 Mb.
#3816
1   ...   199   200   201   202   203   204   205   206   ...   219

4 7 4

CHAPTER 15 | WRITING NEW LEARNING SCHEMES



  /**

   * Builds Id3 decision tree classifier.

   * 

   * @param data the training data



   * @exception Exception if classifier can't be built successfully

   */


  public void buildClassifier(Instances data) throws Exception { 

    if (!data.classAttribute().isNominal()) { 

      throw new UnsupportedClassTypeException("Id3: nominal class, please.");

    } 


    Enumeration enumAtt = data.enumerateAttributes();

    while (enumAtt.hasMoreElements()) { 

      if (!((Attribute) enumAtt.nextElement()).isNominal()) { 

        throw new UnsupportedAttributeTypeException("Id3: only nominal " + 

                                                    "attributes, please.");

      } 


    } 

    Enumeration enum = data.enumerateInstances();

    while (enum.hasMoreElements()) { 

      if (((Instance) enum.nextElement()).hasMissingValue()) { 

        throw new NoSupportForMissingValuesException("Id3: no missing values, " 

                                                     + "please.");

      } 

    } 


    data = new Instances(data);

    data.deleteWithMissingClass(); 

    makeTree(data);

  } 


  /**

   * Method for building an Id3 tree.

   * 

   * @param data the training data



   * @exception Exception if decision tree can't be built successfully

   */


  private void makeTree(Instances data) throws Exception { 

    // Check if no instances have reached this node.

    if (data.numInstances() == 0) { 

      m_Attribute = null;



Figure 15.1 (continued)

P088407-Ch015.qxd  4/30/05  10:58 AM  Page 474




1 5 . 1

A N   E X A M P L E   C L A S S I F I E R

4 7 5

      m_ClassValue = Instance.missingValue();



      m_Distribution = new double[data.numClasses()];

      return;

    } 

    // Compute attribute with maximum information gain.



    double[] infoGains = new double[data.numAttributes()];

    Enumeration attEnum = data.enumerateAttributes();

    while (attEnum.hasMoreElements()) { 

      Attribute att = (Attribute) attEnum.nextElement();

      infoGains[att.index()] = computeInfoGain(data, att);

    } 


    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    // Make leaf if information gain is zero. 

    // Otherwise create successors.

    if (Utils.eq(infoGains[m_Attribute.index()], 0)) { 

      m_Attribute = null;

      m_Distribution = new double[data.numClasses()];

      Enumeration instEnum = data.enumerateInstances();

      while (instEnum.hasMoreElements()) { 

        Instance inst = (Instance) instEnum.nextElement();

        m_Distribution[(int) inst.classValue()]++;

      } 

      Utils.normalize(m_Distribution);

      m_ClassValue = Utils.maxIndex(m_Distribution);

      m_ClassAttribute = data.classAttribute();

    } else { 

      Instances[] splitData = splitData(data, m_Attribute);

      m_Successors = new Id3[m_Attribute.numValues()];

      for (int j = 0; j < m_Attribute.numValues(); j++) { 

        m_Successors[j] = new Id3();

        m_Successors[j].makeTree(splitData[j]);

      } 

    } 


  } 

  /**


   * Classifies a given test instance using the decision tree.

   * 


   * @param instance the instance to be classified

Figure 15.1 (continued)

P088407-Ch015.qxd  4/30/05  10:58 AM  Page 475




4 7 6

C H A P T E R   1 5

|

W R I T I N G   N EW   L E A R N I N G   S C H E M E S



    if (instance.hasMissingValue()) { 

      throw new NoSupportForMissingValuesException("Id3: no missing values, " 

                                                   + "please.");

    } 


    if (m_Attribute == null) { 

      return m_ClassValue;

    } else { 

      return m_Successors[(int) instance.value(m_Attribute)].

        classifyInstance(instance);

    } 


  } 

  /**


   * Computes class distribution for instance using decision tree.

   * 


   * @param instance the instance for which distribution is to be computed

   * @return the class distribution for the given instance

   */

  public double[] distributionForInstance(Instance instance) 



    throws NoSupportForMissingValuesException { 

    if (instance.hasMissingValue()) { 

      throw new NoSupportForMissingValuesException("Id3: no missing values, " 

                                                   + "please.");

    } 

    if (m_Attribute == null) { 



      return m_Distribution;

    } else { 

      return m_Successors[(int) instance.value(m_Attribute)].

        distributionForInstance(instance);

    } 

  } 


   * @return the classification

   */


  public double classifyInstance(Instance instance) 

    throws NoSupportForMissingValuesException { 

  /**

   * Prints the decision tree using the private toString method from below.



   * 

Figure 15.1 (continued)

P088407-Ch015.qxd  4/30/05  10:58 AM  Page 476




1 5 . 1

A N   E X A M P L E   C L A S S I F I E R

4 7 7

   * @return a textual description of the classifier



   */

  public String toString() { 

    if ((m_Distribution == null) && (m_Successors == null)) { 

      return "Id3: No model built yet.";

    } 

    return "Id3\n\n" + toString(0);



  } 

  /**


   * Computes information gain for an attribute.

   * 


   * @param data the data for which info gain is to be computed

   * @param att the attribute

   * @return the information gain for the given attribute and data

   */


  private double computeInfoGain(Instances data, Attribute att) 

    throws Exception { 

    double infoGain = computeEntropy(data);

    Instances[] splitData = splitData(data, att);

    for (int j = 0; j < att.numValues(); j++) { 

      if (splitData[j].numInstances() > 0) { 

        infoGain -= ((double) splitData[j].numInstances() / 

                     (double) data.numInstances()) * 

          computeEntropy(splitData[j]);

      } 


    } 

    return infoGain;

  } 

  /**


   * Computes the entropy of a dataset.

   * 


   * @param data the data for which entropy is to be computed

   * @return the entropy of the data's class distribution

   */

  private double computeEntropy(Instances data) throws Exception { 



    double [] classCounts = new double[data.numClasses()];

Figure 15.1 (continued)

P088407-Ch015.qxd  4/30/05  10:58 AM  Page 477




Yüklə 4,3 Mb.

Dostları ilə paylaş:
1   ...   199   200   201   202   203   204   205   206   ...   219




Verilənlər bazası müəlliflik hüququ ilə müdafiə olunur ©genderi.org 2024
rəhbərliyinə müraciət

    Ana səhifə