Data Mining: Practical Machine Learning Tools and Techniques, Second Edition

Yüklə 4,3 Mb.
Pdf görüntüsü
ölçüsü4,3 Mb.
1   ...   196   197   198   199   200   201   202   203   ...   219

14.2

GOING THROUGH THE CODE

463


/** Java program for classifying text messages into two classes. */


import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.ObjectInputStream;
import java.io.Serializable;

import weka.classifiers.Classifier;
import weka.classifiers.trees.J48;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToWordVector;


public class MessageClassifier implements Serializable { 

  /*
   * The training data gathered so far. Assigned by the constructor, grown by
   * updateData(), and read by classifyMessage().
   */

  private Instances m_Data = null;

  /*
   * The filter used to generate the word counts. classifyMessage() runs it
   * over m_Data and then takes the filtered form of the message being
   * classified from it.
   */

  private StringToWordVector m_Filter = new StringToWordVector();

  /*
   * The actual classifier (a J48 instance). classifyMessage() uses it to
   * predict the class of the filtered message.
   */

  private Classifier m_Classifier = new J48();

  /*
   * Whether the model is up to date with m_Data. Cleared by updateData() and
   * set by classifyMessage() once it has refreshed the model; starts out
   * false, the Java default for a boolean field.
   */

  private boolean m_UpToDate;


  /** Constructs empty training dataset. */


  public MessageClassifier() throws Exception { 

    String nameOfDataset = "MessageClassificationProblem";

    // Create vector of attributes.

    FastVector attributes = new FastVector(2);

    // Add attribute for holding messages.

    attributes.addElement(new Attribute("Message", (FastVector)null));

Figure 14.1 Source code for the message classifier.

P088407-Ch014.qxd  4/30/05  11:04 AM  Page 463

4 6 4

C H A P T E R   1 4


E M B E D D E D   M AC H I N E   L E A R N I N G

    // Add class attribute.

    FastVector classValues = new FastVector(2);



    attributes.addElement(new Attribute("Class", classValues));

    // Create dataset with initial capacity of 100, and set index of class.

    m_Data = new Instances(nameOfDataset, attributes, 100);

    m_Data.setClassIndex(m_Data.numAttributes() - 1);



  /** Updates data using the given training message. */


  public void updateData(String message, String classValue) throws Exception { 

    // Make message into instance.

    Instance instance = makeInstance(message, m_Data);

    // Set class value for instance.


    // Add instance to training data.


    m_UpToDate = false;



  /** Classifies a given message. */


  public void classifyMessage(String message) throws Exception { 

    // Check whether classifier has been built.

    if (m_Data.numInstances() == 0) { 

      throw new Exception("No classifier available.");


    // Check whether classifier and filter are up to date.

    if (!m_UpToDate) { 

Figure 14.1 (continued)

P088407-Ch014.qxd  4/30/05  11:04 AM  Page 464

1 4 . 2

G O I N G   T H RO U G H   T H E   C O D E

4 6 5

      // Initialize filter and tell it about the input format.


      // Generate word counts from the training data.

      Instances filteredData  = Filter.useFilter(m_Data, m_Filter);

      // Rebuild classifier.


      m_UpToDate = true;


    // Make separate little test set so that message

    // does not get added to string attribute in m_Data.

    Instances testset = m_Data.stringFreeStructure();

    // Make message into test instance.

    Instance instance = makeInstance(message, testset);

    // Filter instance.


    Instance filteredInstance = m_Filter.output();

    // Get index of predicted class value.

    double predicted = m_Classifier.classifyInstance(filteredInstance);

    // Output class value.

    System.err.println("Message classified as : " + 




  /** Method that converts a text message into an instance. */


  private Instance makeInstance(String text, Instances data) { 

    // Create instance of length two.

    Instance instance = new Instance(2);

    // Set value for message attribute

    Attribute messageAtt = data.attribute("Message");

    instance.setValue(messageAtt, messageAtt.addStringValue(text));

Figure 14.1 (continued)

P088407-Ch014.qxd  4/30/05  11:04 AM  Page 465

4 6 6

C H A P T E R   1 4


E M B E D D E D   M AC H I N E   L E A R N I N G

    // Give instance access to attribute information from the dataset.


    return instance;



  /** Main method. */


  public static void main(String[] options) { 

    try { 

      // Read message file into string.

      String messageName = Utils.getOption('m', options);

      if (messageName.length() == 0) { 

        throw new Exception("Must provide name of message file.");


      FileReader m = new FileReader(messageName);

      StringBuffer message = new StringBuffer(); int l;

      while ((l = != -1) { 




      // Check if class value is given.

      String classValue = Utils.getOption('c', options);

      // If model file exists, read it, otherwise create new one.

      String modelName = Utils.getOption('o', options);

      if (modelName.length() == 0) { 

        throw new Exception("Must provide name of model file.");


      MessageClassifier messageCl;

      try { 

        ObjectInputStream modelInObjectFile = 

          new ObjectInputStream(new FileInputStream(modelName));

        messageCl = (MessageClassifier) modelInObjectFile.readObject();


      } catch (FileNotFoundException e) { 

        messageCl = new MessageClassifier();

Figure 14.1 (continued)

P088407-Ch014.qxd  4/30/05  11:04 AM  Page 466

Yüklə 4,3 Mb.

Dostları ilə paylaş:
1   ...   196   197   198   199   200   201   202   203   ...   219

Verilənlər bazası müəlliflik hüququ ilə müdafiə olunur © 2024
rəhbərliyinə müraciət

    Ana səhifə