/*
 *      CRFsuite C++/SWIG API.
 *
 * Copyright (c) 2007-2010, Naoaki Okazaki
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the names of the authors nor the names of its contributors
 *       may be used to endorse or promote products derived from this
 *       software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __CRFSUITE_API_HPP__
#define __CRFSUITE_API_HPP__

#include <stdarg.h>     // va_list (unqualified, used by Trainer::__logging_callback)
#include <cstddef>      // std::size_t
#include <stdexcept>    // std::invalid_argument, std::runtime_error
#include <string>
#include <vector>

/*
 * Forward declarations of the opaque C API types. They are only emitted when
 * the C header (crfsuite.h) has not been included yet, so that this header
 * can be parsed on its own.
 */
#ifndef __CRFSUITE_H__

#ifdef  __cplusplus
extern "C" {
#endif/*__cplusplus*/

struct tag_crfsuite_model;
typedef struct tag_crfsuite_model crfsuite_model_t;

struct tag_crfsuite_data;
typedef struct tag_crfsuite_data crfsuite_data_t;

struct tag_crfsuite_trainer;
typedef struct tag_crfsuite_trainer crfsuite_trainer_t;

struct tag_crfsuite_tagger;
typedef struct tag_crfsuite_tagger crfsuite_tagger_t;

struct tag_crfsuite_dictionary;
typedef struct tag_crfsuite_dictionary crfsuite_dictionary_t;

struct tag_crfsuite_params;
typedef struct tag_crfsuite_params crfsuite_params_t;

#ifdef  __cplusplus
}
#endif/*__cplusplus*/

#endif/*__CRFSUITE_H__*/

/**
\page crfsuite_hpp_api CRFSuite C++/SWIG API

@section crfsuite_hpp_api_intro Introduction

The CRFSuite C++/SWIG API provides a high-level and easy-to-use library module
for a number of programming languages. The C++/SWIG API is a wrapper for the
CRFSuite C API.
- @link crfsuite_hpp_api_doc API documentation @endlink

@section crfsuite_hpp_api_cpp C++ API

The C++ library is implemented in two header files, crfsuite_api.hpp and
crfsuite.hpp. One can use the C++ API only by including crfsuite.hpp. The C++
library has a dependency on the CRFSuite C library, which means that the
C header file (crfsuite.h) and libcrfsuite library are necessary.

@section crfsuite_hpp_api_swig SWIG API

The SWIG API is identical to the C++ API. Currently, the CRFsuite distribution
includes a Python module for CRFsuite. Please read README under swig/python
directory for the information to build the Python module.

@subsection crfsuite_hpp_api_sample Sample code

This code demonstrates how to use the crfsuite.Trainer object. The script
reads training data from STDIN, trains a model using the 'l2sgd' algorithm,
and stores the model to a file (the first argument of the command line).

@include swig/python/sample_train.py

This code demonstrates how to use the crfsuite.Tagger object. The script
loads a model from a file (the first argument of the command line), reads
data from STDIN, and predicts label sequences.

@include swig/python/sample_tag.py

*/

namespace CRFSuite
{

/**
 * \addtogroup crfsuite_hpp_api_doc Data structures
 * @{
 */

/**
 * Tuple of attribute and its value.
 */
class Attribute {
public:
    /// Attribute.
    std::string attr;
    /// Attribute value (weight).
    double value;

    /**
     * Construct an attribute with the default name and value.
     * The name is empty and the value is 1.
     */
    Attribute() : value(1.)
    {
    }

    /**
     * Construct an attribute with the default value (1).
     * Intentionally left non-explicit so that a string converts to an
     * Attribute, as existing callers may rely on it.
     *  @param  name        The attribute name.
     */
    Attribute(const std::string& name) : attr(name), value(1.)
    {
    }

    /**
     * Construct an attribute.
     *  @param  name        The attribute name.
     *  @param  val         The attribute value.
     */
    Attribute(const std::string& name, double val) : attr(name), value(val)
    {
    }
};



/**
 * Type of an item (equivalent to an attribute vector) in a sequence.
 */
typedef std::vector<Attribute> Item;

/**
 * Type of an item sequence (equivalent to item vector).
 */
typedef std::vector<Item>  ItemSequence;

/**
 * Type of a string list.
 */
typedef std::vector<std::string> StringList;



/**
 * The trainer class.
 *  This class maintains a data set for training, and provides an interface
 *  to various graphical models and training algorithms. The standard
 *  procedure for implementing a trainer is:
 *  - create a class by inheriting this class
 *  - overwrite message() function to receive messages of training progress
 *  - call append() to append item/label sequences to the training set
 *  - call select() to specify a graphical model and an algorithm
 *  - call set() to configure parameters specific to the model and algorithm
 *  - call train() to start a training process with the current setting
 */
class Trainer {
protected:
    /// Opaque C API handle for the training data set.
    crfsuite_data_t *data;
    /// Opaque C API handle for the trainer.
    crfsuite_trainer_t *tr;

public:
    /**
     * Construct a trainer.
     */
    Trainer();

    /**
     * Destruct a trainer.
     */
    virtual ~Trainer();

    /**
     * Remove all instances in the data set.
     */
    void clear();

    /**
     * Append an instance (item/label sequence) to the data set.
     *  @param  xseq        The item sequence of the instance.
     *  @param  yseq        The label sequence of the instance. The number
     *                      of elements in yseq must be identical to that
     *                      in xseq.
     *  @param  group       The group number of the instance.
     *  @throw  std::invalid_argument   Arguments xseq and yseq are invalid.
     *  @throw  std::runtime_error      Out of memory.
     */
    void append(const ItemSequence& xseq, const StringList& yseq, int group);

    /**
     * Initialize the training algorithm.
     *  @param  algorithm   The name of the training algorithm.
     *  @param  type        The name of the graphical model.
     *  @return bool        \c true if the training algorithm is successfully
     *                      initialized, \c false otherwise.
     */
    bool select(const std::string& algorithm, const std::string& type);

    /**
     * Run the training algorithm.
     *  This function starts the training algorithm with the data set given
     *  by append() function. After starting the training process, the
     *  training algorithm invokes the virtual function message() to report
     *  the progress of the training process.
     *  @param  model       The filename to which the trained model is stored.
     *                      If this value is empty, this function does not
     *                      write out a model file.
     *  @param  holdout     The group number of holdout evaluation. The
     *                      instances with this group number will not be used
     *                      for training, but for holdout evaluation. Specify
     *                      \c -1 to use all instances for training.
     *  @return int         The status code.
     */
    int train(const std::string& model, int holdout);

    /**
     * Obtain the list of parameters.
     *  This function returns the list of parameter names available for the
     *  graphical model and training algorithm specified by select() function.
     *  @return StringList  The list of parameters available for the current
     *                      graphical model and training algorithm.
     */
    StringList params();

    /**
     * Set a training parameter.
     *  This function sets a parameter value for the graphical model and
     *  training algorithm specified by select() function.
     *  @param  name        The parameter name.
     *  @param  value       The value of the parameter.
     *  @throw  std::invalid_argument   The parameter is not found.
     */
    void set(const std::string& name, const std::string& value);

    /**
     * Get the value of a training parameter.
     *  This function gets a parameter value for the graphical model and
     *  training algorithm specified by select() function.
     *  @param  name        The parameter name.
     *  @return std::string The value of the parameter.
     *  @throw  std::invalid_argument   The parameter is not found.
     */
    std::string get(const std::string& name);

    /**
     * Get the description of a training parameter.
     *  This function obtains the help message for the parameter specified
     *  by the name. The graphical model and training algorithm must be
     *  selected by select() function before calling this function.
     *  @param  name        The parameter name.
     *  @return std::string The description (help message) of the parameter.
     */
    std::string help(const std::string& name);

    /**
     * Receive messages from the training algorithm.
     *  Override this member function to receive messages of the training
     *  process.
     *  @param  msg         The message
     */
    virtual void message(const std::string& msg);

protected:
    /// Internal initialization helper; defined outside this header.
    void init();

    /**
     * printf-style logging hook with a C-compatible signature.
     *  NOTE(review): presumably registered with the C trainer and used to
     *  forward formatted text to message(); confirm against the definition,
     *  which is not part of this header.
     *  @param  userdata    Opaque pointer supplied by the caller.
     *  @param  format      The printf-style format string.
     *  @param  args        The arguments for the format string.
     *  @return int         The status code.
     */
    static int __logging_callback(void *userdata, const char *format, va_list args);
};

/**
 * The tagger class.
 *  This class provides the functionality for predicting label sequences for
 *  input sequences using a model.
 */
class Tagger
{
protected:
    /// Opaque C API handle for the loaded model.
    crfsuite_model_t *model;
    /// Opaque C API handle for the tagger.
    crfsuite_tagger_t *tagger;

public:
    /**
     * Construct a tagger.
     */
    Tagger();

    /**
     * Destruct a tagger.
     */
    virtual ~Tagger();

    /**
     * Open a model file.
     *  @param  name        The file name of the model file.
     *  @return bool        \c true if the model file is successfully opened,
     *                      \c false otherwise (e.g., when the model file is
     *                      not found).
     *  @throw  std::runtime_error      An internal error in the model.
     */
    bool open(const std::string& name);

    /**
     * Open a model from memory.
     *  @param  data        A pointer to the model data.
     *                      Must be 16-byte aligned.
     *  @param  size        A size (in bytes) of the model data.
     *  @return bool        \c true if the model file is successfully opened,
     *                      \c false otherwise (e.g., when the model file is
     *                      not found).
     *  @throw  std::runtime_error      An internal error in the model.
     */
    bool open(const void* data, std::size_t size);

    /**
     * Close the model.
     */
    void close();

    /**
     * Obtain the list of labels.
     *  @return StringList  The list of labels in the model.
     *  @throw  std::invalid_argument   A model is not opened.
     *  @throw  std::runtime_error      An internal error.
     */
    StringList labels();

    /**
     * Predict the label sequence for the item sequence.
     *  This function calls set() and viterbi() functions to obtain the
     *  label sequence predicted for the item sequence.
     *  @param  xseq        The item sequence to be tagged.
     *  @return StringList  The label sequence predicted.
     *  @throw  std::invalid_argument   A model is not opened.
     *  @throw  std::runtime_error      An internal error.
     */
    StringList tag(const ItemSequence& xseq);

    /**
     * Set an item sequence.
     *  This function sets an item sequence for future calls for
     *  viterbi(), probability(), and marginal() functions.
     *  @param  xseq        The item sequence to be tagged
     *  @throw  std::invalid_argument   A model is not opened.
     *  @throw  std::runtime_error      An internal error.
     */
    void set(const ItemSequence& xseq);

    /**
     * Find the Viterbi label sequence for the item sequence.
     *  @return StringList  The label sequence predicted.
     *  @throw  std::invalid_argument   A model is not opened.
     *  @throw  std::runtime_error      An internal error.
     */
    StringList viterbi();

    /**
     * Compute the probability of the label sequence.
     *  @param  yseq        The label sequence.
     *  @return double      The probability of the label sequence.
     *  @throw  std::invalid_argument   A model is not opened.
     *  @throw  std::runtime_error      An internal error.
     */
    double probability(const StringList& yseq);

    /**
     * Compute the marginal probability of the label.
     *  @param  y           The label.
     *  @param  t           The position of the label.
     *  @return double      The marginal probability of the label.
     *  @throw  std::invalid_argument   A model is not opened.
     *  @throw  std::runtime_error      An internal error.
     */
    double marginal(const std::string& y, const int t);
};

/**
 * Obtain the version number of the library.
 *  @return std::string     The version string.
 */
std::string version();

/**@} */

}

#endif/*__CRFSUITE_API_HPP__*/