Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

XMLNode.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 //
00014 // XMLNode
00015 //
00016 // 8 October 2003 - tds
00017 //
00018 
00019 #ifndef MONITOR_XMLNODE_H
00020 #define MONITOR_XMLNODE_H
00021 
00022 #include <string>
00023 #include <vector>
00024 #include <map>
00025 #include <sstream>
00026 #include <assert.h>
00027 #include <iomanip>
00028 #include <iostream>
00029 
00030 #include "indri/indri-platform.h"
00031 #include "lemur-compat.hpp"
00032 namespace indri
00033 {
        /// XML-related types.
00035   namespace xml
00036   {
00037     
          /// A single node of an XML document tree.
          ///
          /// Each node stores a name, a map of attributes, a text value and a
          /// list of pointers to child nodes.  Only the declaration is visible
          /// in this header; the member functions are defined elsewhere.
          ///
          /// NOTE(review): children are held as raw XMLNode pointers and the
          /// class declares a destructor but no copy constructor or assignment
          /// operator.  Presumably the destructor deletes the children, which
          /// would make copying a node unsafe -- confirm against the
          /// implementation file.
00040     class XMLNode {
00041     public:
            /// Attribute storage: maps an attribute name to its value.
00042       typedef std::map<std::string,std::string> MAttributes;
00043   
00044     private:
            /// Name of this node.
00045       std::string _name;
            /// Attributes attached to this node.
00046       MAttributes _attributes;
            /// Child nodes, stored as raw pointers.
00047       std::vector<XMLNode*> _children;
            /// Text value of this node.
00048       std::string _value;
00049 
00050     public:
            /// Construct a node from a name only.
            /// NOTE(review): not marked explicit, so a std::string converts
            /// implicitly to an XMLNode.
00053       XMLNode( const std::string& name );
            /// Construct a node from a name and a text value.
00057       XMLNode( const std::string& name, const std::string& value );
            /// Construct a node from a name and a set of attributes.
00061       XMLNode( const std::string& name, const MAttributes& attributes );
            /// Construct a node from a name, a set of attributes and a text value.
00066       XMLNode( const std::string& name, const MAttributes& attributes, const std::string& value );
            /// Destructor.
            /// NOTE(review): presumably releases the child nodes -- confirm.
00068       ~XMLNode();
            /// Add a child node.
            /// NOTE(review): takes a raw pointer; presumably this node takes
            /// ownership of it -- confirm against the implementation.
00071       void addChild( XMLNode* child );
            /// Add an attribute given its key and value.
00075       void addAttribute( const std::string& key, const std::string& value );
            /// Set the text value of this node.
00078       void setValue( const std::string& value );
            /// Return the node name by const reference.
00080       const std::string& getName() const;
            /// Return the node value by const reference.
00082       const std::string& getValue() const;
            /// Return the attribute map by const reference.
00084       const MAttributes& getAttributes() const;
            /// Return, by value, the attribute stored under the given name.
            /// NOTE(review): the result for a missing attribute is not visible
            /// here (presumably an empty string) -- confirm.
00088       std::string getAttribute( const std::string& name ) const;
            /// Return the list of child pointers by const reference.
00091       const std::vector<XMLNode*>& getChildren() const;
            /// Look up a child node by name.
            /// NOTE(review): presumably returns the first matching child, or a
            /// null pointer when none matches -- confirm.
00095       const XMLNode* getChild( const std::string& name ) const;  
            /// Return, by value, the value of the child with the given name.
            /// NOTE(review): behaviour when no such child exists is not
            /// visible here -- confirm.
00099       std::string getChildValue( const std::string& name ) const; 
00100     };
00101   }
00102 }
00103 
00105 inline std::string i64_to_string( INT64 value ) {
00106   std::stringstream number;
00107 
00108   if( value > 1000000000 ) {
00109     number << (value/1000000000) << std::setw(9) << std::setfill('0') << (value%1000000000);
00110   } else {
00111     number << value;
00112   }
00113 
00114   return number.str();
00115 }
00116 
00118 inline INT64 string_to_i64( const std::string& str ) {
00119   INT64 result = 0;
00120   INT64 negative = 1;
00121   unsigned int i = 0;
00122 
00123   if( str.length() > 0 && str[0] == '-' ) {
00124     negative = -1;
00125     i = 1;
00126   }
00127 
00128   for( ; i<str.length(); i++ ) {
00129     result = result * 10 + (str[i] - '0');
00130   }
00131 
00132   return result * negative;
00133 }
00134 
00136 inline int string_to_int( const std::string& str ) {
00137   return (int) string_to_i64( str );
00138 }
00139 
/// Encode a block of bytes as base64 text, using the alphabet
/// A-Z, a-z, 0-9, '+', '/' and '=' padding.
///
/// The output length is always a multiple of four characters.  A null
/// input pointer or a non-positive length yields an empty string.
///
/// @param input pointer to the bytes to encode
/// @param length number of bytes available at input
/// @return the base64 representation of the input bytes
inline std::string base64_encode( const void* input, int length ) {
  static const char alphabet[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

  std::string result;

  // nothing to encode; this also keeps a negative length from being
  // converted to a huge unsigned byte count below
  if( !input || length <= 0 )
    return result;

  const unsigned char* in = static_cast<const unsigned char*>( input );
  const size_t total = static_cast<size_t>( length );

  // mainLength is the total length of contiguous 3-byte chunks
  const size_t mainLength = total - total % 3;
  const size_t remaining = total - mainLength;

  // every started group of 3 input bytes produces 4 output characters
  result.reserve( ((total + 2) / 3) * 4 );

  // main loop encodes each group of 3 8-bit chars as 4 6-bit chars
  for( size_t i = 0; i < mainLength; i += 3 ) {
    const unsigned int value =
      (static_cast<unsigned int>( in[i+0] ) << 16) |
      (static_cast<unsigned int>( in[i+1] ) <<  8) |
       static_cast<unsigned int>( in[i+2] );

    result.push_back( alphabet[(value >> 18) & 0x3f] );
    result.push_back( alphabet[(value >> 12) & 0x3f] );
    result.push_back( alphabet[(value >>  6) & 0x3f] );
    result.push_back( alphabet[ value        & 0x3f] );
  }

  // one or two trailing bytes: the missing bytes count as zero bits and
  // the characters that would carry only missing bits become '='
  if( remaining != 0 ) {
    unsigned int value = static_cast<unsigned int>( in[mainLength] ) << 16;

    if( remaining == 2 )
      value |= static_cast<unsigned int>( in[mainLength+1] ) << 8;

    result.push_back( alphabet[(value >> 18) & 0x3f] );
    result.push_back( alphabet[(value >> 12) & 0x3f] );
    result.push_back( remaining == 2 ? alphabet[(value >> 6) & 0x3f] : '=' );
    result.push_back( '=' );
  }

  return result;
}
00239 
/// Decode base64 text into a caller-supplied buffer.
///
/// The input is processed in groups of four characters.  A group ending
/// in one '=' yields two bytes, a group ending in "==" yields one byte,
/// and any other group yields three bytes.
///
/// Decoding stops early, returning the bytes produced so far, when:
///  - a group contains a character outside the base64 alphabet, or
///  - the next group would not fit in the output buffer.
/// A trailing partial group (input size not a multiple of four) is
/// ignored.  Both the partial-group case and the buffer-too-small case
/// also trigger an assert in builds where asserts are enabled.
///
/// @param output buffer that receives the decoded bytes
/// @param outputLength capacity of output, in bytes
/// @param input base64 text to decode
/// @return the number of bytes written to output
inline int base64_decode( void* output, int outputLength, const std::string& input ) {
  assert( (input.size() % 4) == 0 );

  // decoding table matching the encoding proposed in rfc1521:
  // -1 marks characters outside the alphabet; '=' maps to 0 so that
  // padding contributes no bits.  The table is explicitly signed (so -1
  // survives on platforms where plain char is unsigned) and has a full
  // 256 entries (so every unsigned char value is a valid index).
  static const signed char lookup[256] = {
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   62,   -1,   -1,   -1,   63,
    52,   53,   54,   55,   56,   57,   58,   59,   60,   61,   -1,   -1,   -1,    0,   -1,   -1,
    -1,    0,    1,    2,    3,    4,    5,    6,    7,    8,    9,   10,   11,   12,   13,   14,
    15,   16,   17,   18,   19,   20,   21,   22,   23,   24,   25,   -1,   -1,   -1,   -1,   -1,
    -1,   26,   27,   28,   29,   30,   31,   32,   33,   34,   35,   36,   37,   38,   39,   40,
    41,   42,   43,   44,   45,   46,   47,   48,   49,   50,   51,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1
  };

  char* out = static_cast<char*>( output );
  int trueOutputLength = 0;

  // only whole 4-character groups are decoded, so the loop never reads
  // past the end of the input
  const size_t usable = input.size() - input.size() % 4;

  for( size_t i = 0; i < usable; i += 4 ) {
    // decode 4 byte chunks
    const unsigned char first  = static_cast<unsigned char>( input[i] );
    const unsigned char second = static_cast<unsigned char>( input[i+1] );
    const unsigned char third  = static_cast<unsigned char>( input[i+2] );
    const unsigned char fourth = static_cast<unsigned char>( input[i+3] );

    const int a = lookup[first];
    const int b = lookup[second];
    const int c = lookup[third];
    const int d = lookup[fourth];

    // a character outside the alphabet ends decoding
    if( a < 0 || b < 0 || c < 0 || d < 0 )
      break;

    const unsigned int value =
      (static_cast<unsigned int>( a ) << 18) |
      (static_cast<unsigned int>( b ) << 12) |
      (static_cast<unsigned int>( c ) <<  6) |
       static_cast<unsigned int>( d );

    // padding at the end of a chunk reduces the number of bytes it carries
    int produced = 3;
    if( fourth == '=' )
      produced = ( third == '=' ) ? 1 : 2;

    // check the capacity before writing, not after
    const bool fits = produced <= outputLength - trueOutputLength;
    assert( fits );
    if( !fits )
      break;

    out[trueOutputLength] = static_cast<char>( (value >> 16) & 0xff );
    if( produced >= 2 )
      out[trueOutputLength+1] = static_cast<char>( (value >> 8) & 0xff );
    if( produced == 3 )
      out[trueOutputLength+2] = static_cast<char>( value & 0xff );

    trueOutputLength += produced;
  }

  assert( trueOutputLength <= outputLength );
  return trueOutputLength;
}
00309 
00313 inline void base64_decode_string( std::string& out, const std::string& in ) {
00314   char* buf = new char[in.size()+1];
00315   size_t outLength = base64_decode( buf, (int)in.size()+5, in );
00316   buf[outLength] = 0;
00317   out = buf;
00318   delete[] buf;
00319 }
00320 
00321 #endif // MONITOR_XMLNODE_H
00322 

Generated on Tue Jun 15 11:02:56 2010 for Lemur by doxygen 1.3.4