Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

SequentialWriteBuffer.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 //
00013 // SequentialWriteBuffer
00014 //
00015 // 15 November 2004 -- tds
00016 //
00017 
00018 #ifndef INDRI_SEQUENTIALWRITEBUFFER_HPP
00019 #define INDRI_SEQUENTIALWRITEBUFFER_HPP
00020 
00021 #include "indri/File.hpp"
00022 #include "indri/InternalFileBuffer.hpp"
00023 namespace indri
00024 {
00025   namespace file
00026   {
00043     class SequentialWriteBuffer {
00044     private:
00045       File& _file;
00046       InternalFileBuffer _current;
00047       UINT64 _position;
00048       UINT64 _eof;
00049 
00050     public:
00058       SequentialWriteBuffer( File& file, size_t length ) :
00059         _file(file),
00060         _current(length),
00061         _position(0),
00062         _eof(0)
00063       {
00064         _eof = _file.size();
00065         _position = _eof;
00066       }
00067 
00072       void seek( UINT64 position ) {
00073         // this only resets the file pointer; notice that data may not be written to
00074         // disk until write is called
00075         _position = position;
00076       }
00077 
00088       char* write( size_t length ) {
00089         UINT64 endBuffer = _current.filePosition + _current.buffer.size();
00090         UINT64 endBufferData = _current.filePosition + _current.buffer.position();
00091         UINT64 endWrite = length + _position;
00092         UINT64 startWrite = _position;
00093         UINT64 startBuffer = _current.filePosition;
00094         char* writeSpot;
00095 
00096         // if this write starts before the buffered data does, we have to flush
00097         bool writeStartsBeforeBuffer = startBuffer > startWrite;
00098         // if this write ends after the end of our buffer, we need to flush the buffer
00099         // to make room for this data.
00100         bool writeEndsAfterBuffer = endBuffer < endWrite;
00101         // if this write creates a "gap" in the buffer, we have to write data.
00102 
00103         // here's an example.  Suppose we have already written 1MB of 0's to 
00104         // the file.  Then, we seek to the beginning of the file and write 
00105         // 20 bytes of 1's.  Then, we seek forward to 100 bytes and try to write
00106         // more 0's.  There is enough space in the buffer for that write to 
00107         // succeed, but that would leave an 80 byte gap in the buffer.
00108         // when the buffer is flushed to disk, that 80 byte gap would clobber
00109         // the 0's that are already on disk.
00110         // Notice that there's no problem if the 'gap' is after the current end of
00111         // the file, because that space is undefined anyway.
00112         bool dataGap = (endBufferData < _eof && startWrite > endBufferData);
00113 
00114         if( writeStartsBeforeBuffer || writeEndsAfterBuffer || dataGap ) {
00115           flush();
00116           _current.filePosition = _position;
00117 
00118           startBuffer = _current.filePosition;
00119           endBuffer = _current.filePosition + _current.buffer.size();
00120           endBufferData = _current.filePosition + _current.buffer.position();
00121         }
00122 
00123         // There's a possibility that there isn't enough room to buffer this write,
00124         // even though we cleared it out.  In that case, make it bigger.
00125         if( endWrite > endBufferData ) {
00126           // need to move the buffer pointer to the end, potentially resizing buffer
00127           _current.buffer.write( size_t(endWrite - endBufferData) );
00128           endBufferData = _current.filePosition + _current.buffer.position();
00129         }
00130 
00131         assert( endWrite <= endBufferData && startWrite >= startBuffer );
00132         writeSpot = _current.buffer.front() + (_position - _current.filePosition);
00133         assert( writeSpot + length <= _current.buffer.front() + _current.buffer.position() );
00134         _position += length;
00135 
00136         return writeSpot;
00137       }
00138 
00146       void write( const void* buffer, size_t length ) {
00147         memcpy( write( length ), buffer, length );
00148       }
00149   
00157       void unwrite( size_t length ) {
00158         assert( length <= _current.buffer.position() );
00159         _current.buffer.unwrite( length );
00160         _position -= length;
00161       }
00162 
00167       UINT64 tell() const {
00168         return _position;
00169       }
00170 
00175       void flush() {
00176         size_t bytes = _current.buffer.position();
00177         // write current buffered data out to the file
00178         _file.write( _current.buffer.front(), _current.filePosition, _current.buffer.position() );
00179         // clear out the data in the buffer
00180         _current.buffer.clear();
00181         _current.filePosition += bytes;
00182         // update the end of file marker if necessary
00183         _eof = lemur_compat::max( _current.filePosition, _eof );
00184       }
00185 
00186       void flushRegion( UINT64 start, UINT64 length ) {
00187         if( (start+length) >= _current.filePosition &&
00188             start <= _current.filePosition + _current.buffer.position() )
00189         {
00190           flush();
00191         }
00192       }
00193     };
00194   }
00195 }
00196 
00197 #endif // INDRI_SEQUENTIALWRITEBUFFER_HPP
00198 

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4