/vesta/vestasys.org/basics/basics/36/src/TextCommon.C

Go to the documentation of this file.
00001 // Copyright (C) 2001, Compaq Computer Corporation
00002 
00003 // This file is part of Vesta.
00004 
00005 // Vesta is free software; you can redistribute it and/or
00006 // modify it under the terms of the GNU Lesser General Public
00007 // License as published by the Free Software Foundation; either
00008 // version 2.1 of the License, or (at your option) any later version.
00009 
00010 // Vesta is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013 // Lesser General Public License for more details.
00014 
00015 // You should have received a copy of the GNU Lesser General Public
00016 // License along with Vesta; if not, write to the Free Software
00017 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00018 
00019 
00020 // Code shared by the "Text.C" and "TextGC.C" implementations. This file
00021 // is #included by both those files.
00022 
00023 #include "Basics.H"
00024 #include "Text.H"
00025 
00026 // The empty string to which the Text() constructor initializes new Text's
00027 static const char* const EmptyStr = "";
00028 
00029 static const char *StrCopy(const char *str) throw ()
00030 // Returns a newly allocated copy of the null-terminated string "str".
00031 {
00032     int len = strlen(str) + 1;  // include +1 for null terminator
00033     char *res = NEW_PTRFREE_ARRAY(char, len);
00034     memcpy(res, str, len);
00035     return res;
00036 }
00037 
00038 static const char *StrCopy(const char *s, int len) throw ()
00039 /* Returns a newly-allocated, null-terminated copy of the "len" chars pointed
00040    to by "s"; it is an unchecked run-time error for any of those bytes to
00041    be the null character. */
00042 {
00043     char *res = NEW_PTRFREE_ARRAY(char, len+1);
00044     memcpy(res, s, len);
00045     res[len] = '\0';
00046     return res;
00047 }
00048 
00049 // constructors
00050 Text::Text() throw ()
00051 {
00052     this->s = EmptyStr;
00053 }
00054 
00055 Text::Text(const char *str, void *copy) throw ()
00056 {
00057     this->s = (copy == (void *)NULL) ? StrCopy(str) : str;
00058 }
00059 
00060 Text::Text(const char *bytes, int len) throw ()
00061 {
00062     this->s = StrCopy(bytes, len);
00063 }
00064 
00065 Text Text::Sub(int start, int len) const throw ()
00066 {
00067     int tLen = Length();
00068     Text res;
00069     if (start >= tLen || len == 0) {
00070         res.s = EmptyStr;
00071     } else {
00072         if ((unsigned int)start + (unsigned int)len > tLen) {
00073             // In this case, we can include the null terminator in the copy
00074             len = tLen - start + 1; // include +1 for null terminator
00075             char *temp = NEW_PTRFREE_ARRAY(char, len);
00076             memcpy(temp, this->s + start, len);
00077             res.s = temp;
00078         } else {
00079             // include +1 for null terminator
00080             char *temp = NEW_PTRFREE_ARRAY(char, len+1);
00081             memcpy(temp, this->s + start, len);
00082             temp[len] = '\0';
00083             res.s = temp;
00084         }
00085     }
00086     return res;
00087 }
00088 
00089 int Text::FindChar(char c, int start) const throw ()
00090 {
00091     int tLen = Length();
00092     for (register int i = max(start, 0); i < tLen; i++) {
00093         if (this->s[i] == c) return i;
00094     }
00095     return -1;
00096 }
00097 
00098 int Text::FindCharR(char c, int start) const throw ()
00099 {
00100     int tLen = Length();
00101     for (register int i = min(start, tLen - 1); i >= 0; i--) {
00102         if (this->s[i] == c) return i;
00103     }
00104     return -1;
00105 }
00106 
00107 int Text::FindText(const Text &substr, int start) const throw ()
00108 {
00109     start = max(start, 0);
00110     if (start + substr.Length() > this->Length()) return -1;
00111     char *match = strstr(this->s + start, substr.s);
00112     if (match != (char *)NULL) return (match - this->s);
00113     return -1;
00114 }
00115 
00116 const int
00117   N = sizeof(Word),             // bytes per Word
00118   ByteBits = 8,                 // bits per byte
00119   WordBits = N * 8;             // bits per Word
00120 
00121 Word RotateWord(Word w, int b) throw ()
00122 // Returns the result of rotating "w" up by "b" bits (or down by -b
00123 // bits if b is negative).
00124 {
00125     b = (b % WordBits);
00126 
00127     // We know that on big-endian systems this function will only be
00128     // called with negative values and on little-endian systems it
00129     // will only be called with positive values, so we optimize away
00130     // the unused clause.
00131 #if BYTE_ORDER == BIG_ENDIAN
00132     if (b < 0)
00133       {
00134         b = -b;
00135         Word hiMask = (((Word) -1) << b);
00136         Word loMask = (((Word) -1) ^ hiMask);
00137         w = ((w & loMask) << (WordBits - b)) | ((w & hiMask) >> b);
00138       }
00139 #else
00140     // else
00141     if (b > 0)
00142       {
00143         Word hiMask = (((Word) -1) << (WordBits - b));
00144         Word loMask = (((Word) -1) ^ hiMask);
00145         w = ((w & loMask) << b) | ((w & hiMask) >> (WordBits - b));
00146       }
00147 #endif
00148 
00149     return w;
00150 }
00151 
00152 const int
00153   Up1 = ByteBits,               // bits per byte
00154   LgUp1 = 3;                    // log_2(Up1)
00155 
00156 Word Text::Hash() const throw ()
00157   // This function is based on the implementation in the file
00158   // "UnsafeHash.m3" that is part of the SRC Modula-3 distribution.
00159   // That file has a more complete description of the method.
00160 
00161   // Conceptually, this can be thought of as a faster version of this:
00162 
00163   //    Word res = 0;
00164   //    unsinged int N = sizeof(Word);
00165   //    char *resc = ((char *) res);
00166   //    for(unsigned int i = 0; i < strlen(this->s); i++)
00167   //    {
00168   //      res_c[i % N] = res_c[i % N] ^ this->s[i];
00169   //    }
00170   //    res += strlen(this->s);
00171   //    return res;
00172 
00173   // Although the result value of the code below will not be identical
00174   // (as it's affected by the byte-order of the host processor).
00175 
00176   // (A web search for "UnsafeHash.m3" should turn up the original.  I
00177   // considered simply reproducing the original description/proof
00178   // here, but that would undoubtedly cause license/copyright
00179   // issues. --KCS)
00180 {
00181     const PointerInt modNmask = (Word)N - 1UL;
00182     Word temp = 0UL;
00183     const char *p = this->s;
00184     Word m = (Word)strlen(p);
00185     const char *endp = p + m;
00186 
00187 #if defined(VALGRIND_SUPPORT)
00188     // Lower performance but completely safe, even as far as paranoid
00189     // run-time checkers like valgrind are concerned.
00190     char *temp_c = ((char *) &temp);
00191     for(unsigned int i = 0; i < m; i++)
00192       {
00193         temp_c[i % sizeof(temp)] ^= this->s[i];
00194       }
00195     return m + temp;
00196 #else
00197     // process at most N-1 initial chars if "p" is not word-aligned
00198     PointerInt jpre = (PointerInt)p & modNmask, jpost;
00199     if ((jpre != 0) && (p != endp))
00200       {
00201         // jpost is the number of junk bytes in the first word after
00202         // the non-junk bytes.  (This is only non-zero for very short
00203         // strings.)
00204         jpost = max(0, N - jpre - m);
00205 
00206         // Get the Word-aligned memory that contains the start of the
00207         // string (and jpre preceeding junk bytes).
00208         temp = *((Word *)(p-jpre));
00209 
00210         // Shift temp to remove preceeding junk bytes and following
00211         // junk bytes (to handle the case of very short strings where
00212         // jpost > 0).
00213 #if BYTE_ORDER == BIG_ENDIAN
00214         temp <<= (jpre << LgUp1);
00215         temp >>= ((jpost + jpre) << LgUp1);
00216 #else
00217         temp >>= (jpre << LgUp1);
00218         temp <<= ((jpost + jpre) << LgUp1);
00219 #endif
00220 
00221         // Skip forward to the next word-aligned chunk of the string
00222         // or the end of the string, whichever comes first.
00223         p += (N - jpre - jpost);
00224       }
00225 
00226     // process middle words
00227     while (p + N <= endp) {
00228         temp ^= *((Word *)p);
00229         p += N;
00230     }
00231     
00232     // process at most N-1 trailing chars
00233     if (p != endp)
00234       {
00235         // w is the last Word-aligned chunk of the string, including
00236         // some trailing junk characters.
00237         Word w = *((Word *)p);
00238 
00239         // jpost is the number of trailing junk characters in this
00240         // Word but not in the string.  jpostUp1 is the number of bits
00241         // in jpost bytes.
00242         jpost = N - (endp - p);
00243         Word jpostUp1 = (jpost << LgUp1);
00244 
00245         // Shift w to remove bytes past the end of the string and
00246         // thereby avoid including random garbage in the hash value.
00247         // Rotate temp by the same amount in the same direction as the
00248         // shift to align it with w.
00249 #if BYTE_ORDER == BIG_ENDIAN
00250         w >>= jpostUp1;
00251         temp = RotateWord(temp, -jpostUp1);
00252 #else
00253         w <<= jpostUp1;
00254         temp = RotateWord(temp, jpostUp1);
00255 #endif
00256 
00257         // Combine w (now free of junk bytes) into the working hash
00258         // value.
00259         temp ^= w;
00260     }
00261 
00262     // Finally, we rotate the hash value by the same number of bytes
00263     // as the string length and add the string length to the result
00264     // (to ensure that strings which are an even number of identical
00265     // repitions of Word-sized sequences don't all get the same hash
00266     // value).
00267 #if BYTE_ORDER == BIG_ENDIAN
00268     return m + RotateWord(temp, -(m << LgUp1));
00269 #else
00270     return m + RotateWord(temp, (m << LgUp1));
00271 #endif
00272 
00273 #endif // SAFE_HASH
00274 }
00275 
00276 Text Text::WordWrap(const Text &prefix, unsigned int columns)
00277   const throw ()
00278 {
00279   Text result = prefix;
00280   unsigned int line_len = result.Length();
00281   const char *read_p = s;
00282 
00283   // Until we run out of original...
00284   while(*read_p)
00285     {
00286       // Find the beginning and end of the next word.
00287       const char *next_word_start = read_p, *next_word_end;
00288       while(*next_word_start && isspace(*next_word_start))
00289         next_word_start++;
00290       next_word_end = next_word_start;
00291       while(*next_word_end && !isspace(*next_word_end))
00292         next_word_end++;
00293 
00294       // See if adding this word to the current like would put us over
00295       // the line length limit.
00296       if((line_len + (next_word_end - read_p)) > columns)
00297         {
00298           // Replace the whitespace before this word with a newline,
00299           // and update the line length counter.
00300           unsigned int next_word_len = (next_word_end - next_word_start);
00301           result += (Text("\n") + prefix +
00302                      Text(next_word_start, next_word_len));
00303           line_len = prefix.Length() + next_word_len;
00304         }
00305       // If this would still be within the line length limit, just
00306       // copy them over.
00307       else
00308         {
00309           unsigned int added_len = (next_word_end - read_p);
00310           result += Text(read_p, added_len);
00311           line_len += added_len;
00312         }
00313 
00314       // Regardless of what we did, move past the next word.
00315       read_p = next_word_end;
00316     }
00317 
00318   // Return the result.
00319   return result;
00320 }
00321 
00322 // Shared code for PadLeft and PadRight: computes the number of copies
00323 // needed (including possibly a partial copy) and allocates the
00324 // storage for the new string.
00325 
00326 static void PaddingCopies(unsigned int baseLen,
00327                           unsigned int finalLen,
00328                           unsigned int padLen,
00329                           /*OUT*/ unsigned int &copies,
00330                           /*OUT*/ unsigned int &partial,
00331                           /*OUT*/ char *&dest)
00332 {
00333   assert(padLen > 0);
00334   unsigned int needed = (finalLen > baseLen) ? (finalLen - baseLen) : 0;
00335   copies = needed/padLen;
00336   partial = needed - (copies*padLen);
00337   unsigned int totalLen = baseLen + (copies*padLen) + partial;
00338   assert(totalLen >= finalLen);
00339   dest = NEW_PTRFREE_ARRAY(char, totalLen+1);
00340   *dest = 0;
00341 }
00342 
00343 // Shared code for PadLeft and PadRight: copies the padding string
00344 // multiple times (including possibly a final partial copy).
00345 
00346 static void CopyPadding(unsigned int copies, unsigned int partial,
00347                         char *dest, const Text &padding)
00348 {
00349   while(copies > 0)
00350     {
00351       strcat(dest, padding.cchars());
00352       copies--;
00353     }
00354   if(partial)
00355     {
00356       strncat(dest, padding.cchars(), partial);
00357     }
00358 }
00359 
00360 Text Text::PadLeft(unsigned int toLen, const Text &padding)
00361   const throw()
00362 {
00363 
00364   unsigned int copies, partial;
00365   char *dest;
00366   PaddingCopies(this->Length(), toLen, padding.Length(),
00367                 copies, partial, dest);
00368 
00369   // Padding on the left
00370   CopyPadding(copies, partial, dest, padding);
00371   // Original on the right
00372   strcat(dest, this->s);
00373 
00374   Text res;
00375   res.s = dest;
00376   return res;
00377 }
00378 
00379 Text Text::PadRight(unsigned int toLen, const Text &padding)
00380   const throw()
00381 {
00382   unsigned int copies, partial;
00383   char *dest;
00384   PaddingCopies(this->Length(), toLen, padding.Length(),
00385                 copies, partial, dest);
00386 
00387   // Oritinal on the left
00388   strcat(dest, this->s);
00389   // Padding on the right
00390   CopyPadding(copies, partial, dest, padding);
00391 
00392   Text res;
00393   res.s = dest;
00394   return res;
00395 }

Generated on Thu May 24 23:01:37 2007 for Vesta by  doxygen 1.5.1