OpenClonk
StdBuf.cpp
Go to the documentation of this file.
1 /*
2  * OpenClonk, http://www.openclonk.org
3  *
4  * Copyright (c) 2001-2009, RedWolf Design GmbH, http://www.clonk.de/
5  * Copyright (c) 2009-2016, The OpenClonk Team and contributors
6  *
7  * Distributed under the terms of the ISC license; see accompanying file
8  * "COPYING" for details.
9  *
10  * "Clonk" is a registered trademark of Matthes Bender, used with permission.
11  * See accompanying file "TRADEMARK" for details.
12  *
13  * To redistribute this file separately, substitute the full license texts
14  * for the above references.
15  */
16 #include "C4Include.h"
17 #include "lib/StdBuf.h"
18 
19 #include "lib/StdCompiler.h"
20 #include "lib/StdAdaptors.h"
21 
22 #ifdef _WIN32
24 #else
25 #define O_BINARY 0
26 #define O_SEQUENTIAL 0
27 #endif
28 #include <sys/stat.h>
29 
30 // *** StdBuf
31 
32 bool StdBuf::LoadFromFile(const char *szFile)
33 {
34  // Open file
35 #ifdef _WIN32
36  int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
37 #else
38  int fh = open(szFile, O_BINARY | O_CLOEXEC | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
39 #endif
40  if (fh < 0) return false;
41  // Create buf
42  New(FileSize(fh));
43  // Read
44  if (read(fh, getMData(), getSize()) != (signed int) getSize())
45  {
46  close(fh);
47  return false;
48  }
49  close(fh);
50  // Ok
51  return true;
52 }
53 bool StdBuf::SaveToFile(const char *szFile) const
54 {
55  // Open file
56 #ifdef _WIN32
57  int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
58 #else
59  int fh = open(szFile, O_BINARY | O_CLOEXEC | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
60 #endif
61  if (fh < 0) return false;
62  // Write data
63  if (write(fh, getData(), getSize()) != (signed int) getSize())
64  {
65  close(fh);
66  return false;
67  }
68  close(fh);
69  // Ok
70  return true;
71 }
72 
73 bool StdStrBuf::LoadFromFile(const char *szFile)
74 {
75  // Open file
76 #ifdef _WIN32
77  int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
78 #else
79  int fh = open(szFile, O_BINARY | O_CLOEXEC | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
80 #endif
81  if (fh < 0) return false;
82  // Create buf
83  SetLength(FileSize(fh));
84  // Read
85  if (read(fh, getMData(), getLength()) != (ssize_t) getLength())
86  {
87  close(fh);
88  return false;
89  }
90  close(fh);
91  // Ok
92  return true;
93 }
94 bool StdStrBuf::SaveToFile(const char *szFile) const
95 {
96  // Open file
97 #ifdef _WIN32
98  int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
99 #else
100  int fh = open(szFile, O_BINARY | O_CLOEXEC | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
101 #endif
102  if (fh < 0) return false;
103  // Write data
104  if (write(fh, getData(), getLength()) != (ssize_t) getLength())
105  {
106  close(fh);
107  return false;
108  }
109  close(fh);
110  // Ok
111  return true;
112 }
113 
114 void StdBuf::CompileFunc(StdCompiler *pComp, int iType)
115 {
116  // Size (guess it is a small value most of the time - if it's big, an extra byte won't hurt anyway)
117  uint32_t tmp = iSize; pComp->Value(mkIntPackAdapt(tmp)); iSize = tmp;
119  // Read/write data
120  if (pComp->isDeserializer())
121  {
122  New(iSize);
123  pComp->Raw(getMData(), iSize, StdCompiler::RawCompileType(iType));
124  }
125  else
126  {
127  pComp->Raw(const_cast<void *>(getData()), iSize, StdCompiler::RawCompileType(iType));
128  }
129 }
130 
131 // *** StdStringBuf
132 
133 #ifdef _WIN32
134 StdStrBuf::StdStrBuf(const wchar_t * utf16)
135 {
136  int len = WideCharToMultiByte(CP_UTF8, 0, utf16, -1, nullptr, 0, nullptr, nullptr);
137  SetSize(len);
138  WideCharToMultiByte(CP_UTF8, 0, utf16, -1, getMData(), getSize(), nullptr, nullptr);
139 }
140 StdStrBuf::wchar_t_holder StdStrBuf::GetWideChar() const
141 {
142  if (!getSize()) return StdStrBuf::wchar_t_holder(nullptr);
143 
144  int len = MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), nullptr, 0);
145  wchar_t * p = new wchar_t[len];
146  MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), p, len);
147  return StdStrBuf::wchar_t_holder(p);
148 }
150 {
151  int len = MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), nullptr, 0);
152  StdBuf r; r.SetSize(len * sizeof(wchar_t));
153  MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), getMBufPtr<wchar_t>(r), len);
154  return r;
155 }
156 StdStrBuf::wchar_t_holder GetWideChar(const char * utf8, bool double_null_terminate)
157 {
158  int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, nullptr, 0);
159  if (double_null_terminate) ++len;
160  wchar_t * p = new wchar_t[len];
161  MultiByteToWideChar(CP_UTF8, 0, utf8, -1, p, len);
162  if (double_null_terminate) p[len - 1] = wchar_t(0);
163  return StdStrBuf::wchar_t_holder(p);
164 }
165 StdBuf GetWideCharBuf(const char * utf8)
166 {
167  int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, nullptr, 0);
168  StdBuf r; r.SetSize(len * sizeof(wchar_t));
169  MultiByteToWideChar(CP_UTF8, 0, utf8, -1, getMBufPtr<wchar_t>(r), len);
170  return r;
171 }
172 #endif
173 
174 void StdStrBuf::Format(const char *szFmt, ...)
175 {
176  // Create argument list
177  va_list args; va_start(args, szFmt);
178  // Format
179  FormatV(szFmt, args);
180 }
181 
182 void StdStrBuf::FormatV(const char *szFmt, va_list args)
183 {
184  // Clear previous contents
185  Clear();
186  // Format
187  AppendFormatV(szFmt, args);
188 }
189 
190 void StdStrBuf::AppendFormat(const char *szFmt, ...)
191 {
192  // Create argument list
193  va_list args; va_start(args, szFmt);
194  // Format
195  AppendFormatV(szFmt, args);
196 }
197 
198 void StdStrBuf::AppendFormatV(const char *szFmt, va_list args)
199 {
200 #ifdef HAVE_VASPRINTF
201  // Format
202  char *pStr; int iBytes = vasprintf(&pStr, szFmt, args);
203  if (iBytes < 0 || !pStr) return;
204  // Append
205  if (isNull())
206  Take(pStr, iBytes);
207  else
208  {
209  Append(pStr, iBytes);
210  free(pStr);
211  }
212 #elif defined(HAVE_VSCPRINTF)
213  // Save append start
214  int iStart = getLength();
215  // Calculate size, allocate
216  int iLength = vscprintf(szFmt, args);
217  Grow(iLength);
218  // Format
219  char *pPos = getMElem<char>(*this, iSize - iLength - 1);
220  vsprintf(getMPtr(iStart), szFmt, args);
221 #else
222  // Save append start
223  int iStart = getLength(), iBytes;
224  do
225  {
226  // Grow
227  Grow(512);
228  // Try output
229  va_list args_copy;
230  #ifdef va_copy
231  va_copy(args_copy, args);
232  #else
233  args_copy = args;
234  #endif
235  iBytes = vsnprintf(getMPtr(iStart), getLength() - iStart, szFmt, args_copy);
236  #ifdef va_copy
237  va_end(args_copy);
238  #endif
239  }
240  while (iBytes < 0 || (unsigned int)(iBytes) >= getLength() - iStart);
241  // Calculate real length, if vsnprintf didn't return anything of value
242  iBytes = strlen(getMPtr(iStart));
243  // Shrink to fit
244  SetSize(iStart + iBytes + 1);
245 #endif
246 }
247 
249 {
250  if(getLength() && *getPtr(getLength() - 1) == DirectorySeparator) return;
252 }
253 
254 void StdStrBuf::CompileFunc(StdCompiler *pComp, int iRawType)
255 {
256  if (pComp->isDeserializer())
257  {
258  char *pnData;
259  pComp->String(&pnData, StdCompiler::RawCompileType(iRawType));
260  Take(pnData);
261  }
262  else
263  {
264  char *pData = const_cast<char *>(getData());
265  if (!pData) pData = const_cast<char *>("");
266  pComp->String(&pData, StdCompiler::RawCompileType(iRawType));
267  }
268 }
269 
270 StdStrBuf FormatString(const char *szFmt, ...)
271 {
272  va_list args; va_start(args, szFmt);
273  return FormatStringV(szFmt, args);
274 }
275 
276 StdStrBuf FormatStringV(const char *szFmt, va_list args)
277 {
278  StdStrBuf Buf;
279  Buf.FormatV(szFmt, args);
280  return Buf;
281 }
282 
283 // replace all occurences of one string with another. Return number of replacements.
284 int StdStrBuf::Replace(const char *szOld, const char *szNew, size_t iStartSearch)
285 {
286  if (!getPtr(0) || !szOld) return 0;
287  if (!szNew) szNew = "";
288  int cnt=0;
289  size_t iOldLen = strlen(szOld), iNewLen = strlen(szNew);
290  if (iOldLen != iNewLen)
291  {
292  // count number of occurences to calculate new string length
293  size_t iResultLen = getLength();
294  const char *szPos = getPtr(iStartSearch);
295  while ((szPos = SSearch(szPos, szOld)))
296  {
297  iResultLen += iNewLen - iOldLen;
298  ++cnt;
299  }
300  if (!cnt) return 0;
301  // now construct new string by replacement
302  StdStrBuf sResult;
303  sResult.New(iResultLen+1);
304  const char *szRPos = getPtr(0), *szRNextPos;
305  char *szWrite = sResult.getMPtr(0);
306  if (iStartSearch)
307  {
308  memcpy(szWrite, szRPos, iStartSearch * sizeof(char));
309  szRPos += iStartSearch;
310  szWrite += iStartSearch;
311  }
312  while ((szRNextPos = SSearch(szRPos, szOld)))
313  {
314  memcpy(szWrite, szRPos, (szRNextPos - szRPos - iOldLen) * sizeof(char));
315  szWrite += (szRNextPos - szRPos - iOldLen);
316  memcpy(szWrite, szNew, iNewLen * sizeof(char));
317  szWrite += iNewLen;
318  szRPos = szRNextPos;
319  }
320  strcpy(szWrite, szRPos);
321  Take(std::move(sResult));
322  }
323  else
324  {
325  // replace directly in this string
326  char *szRPos = getMPtr(iStartSearch);
327  while ((szRPos = const_cast<char *>(SSearch(szRPos, szOld))))
328  {
329  memcpy(szRPos - iOldLen, szNew, iOldLen * sizeof(char));
330  ++cnt;
331  }
332  }
333  return cnt;
334 }
335 
336 int StdStrBuf::ReplaceChar(char cOld, char cNew)
337 {
338  if (isNull()) return 0;
339  char *szPos = getMPtr(0);
340  if (!cOld) return 0;
341  if (!cNew) cNew = '_';
342  int cnt=0;
343  while ((szPos = strchr(szPos, cOld)))
344  {
345  *szPos++ = cNew;
346  ++cnt;
347  }
348  return cnt;
349 }
350 
351 void StdStrBuf::ReplaceEnd(size_t iPos, const char *szNewEnd)
352 {
353  size_t iLen = getLength();
354  assert(iPos <= iLen); if (iPos > iLen) return;
355  size_t iEndLen = strlen(szNewEnd);
356  if (iLen - iPos != iEndLen) SetLength(iPos + iEndLen);
357  memcpy(getMPtr(iPos), szNewEnd, iEndLen * sizeof(char));
358 }
359 
360 bool StdStrBuf::ValidateChars(const char *szInitialChars, const char *szMidChars)
361 {
362  // only given chars may be in string
363  for (size_t i=0; i<getLength(); ++i)
364  if (!strchr(i ? szMidChars : szInitialChars, getData()[i]))
365  return false;
366  return true;
367 }
368 
369 bool StdStrBuf::GetSection(size_t idx, StdStrBuf *psOutSection, char cSeparator) const
370 {
371  assert(psOutSection);
372  psOutSection->Clear();
373  const char *szStr = getData(), *szSepPos;
374  if (!szStr) return false; // invaid argument
375  while ((szSepPos = strchr(szStr, cSeparator)) && idx) { szStr = szSepPos+1; --idx; }
376  if (idx) return false; // indexed section not found
377  // fill output buffer with section, if not empty
378  if (!szSepPos) szSepPos = getData() + getLength();
379  if (szSepPos != szStr) psOutSection->Copy(szStr, szSepPos - szStr);
380  // return true even if section is empty, because the section obviously exists
381  // (to enable loops like while (buf.GetSection(i++, &sect)) if (sect) ...)
382  return true;
383 }
384 
386 {
387  if (!isNull())
388  for (char *szPos = getMPtr(0); *szPos; ++szPos)
389  *szPos = tolower(*szPos);
390 }
391 
392 void StdStrBuf::AppendCharacter(uint32_t unicodechar)
393 {
394  if (unicodechar < 0x80)
395  AppendChar(unicodechar);
396  else if (unicodechar < 0x800)
397  {
398  Grow(2);
399  *getMPtr(getLength() - 2) = (0xC0 | (unicodechar >> 6));
400  *getMPtr(getLength() - 1) = (0x80 | (unicodechar & 0x3F));
401  }
402  else if (unicodechar < 0x10000)
403  {
404  Grow(3);
405  *getMPtr(getLength() - 3) = (0xE0 | (unicodechar >> 12));
406  *getMPtr(getLength() - 2) = (0x80 | ((unicodechar >> 6) & 0x3F));
407  *getMPtr(getLength() - 1) = (0x80 | (unicodechar & 0x3F));
408  }
409  else if (unicodechar < 0x110000)
410  {
411  Grow(4);
412  *getMPtr(getLength() - 4) = (0xF0 | (unicodechar >> 18));
413  *getMPtr(getLength() - 3) = (0x80 | ((unicodechar >> 12) & 0x3F));
414  *getMPtr(getLength() - 2) = (0x80 | ((unicodechar >> 6) & 0x3F));
415  *getMPtr(getLength() - 1) = (0x80 | (unicodechar & 0x3F));
416  }
417  else /* not an unicode code point, ignore */ {}
418 }
419 
420 // Returns true if charset was converted.
422 {
423  // assume that it's windows-1252 and convert to utf-8
424  if (!IsValidUtf8(getData(), getLength()))
425  {
426  size_t j = 0;
427  StdStrBuf buf;
428  buf.Grow(getLength());
429  // totally unfounded statistic: most texts have less than 20 umlauts.
430  enum { GROWSIZE = 20 };
431  for (size_t i = 0; i < getSize(); ++i)
432  {
433  unsigned char c = *getPtr(i);
434  // ASCII
435  if (c < 0x80)
436  {
437  if (j >= buf.getLength())
438  buf.Grow(GROWSIZE);
439  *buf.getMPtr(j++) = c;
440  continue;
441  }
442  // Is c one of the control characters only in ISO/IEC_8859-1 or part of the common subset with windows-1252?
443  if (c == 0x81 || c == 0x8D || c == 0x8F || c == 0x90 || c == 0x9D || c >= 0xA0)
444  {
445  if (j + 1 >= buf.getLength())
446  buf.Grow(GROWSIZE);
447  *buf.getMPtr(j++) = (0xC0 | (c >> 6));
448  *buf.getMPtr(j++) = (0x80 | (c & 0x3F));
449  continue;
450  }
451  // Extra windows-1252-characters
452  buf.SetLength(j);
453  static const char * extra_chars [] =
454  {
455  //"€", 0, "‚", "ƒ", "„", "…", "†", "‡", "ˆ", "‰", "Š", "‹", "Œ", 0, "Ž", 0,
456  // 0, "‘", "’", "“", "”", "•", "–", "—", "˜", "™", "š", "›", "œ", 0, "ž", "Ÿ" };
457  "\xe2\x82\xac", nullptr, "\xe2\x80\x9a", "\xc6\x92", "\xe2\x80\x9e", "\xe2\x80\xa6", "\xe2\x80\xa0", "\xe2\x80\xa1", "\xcb\x86", "\xe2\x80\xb0", "\xc5\xa0", "\xe2\x80\xb9", "\xc5\x92", nullptr, "\xc5\xbd", nullptr,
458  nullptr, "\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9c", "\xe2\x80\x9d", "\xe2\x80\xa2", "\xe2\x80\x93", "\xe2\x80\x94", "\xcb\x9c", "\xe2\x84\xa2", "\xc5\xa1", "\xe2\x80\xba", "\xc5\x93", nullptr, "\xc5\xbe", "\xc5\xb8"
459  };
460  buf.Append(extra_chars[c - 0x80]);
461  j += strlen(extra_chars[c - 0x80]);
462  }
463  buf.SetLength(j);
464  Take(std::move(buf));
465  return true;
466  }
467  return false;
468 }
469 
471 {
472  // get left trim
473  int32_t iSpaceLeftCount = 0, iLength = getLength();
474  if (!iLength) return false;
475  const char *szStr = getData();
476  while (iSpaceLeftCount < iLength)
477  if (isspace((unsigned char)(unsigned char) szStr[iSpaceLeftCount]))
478  ++iSpaceLeftCount;
479  else
480  break;
481  // only spaces? Clear!
482  if (iSpaceLeftCount == iLength)
483  {
484  Clear();
485  return true;
486  }
487  // get right trim
488  int32_t iSpaceRightCount = 0;
489  while (isspace((unsigned char)szStr[iLength - 1 - iSpaceRightCount])) ++iSpaceRightCount;
490  // anything to trim?
491  if (!iSpaceLeftCount && !iSpaceRightCount) return false;
492  // only right trim? Can do this by shortening
493  if (!iSpaceLeftCount)
494  {
495  SetLength(iLength - iSpaceRightCount);
496  return true;
497  }
498  // left trim involved - move text and shorten
499  memmove(getMPtr(0), szStr+iSpaceLeftCount, iLength - iSpaceLeftCount - iSpaceRightCount);
500  SetLength(iLength - iSpaceLeftCount - iSpaceRightCount);
501  return true;
502 }
503 
504 #ifdef _WIN32
505 std::string WStrToString(wchar_t *ws)
506 {
507  int len = WideCharToMultiByte(CP_UTF8, 0, ws, -1, nullptr, 0, nullptr, nullptr);
508  assert(len >= 0);
509  if (len <= 0) return std::string{};
510 
511  std::string s(static_cast<size_t>(len), '\0');
512  s.resize(WideCharToMultiByte(CP_UTF8, 0, ws, -1, &s[0], s.size(), nullptr, nullptr) - 1);
513  return s;
514 }
515 #endif
#define s
StdStrBuf::wchar_t_holder GetWideChar(const char *utf8, bool double_null_terminate=false)
StdBuf GetWideCharBuf(const char *utf8)
#define DirectorySeparator
ptrdiff_t ssize_t
#define O_CLOEXEC
const char * SSearch(const char *szString, const char *szIndex)
Definition: Standard.cpp:369
bool IsValidUtf8(const char *text, int length)
Definition: Standard.cpp:702
StdIntPackAdapt< T > mkIntPackAdapt(T &rVal)
Definition: StdAdaptors.h:791
StdStrBuf FormatString(const char *szFmt,...)
Definition: StdBuf.cpp:270
#define O_BINARY
Definition: StdBuf.cpp:25
StdStrBuf FormatStringV(const char *szFmt, va_list args)
Definition: StdBuf.cpp:276
#define O_SEQUENTIAL
Definition: StdBuf.cpp:26
size_t FileSize(const char *fname)
Definition: StdBuf.h:30
size_t getSize() const
Definition: StdBuf.h:101
void * getMData()
Definition: StdBuf.h:100
void SetSize(size_t inSize)
Definition: StdBuf.h:204
void CompileFunc(class StdCompiler *pComp, int iType=0)
Definition: StdBuf.cpp:114
bool SaveToFile(const char *szFile) const
Definition: StdBuf.cpp:53
void New(size_t inSize)
Definition: StdBuf.h:146
const void * getData() const
Definition: StdBuf.h:99
unsigned int iSize
Definition: StdBuf.h:92
bool LoadFromFile(const char *szFile)
Definition: StdBuf.cpp:32
virtual void Raw(void *pData, size_t iSize, RawCompileType eType=RCT_Escaped)=0
virtual bool Separator(Sep eSep=SEP_SEP)
Definition: StdCompiler.h:119
void Value(const T &rStruct)
Definition: StdCompiler.h:161
virtual void String(char *szString, size_t iMaxLength, RawCompileType eType=RCT_Escaped)=0
virtual bool isDeserializer()
Definition: StdCompiler.h:53
void ToLowerCase()
Definition: StdBuf.cpp:385
size_t getSize() const
Definition: StdBuf.h:444
void AppendFormatV(const char *szFmt, va_list args)
Definition: StdBuf.cpp:198
void AppendCharacter(uint32_t unicodechar)
Definition: StdBuf.cpp:392
void SetLength(size_t iLength)
Definition: StdBuf.h:509
void CompileFunc(class StdCompiler *pComp, int iRawType=0)
Definition: StdBuf.cpp:254
void AppendFormat(const char *szFmt,...) GNUC_FORMAT_ATTRIBUTE_O
Definition: StdBuf.cpp:190
void FormatV(const char *szFmt, va_list args)
Definition: StdBuf.cpp:182
int Replace(const char *szOld, const char *szNew, size_t iStartSearch=0)
Definition: StdBuf.cpp:284
void ReplaceEnd(size_t iPos, const char *szNewEnd)
Definition: StdBuf.cpp:351
const char * getData() const
Definition: StdBuf.h:442
char * getMData()
Definition: StdBuf.h:443
bool EnsureUnicode()
Definition: StdBuf.cpp:421
StdStrBuf()=default
void AppendBackslash()
Definition: StdBuf.cpp:248
bool isNull() const
Definition: StdBuf.h:441
void AppendChar(char cChar)
Definition: StdBuf.h:588
bool ValidateChars(const char *szInitialChars, const char *szMidChars)
Definition: StdBuf.cpp:360
void Copy()
Definition: StdBuf.h:467
void Grow(size_t iGrow)
Definition: StdBuf.h:498
bool LoadFromFile(const char *szFile)
Definition: StdBuf.cpp:73
void Append(const char *pnData, size_t iChars)
Definition: StdBuf.h:519
int ReplaceChar(char cOld, char cNew)
Definition: StdBuf.cpp:336
bool TrimSpaces()
Definition: StdBuf.cpp:470
bool SaveToFile(const char *szFile) const
Definition: StdBuf.cpp:94
const char * getPtr(size_t i) const
Definition: StdBuf.h:448
char * getMPtr(size_t i)
Definition: StdBuf.h:449
void Clear()
Definition: StdBuf.h:466
size_t getLength() const
Definition: StdBuf.h:445
bool GetSection(size_t idx, StdStrBuf *psOutSection, char cSeparator=';') const
Definition: StdBuf.cpp:369
void Take(char *pnData)
Definition: StdBuf.h:457
void Format(const char *szFmt,...) GNUC_FORMAT_ATTRIBUTE_O
Definition: StdBuf.cpp:174