icedb  version 0.5.1
Snow particle scattering database API
shapeIOtextParsers2.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <sstream>
3 #include <string>
4 #include <vector>
5 #include <algorithm>
6 #include <fstream>
7 #include <cmath>
8 
9 #include <icedb/fs_backend.hpp>
10 #include <boost/lexical_cast.hpp>
11 //#include <boost/iostreams/copy.hpp>
12 //#include <boost/iostreams/filtering_stream.hpp>
13 #include "shape.hpp"
14 namespace icedb {
15  namespace Examples {
16  namespace Shapes {
17 
19  const char* in, const size_t inlen, float* out, const size_t outlen, float& max_element)
20  {
21  max_element = 0;
22  size_t curout = 0;
23  // Accepts numbers of the form: [0-9]*
24  // No negatives, exponents or decimals.
25 
26  float numerator = 0;
27  assert(in);
28  const char* end = in + inlen;
29  bool readnums = false;
30  for (const char* cur = in; (cur <= end) && (curout < outlen); ++cur) {
31  if ((*cur <= '9') && (*cur >= '0')) {
32  numerator *= 10;
33  numerator += (*cur - '0');
34  readnums = true;
35  } else if(readnums){
36  out[curout] = numerator;
37  if (numerator > max_element) max_element = numerator;
38  curout++;
39  numerator = 0;
40  readnums = false;
41  }
42  }
43  return curout;
44  }
45 
46 
48  const char* in, const size_t inlen, float* out, const size_t outlen)
49  {
50  size_t curout = 0;
51  // Accepts numbers of the form: (+- )[0-9]*.[0-9]*(eE)(+- )[0-9]*.[0-9]*
52  bool isNegative = false;
53  bool inExponent = false;
54  bool expIsNeg = false;
55  bool pastDecimal = false;
56 
57  uint64_t numerator, numeratorExp;
58  uint64_t digits_denom, digits_denom_Exp;
59  auto resetNum = [&]()
60  {numerator = 0; digits_denom = 0;
61  numeratorExp = 0; digits_denom_Exp = 0;
62  isNegative = false; inExponent = false;
63  expIsNeg = false; pastDecimal = false; };
64  resetNum();
65 
66  const char* cur = in;
67  const char* end = in + inlen;
68  // Advance to the start of a number
69  assert(cur);
70  const char* numbers = "0123456789-+.eE";
71  const char* whitespace = " \t\n";
72  const char* numEnd = nullptr;
73  auto isNumber = [](char c) -> bool {
74  if (c >= '0' && c <= '9') return true;
75  if (c == '-' || c == '+' || c == '.' || c == 'e' || c == 'E') return true;
76  return false;
77  };
78  auto isControl = [](char c) -> bool {
79  if (c == '-' || c == '+' || c == '.' || c == 'e' || c == 'E') return true;
80  return false;
81  };
82  auto advanceToNumber = [](const char* in, const char* end) -> const char* {
83  while (in < end) {
84  if (*in >= '0' && *in <= '9') return in;
85  else if (*in == '-' || *in == '+' || *in == '.' || *in == 'e' || *in == 'E') return in;
86  else ++in;
87  }
88  return in;
89  };
90  while ((cur < end) && (curout < outlen)) {
91  //cur = strpbrk(cur, numbers);
92  cur = advanceToNumber(cur, end);
93  while (isNumber(*cur)) {
94  if (!isControl(*cur)) {
95  if (!inExponent) {
96  numerator *= 10;
97  numerator += (*cur - '0');
98  if (pastDecimal) digits_denom++;
99  } else {
100  numeratorExp *= 10;
101  numeratorExp += (*cur - '0');
102  if (pastDecimal) digits_denom_Exp++;
103  }
104  } else {
105  if (*cur == '.') pastDecimal = true;
106  if (*cur == '-' && !inExponent) isNegative = true;
107  if (*cur == '-' && inExponent) expIsNeg = true;
108  if (*cur == 'e' || *cur == 'E') {
109  inExponent = true; pastDecimal = false; isNegative = false;
110  }
111  }
112  ++cur;
113  }
114  if (!numerator) continue;
115 
116  // Number is loaded. Assign and advance.
117  float exponent = 0;
118  if (numeratorExp) {
119  exponent = static_cast<float>(numeratorExp);
120  if (digits_denom_Exp) exponent /= powf(10.f, static_cast<float>(digits_denom_Exp));
121  if (expIsNeg) exponent *= -1;
122  }
123 
124  float num = 0;
125  num = static_cast<float>(numerator);
126  if (digits_denom) num /= powf(10.f, static_cast<float>(digits_denom));
127  if (isNegative) num *= -1;
128 
129  float fnum = num;
130  if (numeratorExp) fnum *= powf(10.f, exponent);
131 
132  out[curout] = fnum;
133  resetNum();
134  curout++;
135 
136  ++cur;
137  }
138  return curout;
139  }
140 
141  template <class T>
142  T m_atof(const char* x, size_t len)
143  {
144  T res = 0;
145  unsigned int remainder = 0;
146  unsigned int rembase = 1;
147  unsigned int digit = 0;
148  // Sign false indicates positive. True is negative
149  bool sign = false;
150  bool expsign = false;
151  unsigned int expi = 0;
152  const char* p = x; // Set pointer to beginning of character stream
153  bool exponent = false;
154  bool decimal = false;
155  size_t i = 0;
156  while (*p != '\0' && ((len) ? i<len : true))
157  {
158  // Do digit checks here (no calls to isdigit)
159  // Ignore whitespace
160  if (*p == 'e' || *p == 'E')
161  {
162  exponent = true;
163  }
164  else if (*p == '.') {
165  decimal = true;
166  }
167  else if (*p == '-') {
168  if (!exponent)
169  {
170  sign = true;
171  }
172  else {
173  expsign = true;
174  }
175  }
176  else if (*p == '+') {
177  if (!exponent)
178  {
179  sign = false;
180  }
181  else {
182  expsign = false;
183  }
184  }
185  else if (*p == ' ' || *p == '\t') {
186  // Ignore whitespace (but disallow endlines)
187  }
188  else {
189  // It's a digit!
190  switch (*p)
191  {
192  case '0':
193  digit = 0;
194  break;
195  case '1':
196  digit = 1;
197  break;
198  case '2':
199  digit = 2;
200  break;
201  case '3':
202  digit = 3;
203  break;
204  case '4':
205  digit = 4;
206  break;
207  case '5':
208  digit = 5;
209  break;
210  case '6':
211  digit = 6;
212  break;
213  case '7':
214  digit = 7;
215  break;
216  case '8':
217  digit = 8;
218  break;
219  case '9':
220  digit = 9;
221  break;
222  default:
223  // Invalid input
224  p++;
225  continue;
226  break;
227  }
228  // Digit is set. Next, see what to do with it
229  if (!decimal && !exponent)
230  {
231  res *= 10;
232  res += digit;
233  }
234  else if (decimal && !exponent) {
235  remainder *= 10;
236  rembase *= 10;
237  remainder += digit;
238  }
239  else if (exponent) {
240  expi *= 10;
241  expi += digit;
242  }
243  }
244 
245  p++;
246  i++;
247  }
248  // Iterated through the string
249  // Now, to combine the elements into my double
250  res += (T)remainder / (T)rembase;
251  if (sign) res *= -1;
252  if (exponent)
253  {
254  if (!expsign)
255  {
256  res *= (T)std::pow(10, (T)expi);
257  }
258  else {
259  res *= (T)std::pow(10, -1.0 * (T)expi);
260  }
261  }
262  return res;
263  }
264 
265  template <class T>
266  T m_atoi(const char *x, size_t len)
267  {
268  T res = 0;
269  int digit = 0;
270  bool sign = false; // false is pos, true is neg
271  bool done = false;
272  size_t i = 0;
273  const char* p = x; // Set pointer to beginning of character stream
274  while (*p != '\0' && done == false && ((len) ? i<len : true))
275  {
276  if (*p == '-') {
277  sign = true;
278  }
279  else if (*p == '+') {
280  sign = false;
281  }
282  else if (*p == ' ' || *p == '\t') {
283  // Ignore whitespace (but disallow endlines)
284  }
285  else {
286  // It's a digit!
287  switch (*p)
288  {
289  case '0':
290  digit = 0;
291  break;
292  case '1':
293  digit = 1;
294  break;
295  case '2':
296  digit = 2;
297  break;
298  case '3':
299  digit = 3;
300  break;
301  case '4':
302  digit = 4;
303  break;
304  case '5':
305  digit = 5;
306  break;
307  case '6':
308  digit = 6;
309  break;
310  case '7':
311  digit = 7;
312  break;
313  case '8':
314  digit = 8;
315  break;
316  case '9':
317  digit = 9;
318  break;
319  default:
320  // Invalid input
321  done = true;
322  break;
323  }
324  // Digit is set. Next, see what to do with it
325  if (done) break;
326  res *= 10;
327  res += digit;
328  }
329  p++;
330  i++;
331  }
332 
333  // Return the value
334  if (sign) res *= -1;
335  return res;
336  }
337 
338 
339  ShapeDataBasic readTextFile(const std::string &filename);
340  ShapeDataBasic readDDSCAT(const char* in);
341  void readHeader(const char* in, std::string &desc, size_t &np, size_t &headerEnd);
342  void readDDSCATtextContents(const char *iin, size_t numExpectedPoints, size_t headerEnd, ShapeDataBasic& p);
343  ShapeDataBasic readRawText(const char *iin);
344 
346  const std::string &filename) {
347  // Open the file and copy to a string. Check the first few lines to see if any
348  // alphanumeric characters are present. If there are, treat it as a DDSCAT file.
349  // Otherwise, treat as a raw text file.
350  std::ifstream in(filename.c_str());
351  uintmax_t sz = sfs::file_size(sfs::path(filename));
352  std::string s(sz, ' ');
353  in.read(&s[0], sz);
354 
355  auto end = s.find_first_of("\n\0");
356  Expects(end != std::string::npos);
357  std::string ssub = s.substr(0, end);
358  auto spos = ssub.find_first_not_of("0123456789. \t\n");
359 
360  if (std::string::npos == spos) // This is a raw text file
361  return readRawText(s.c_str());
362  else return readDDSCAT(s.c_str()); // This is a DDSCAT file
363  //if ((std::string::npos != spos) && (spos < end)) {
364  // return readDDSCAT(s.c_str());
365  //}
366  //else {
367  // return readTextRaw(s.c_str());
368  //}
369  }
370 
371  ShapeDataBasic readDDSCAT(const char* in)
372  {
373  using namespace std;
374  ShapeDataBasic res;
375 
376  std::string str(in);
377  std::string desc; // Currently unused
378  size_t headerEnd = 0, numPoints = 0;
379  readHeader(str.c_str(), desc, numPoints, headerEnd);
380  //res.required.particle_id = desc;
381  //data->resize((int)numPoints, ::scatdb::shape::backends::NUM_SHAPECOLS);
382  readDDSCATtextContents(str.c_str(), numPoints, headerEnd, res);
383  return res;
384  }
385 
386  void readHeader(const char* in, std::string &desc, size_t &np,
387  size_t &headerEnd)
388  {
389  using namespace std;
390 
391  // Do header processing using istreams.
392  // The previous method used strings, but this didn't work with compressed reads.
393  //size_t &pend = headerEnd;
394  const char* pend = in;
395  const char* pstart = in;
396 
397  // The header is seven lines long
398  for (size_t i = 0; i < 7; i++)
399  {
400  pstart = pend;
401  pend = strchr(pend, '\n');
402  pend++; // Get rid of the newline
403  //pend = in.find_first_of("\n", pend+1);
404  string lin(pstart, pend - pstart - 1);
405  if (*(lin.rbegin()) == '\r') lin.pop_back();
406  //std::getline(in,lin);
407 
408  size_t posa = 0, posb = 0;
409  //Eigen::Array3f *v = nullptr;
410  switch (i)
411  {
412  case 0: // Title line
413  desc = lin;
414  break;
415  case 1: // Number of dipoles
416  {
417  // Seek to first nonspace character
418  posa = lin.find_first_not_of(" \t\n", posb);
419  // Find first space after this position
420  posb = lin.find_first_of(" \t\n", posa);
421  size_t len = posb - posa;
422  string s = lin.substr(posa, len);
423  np = boost::lexical_cast<size_t>(s);
424  //np = macros::m_atoi<size_t>(&(lin.data()[posa]), len);
425  }
426  break;
427  case 6: // Junk line
428  default:
429  break;
430  case 2: // a1
431  case 3: // a2
432  case 4: // d
433  case 5: // x0
434  // These all have the same structure. Read in three doubles, then assign.
435  /*
436  {
437  Eigen::Array3f v;
438  for (size_t j = 0; j < 3; j++)
439  {
440  // Seek to first nonspace character
441  posa = lin.find_first_not_of(" \t\n,", posb);
442  // Find first space after this position
443  posb = lin.find_first_of(" \t\n,", posa);
444  size_t len = posb - posa;
445  string s = lin.substr(posa, len);
446  v(j) = boost::lexical_cast<float>(s);
447  //v(j) = macros::m_atof<float>(&(lin.data()[posa]), len);
448  }
449  hdr->block<3, 1>(0, i - 2) = v;
450 
451  }
452  */
453  break;
454  }
455  }
456 
457  headerEnd = (pend - in) / sizeof(char);
458  }
459 
461  void readDDSCATtextContents(const char *iin, size_t numExpectedPoints, size_t headerEnd, ShapeDataBasic& p)
462  {
463  using namespace std;
464 
465  //Eigen::Vector3f crdsm, crdsi; // point location and diel entries
466  const char* pa = &iin[headerEnd];
467  const char* pb = strchr(pa + 1, '\0');
468 
469  std::vector<float> parser_vals; //(numPoints*8);
470  parser_vals.resize(7 * numExpectedPoints);
471  float max_element = 0;
472  size_t numRead = strints_array_to_floats(pa, pb - pa, parser_vals.data(), parser_vals.size(), max_element);
473 
474  assert(numRead % 7 == 0);
476  numPoints = parser_vals.size() / 7;
477  assert(numPoints == numExpectedPoints);
478  p.optional.particle_scattering_element_number.resize(numPoints);
479  p.required.particle_scattering_element_coordinates.resize(numPoints * 3);
480  std::set<uint8_t> constituents;
481 
482  for (size_t i = 0; i < numPoints; ++i)
483  {
484  size_t pIndex = 7 * i;
485  p.optional.particle_scattering_element_number[i] = static_cast<uint64_t>(parser_vals[pIndex]);
486  p.required.particle_scattering_element_coordinates[3 * i] = parser_vals[pIndex + 1];
487  p.required.particle_scattering_element_coordinates[(3 * i) + 1] = parser_vals[pIndex + 2];
488  p.required.particle_scattering_element_coordinates[(3 * i) + 2] = parser_vals[pIndex + 3];
489 
490  constituents.emplace(static_cast<uint8_t>(parser_vals[pIndex + 4]));
491  //p.required.particle_scattering_element_composition[i] = parser_vals[pIndex + 4]; //! Redo pass later and map
492 
493  }
494  p.optional.particle_constituent_number = Int8Data_t(constituents.begin(), constituents.end());
496  if (constituents.size() >= UINT8_MAX) throw (std::invalid_argument("Shape has too many constituents."));
497  p.required.number_of_particle_constituents = static_cast<uint8_t>(constituents.size());
498 
499  for (size_t i = 0; i < numPoints; ++i)
500  {
501  size_t pIndex = 7 * i;
502  uint64_t substance_id = static_cast<uint64_t>(parser_vals[pIndex + 4]);
503  size_t offset = 0;
504  for (auto it = constituents.cbegin(); it != constituents.cend(); ++it, ++offset) {
505  if ((*it) == substance_id) break;
506  }
507  size_t idx = (i*constituents.size()) + offset;
509  }
512  }
513 
514 
515 
516 
519  ShapeDataBasic readRawText(const char *iin)
520  {
521  using namespace std;
522  ShapeDataBasic res;
523  //Eigen::Vector3f crdsm, crdsi; // point location and diel entries
524  const char* pa = iin; // Start of the file
525  const char* pb = strchr(pa + 1, '\0'); // End of the file
526  const char* pNumStart = pa;
527  // Search for the first line that is not a comment.
528  // Nmat lines for ADDA are also ignored.
529  while ((pNumStart[0] == '#' || pNumStart[0] == 'N' || pNumStart[0] == 'n') && pNumStart < pb) {
530  const char* lineEnd = strchr(pNumStart + 1, 'n');
531  pNumStart = lineEnd + 1;
532  }
533  if (pNumStart >= pb) throw(std::invalid_argument("Cannot find any points in a shapefile."));
534 
535  const char* firstLineEnd = strchr(pNumStart + 1, '\n'); // End of the first line containing numeric data.
536  // Attempt to guess the number of points based on the number of lines in the file.
537  // The implementation using std::count is unfortunately slow
538  //int guessNumPoints = (int) std::count(pNumStart, pb, '\n');
539  // This is much faster, and allows for auto-vectorization
540  int guessNumPoints = 1; // Just in case there is a missing newline at the end
541  // This format does not pre-specify the number of points.
542  for (const char* c = pNumStart; c != pb; ++c)
543  if (c[0] == '\n') guessNumPoints++;
544 
545  float max_element = -1, junk_f = -1;
546  std::array<float, 4> firstLineVals; //(numPoints*8);
547  //std::vector<float> &parser_vals = res.required.particle_scattering_element_coordinates;
548  std::vector<float> parser_vals((guessNumPoints * 4), 0);
549 
550  size_t actualNumReads = strints_array_to_floats(pNumStart, pb - pNumStart, parser_vals.data(), parser_vals.size(), max_element);
551  if (actualNumReads == 0) throw (std::invalid_argument("Bad read"));
552  parser_vals.resize(actualNumReads);
553 
554  //parse_shapefile_entries(pNumStart, pb, parser_vals);
555  //const void* floatloc = memchr(pNumStart, '.', pb - pNumStart);
556  //res.required.particle_scattering_element_coordinates_are_integral = (floatloc) ? 0 : 1;
558 
559  // Also parse just the first line to get the number of columns
560  size_t numCols = strints_array_to_floats(pNumStart, firstLineEnd - pNumStart, firstLineVals.data(), firstLineVals.size(), junk_f);
561 
562  bool good = false;
563  if (numCols == 3) good = true; // Three columns, x, y and z
564  if (numCols == 4) good = true; // Four columns, x, y, z and material
565  if (!good) throw (std::invalid_argument("Bad read"));
566 
567  size_t actualNumPoints = actualNumReads / numCols;
568  assert(actualNumPoints == guessNumPoints);
569 
570 
571  res.required.number_of_particle_scattering_elements = actualNumPoints;
572  if (numCols == 3) {
575  }
576  else if (numCols == 4) {
577  // Count the number of distinct materials
578  uint8_t max_constituent = 1;
579  for (size_t i = 0; i < actualNumPoints; ++i) {
580  auto constit = parser_vals[(4 * i) + 3];
581  assert(static_cast<uint8_t>(constit) < UINT8_MAX);
582  if (static_cast<uint8_t>(constit) > max_constituent)
583  max_constituent = static_cast<uint8_t>(constit);
584  }
585  res.required.number_of_particle_constituents = max_constituent;
586  res.optional.particle_constituent_number.resize(max_constituent);
587  for (size_t i = 0; i < max_constituent; ++i)
588  res.optional.particle_constituent_number[i] = static_cast<uint8_t>(i + 1); // assert-checked before
589 
590  res.required.particle_scattering_element_coordinates.resize(actualNumPoints * 3);
591  res.optional.particle_scattering_element_composition_whole.resize(actualNumPoints);
592  for (size_t i = 0; i < actualNumPoints; ++i) {
593  size_t crdindex = (3 * i);
594  size_t parserindex = (4 * i);
595  res.required.particle_scattering_element_coordinates[crdindex + 0] = parser_vals[parserindex + 0];
596  res.required.particle_scattering_element_coordinates[crdindex + 1] = parser_vals[parserindex + 1];
597  res.required.particle_scattering_element_coordinates[crdindex + 2] = parser_vals[parserindex + 2];
598  res.optional.particle_scattering_element_composition_whole[i] = static_cast<uint8_t>(parser_vals[parserindex + 3]);
599  }
600  }
601 
602  res.required.particle_id = "";
604 
605  return res;
606  }
607 
608 
609  }
610  }
611 }
std::vector< uint8_t > Int8Data_t
Definition: shape.hpp:12
T m_atoi(const char *x, size_t len)
ShapeDataBasic readRawText(const char *iin)
size_t strints_array_to_floats(const char *in, const size_t inlen, float *out, const size_t outlen, float &max_element)
uint8_t particle_scattering_element_coordinates_are_integral
Definition: shape.hpp:21
STL namespace.
void readDDSCATtextContents(const char *iin, size_t numExpectedPoints, size_t headerEnd, ShapeDataBasic &p)
Read ddscat text contents.
ShapeDataBasic readDDSCAT(const char *in)
ShapeDataBasic readTextFile(const std::string &filename)
T m_atof(const char *x, size_t len)
ShapeCommonOptionalData optional
Definition: shape.hpp:44
void readHeader(const char *in, std::string &desc, size_t &np, size_t &headerEnd)
size_t array_to_floats(const char *in, const size_t inlen, float *out, const size_t outlen)