icedb  version 0.5.1
Snow particle scattering database API
shapeIOtextParsers.cpp
Go to the documentation of this file.
1 // This is REALLY rough. Mostly to serve as an example / get the test shape ingest program running.
2 #pragma warning( disable : 4996 ) // -D_SCL_SECURE_NO_WARNINGS
3 #pragma warning( disable : 4244 ) // 'argument': conversion from 'std::streamsize' to 'int', possible loss of data - boost::copy
4 #define _HAS_AUTO_PTR_ETC 1
5 #include <iostream>
6 #include <sstream>
7 #include <string>
8 #include <vector>
9 #include <algorithm>
10 #include <fstream>
11 #include <boost/spirit/include/karma.hpp>
12 #include <boost/spirit/include/phoenix_core.hpp>
13 #include <boost/spirit/include/phoenix_operator.hpp>
14 #include <boost/spirit/include/phoenix_stl.hpp>
15 #include <boost/iostreams/copy.hpp>
16 #include <boost/iostreams/filtering_stream.hpp>
17 #include <boost/spirit/include/qi.hpp>
18 #include "shape.hpp"
19 #ifdef min
20 #undef min
21 #endif
22 #ifdef max
23 #undef max
24 #endif
25 
27 namespace {
28  namespace qi = boost::spirit::qi;
29  namespace ascii = boost::spirit::ascii;
30  namespace phoenix = boost::phoenix;
31 
34  template <typename Iterator>
35  bool parse_shapefile_entries(Iterator first, Iterator last, std::vector<float>& v)
36  {
37  using qi::double_;
38  using qi::long_;
39  using qi::float_;
40  using qi::phrase_parse;
41  using qi::_1;
42  using ascii::space;
43  using phoenix::push_back;
44 
45  bool r = phrase_parse(first, last,
46 
47  // Begin grammar
48  (
49  // *long_[push_back(phoenix::ref(v), _1)]
50  *float_
51  )
52  ,
53  // End grammar
54 
55  space, v);
56 
57  if (first != last) // fail if we did not get a full match
58  return false;
59  return r;
60  }
61 
63  template <typename OutputIterator, typename Container>
64  bool print_shapefile_entries(OutputIterator& sink, Container const& v)
65  {
66  using boost::spirit::karma::long_;
67  using boost::spirit::karma::float_;
68  using boost::spirit::karma::repeat;
69  using boost::spirit::karma::generate;
70  //using boost::spirit::karma::generate_delimited;
71  using boost::spirit::ascii::space;
72 
73  bool r = generate(
74  sink, // destination: output iterator
75  *(
76  //repeat(7)()
77  '\t' << long_ << '\t' << // point id
78  float_ << '\t' << float_ << '\t' << float_ << '\t' << // point coordinates
79  long_ << '\t' << long_ << '\t' << long_ << '\n' // dielectric
80  ),
81  //space, // the delimiter-generator
82  v // the data to output
83  );
84  return r;
85  }
86 
87  template <typename OutputIterator, typename Container>
88  bool print_shapefile_pts(OutputIterator& sink, Container const& v)
89  {
90  using boost::spirit::karma::long_;
91  using boost::spirit::karma::float_;
92  using boost::spirit::karma::repeat;
93  using boost::spirit::karma::generate;
94  //using boost::spirit::karma::generate_delimited;
95  using boost::spirit::ascii::space;
96 
97  bool r = generate(
98  sink, // destination: output iterator
99  *(
100  //repeat(7)()
101  float_ << '\t' << float_ << '\t' << float_ << '\n'
102  ),
103  //space, // the delimiter-generator
104  v // the data to output
105  );
106  return r;
107  }
108 }
109 
110 namespace icedb {
111  namespace Examples {
112  namespace Shapes {
113  void readHeader(const char* in, std::string &desc, size_t &np,
114  size_t &headerEnd)
115  {
116  using namespace std;
117 
118  // Do header processing using istreams.
119  // The previous method used strings, but this didn't work with compressed reads.
120  //size_t &pend = headerEnd;
121  const char* pend = in;
122  const char* pstart = in;
123 
124  // The header is seven lines long
125  for (size_t i = 0; i < 7; i++)
126  {
127  pstart = pend;
128  pend = strchr(pend, '\n');
129  pend++; // Get rid of the newline
130  //pend = in.find_first_of("\n", pend+1);
131  string lin(pstart, pend - pstart - 1);
132  if (*(lin.rbegin()) == '\r') lin.pop_back();
133  //std::getline(in,lin);
134 
135  size_t posa = 0, posb = 0;
136  //Eigen::Array3f *v = nullptr;
137  switch (i)
138  {
139  case 0: // Title line
140  desc = lin;
141  break;
142  case 1: // Number of dipoles
143  {
144  // Seek to first nonspace character
145  posa = lin.find_first_not_of(" \t\n", posb);
146  // Find first space after this position
147  posb = lin.find_first_of(" \t\n", posa);
148  size_t len = posb - posa;
149  string s = lin.substr(posa, len);
150  np = boost::lexical_cast<size_t>(s);
151  //np = macros::m_atoi<size_t>(&(lin.data()[posa]), len);
152  }
153  break;
154  case 6: // Junk line
155  default:
156  break;
157  case 2: // a1
158  case 3: // a2
159  case 4: // d
160  case 5: // x0
161  // These all have the same structure. Read in three doubles, then assign.
162  /*
163  {
164  Eigen::Array3f v;
165  for (size_t j = 0; j < 3; j++)
166  {
167  // Seek to first nonspace character
168  posa = lin.find_first_not_of(" \t\n,", posb);
169  // Find first space after this position
170  posb = lin.find_first_of(" \t\n,", posa);
171  size_t len = posb - posa;
172  string s = lin.substr(posa, len);
173  v(j) = boost::lexical_cast<float>(s);
174  //v(j) = macros::m_atof<float>(&(lin.data()[posa]), len);
175  }
176  hdr->block<3, 1>(0, i - 2) = v;
177 
178  }
179  */
180  break;
181  }
182  }
183 
184  headerEnd = (pend - in) / sizeof(char);
185  }
187  void readDDSCATtextContents(const char *iin, size_t numExpectedPoints, size_t headerEnd, ShapeDataBasic& p)
188  {
189  using namespace std;
190 
191  //Eigen::Vector3f crdsm, crdsi; // point location and diel entries
192  const char* pa = &iin[headerEnd];
193  const char* pb = strchr(pa + 1, '\0');
194 
195  std::vector<float> parser_vals; //(numPoints*8);
196  parser_vals.reserve(7 * numExpectedPoints);
197  parse_shapefile_entries(pa, pb, parser_vals);
198  assert(parser_vals.size() % 7 == 0);
199  size_t &numPoints = p.required.number_of_particle_scattering_elements;
200  numPoints = parser_vals.size() / 7;
201  assert(numPoints == numExpectedPoints);
202  p.optional.particle_scattering_element_number.resize(numPoints);
203  p.required.particle_scattering_element_coordinates.resize(numPoints * 3);
204  std::set<uint8_t> constituents;
205 
206  for (size_t i = 0; i < numPoints; ++i)
207  {
208  size_t pIndex = 7 * i;
209  p.optional.particle_scattering_element_number[i] = static_cast<uint64_t>(parser_vals[pIndex]);
210  p.required.particle_scattering_element_coordinates[3*i] = parser_vals[pIndex+1];
211  p.required.particle_scattering_element_coordinates[(3 * i)+1] = parser_vals[pIndex + 2];
212  p.required.particle_scattering_element_coordinates[(3 * i)+2] = parser_vals[pIndex + 3];
213 
214  constituents.emplace(static_cast<uint8_t>(parser_vals[pIndex + 4]));
215  //p.required.particle_scattering_element_composition[i] = parser_vals[pIndex + 4]; //! Redo pass later and map
216 
217  }
218  p.optional.particle_constituent_number = Int8Data_t(constituents.begin(), constituents.end());
220  if (constituents.size() >= UINT8_MAX) throw (std::invalid_argument("Shape has too many constituents."));
221  p.required.number_of_particle_constituents = static_cast<uint8_t>(constituents.size());
222 
223  for (size_t i = 0; i < numPoints; ++i)
224  {
225  size_t pIndex = 7 * i;
226  uint64_t substance_id = static_cast<uint64_t>(parser_vals[pIndex + 4]);
227  size_t offset = 0;
228  for (auto it = constituents.cbegin(); it != constituents.cend(); ++it, ++offset) {
229  if ((*it) == substance_id) break;
230  }
231  size_t idx = (i*constituents.size()) + offset;
233  }
235  }
236 
237 
238  ShapeDataBasic readDDSCAT(const char* in)
239  {
240  using namespace std;
241  ShapeDataBasic res;
242 
243  std::string str(in);
244  std::string desc; // Currently unused
245  size_t headerEnd = 0, numPoints = 0;
246  readHeader(str.c_str(), desc, numPoints, headerEnd);
247  //res.required.particle_id = desc;
248  //data->resize((int)numPoints, ::scatdb::shape::backends::NUM_SHAPECOLS);
249  readDDSCATtextContents(str.c_str(), numPoints, headerEnd, res);
250  return res;
251  }
252 
253 
254 
255  void writeDDSCAT(const std::string &filename, const ShapeDataBasic& p)
256  {
257  using namespace std;
258  std::ofstream out(filename.c_str());
259 
260  out << p.required.particle_id << endl;
261  out << p.required.number_of_particle_scattering_elements << "\t= Number of lattice points" << endl;
262 
263  out << "1.0\t1.0\t1.0\t= target vector a1 (in TF)" << endl;
264  out << "0.0\t1.0\t0.0\t= target vector a2 (in TF)" << endl;
265  out << "1.0\t1.0\t1.0\t= d_x/d d_y/d d_x/d (normally 1 1 1)" << endl;
266 
267  //out << (*hdr)(X0, 0) << "\t" << (*hdr)(X0, 1) << "\t" << (*hdr)(X0, 2);
268  out << "0.0\t0.0\t0.0\t= X0(1-3) = location in lattice of target origin" << endl;
269  out << "\tNo.\tix\tiy\tiz\tICOMP(x, y, z)" << endl;
270  //size_t i = 1;
271 
272  const size_t numPoints = p.required.number_of_particle_scattering_elements;
273  std::vector<float> oi(numPoints * 7);
274 
275  for (size_t j = 0; j < numPoints; j++)
276  {
277  const float &x = p.required.particle_scattering_element_coordinates[j * 3+0];
278  const float &y = p.required.particle_scattering_element_coordinates[j * 3+1];
279  const float &z = p.required.particle_scattering_element_coordinates[j * 3+2];
281  oi[j * 7 + 0] = j+1;
282  }
283  else {
284  oi[j * 7 + 0] = p.optional.particle_scattering_element_number[j];
285  }
286  oi[j * 7 + 1] = x;
287  oi[j * 7 + 2] = y;
288  oi[j * 7 + 3] = z;
289 
290  uint64_t comp = 1;
294  }
296  throw(std::invalid_argument("Cannot write a DDSCAT shape file with this type of dielectric!"));
297  }
298  }
299  oi[j * 7 + 4] = comp;
300  oi[j * 7 + 5] = comp;
301  oi[j * 7 + 6] = comp;
302  }
303 
304  std::string generated;
305  std::back_insert_iterator<std::string> sink(generated);
306  if (!print_shapefile_entries(sink, oi))
307  {
308  throw(std::invalid_argument("Somehow unable to print the shape points properly."));
309  }
310  out << generated;
311  }
312 
313 
314 
317  ShapeDataBasic readRawText(const char *iin)
318  {
319  using namespace std;
320  ShapeDataBasic res;
321  //Eigen::Vector3f crdsm, crdsi; // point location and diel entries
322  const char* pa = iin; // Start of the file
323  const char* pb = strchr(pa + 1, '\0'); // End of the file
324  const char* pNumStart = pa;
325  // Search for the first line that is not a comment.
326  // Nmat lines for ADDA are also ignored.
327  while ((pNumStart[0] == '#' || pNumStart[0] == 'N' || pNumStart[0] == 'n') && pNumStart < pb) {
328  const char* lineEnd = strchr(pNumStart + 1, 'n');
329  pNumStart = lineEnd + 1;
330  }
331  if (pNumStart >= pb) throw(std::invalid_argument("Cannot find any points in a shapefile."));
332 
333  const char* firstLineEnd = strchr(pNumStart + 1, '\n'); // End of the first line containing numberic data.
334  // Attempt to guess the number of points based on the number of lines in the file.
335  int guessNumPoints = std::count(pNumStart, pb, '\n');
336  std::vector<float> firstLineVals; //(numPoints*8);
337  //std::vector<float> &parser_vals = res.required.particle_scattering_element_coordinates;
338  std::vector<float> parser_vals;
339  parser_vals.reserve(guessNumPoints * 4);
340  parse_shapefile_entries(pNumStart, pb, parser_vals);
341  const void* floatloc = memchr(pNumStart, '.', pb - pNumStart);
343 
344  parse_shapefile_entries(pNumStart, firstLineEnd, firstLineVals);
345 
346  size_t numCols = firstLineVals.size();
347  bool good = false;
348  if (numCols == 3) good = true; // Three columns, x, y and z
349  if (numCols == 4) good = true; // Four columns, x, y, z and material
350  if (!good) throw (std::invalid_argument("Bad read"));
351  if (parser_vals.size() == 0) throw (std::invalid_argument("Bad read"));
352 
353  size_t actualNumPoints = parser_vals.size() / numCols;
354  assert(actualNumPoints == guessNumPoints);
355 
356  res.required.number_of_particle_scattering_elements = actualNumPoints;
357  if (numCols == 3) {
360  }
361  else if (numCols == 4) {
362  // Count the number of distinct materials
363  uint8_t max_constituent = 1;
364  for (size_t i = 0; i < actualNumPoints; ++i) {
365  auto constit = parser_vals[(4 * i) + 3];
366  assert(static_cast<uint8_t>(constit) < UINT8_MAX);
367  if (static_cast<uint8_t>(constit) > max_constituent)
368  max_constituent = static_cast<uint8_t>(constit);
369  }
370  res.required.number_of_particle_constituents = max_constituent;
371  res.optional.particle_constituent_number.resize(max_constituent);
372  for (size_t i = 0; i < max_constituent; ++i)
373  res.optional.particle_constituent_number[i] = static_cast<uint8_t>(i + 1); // assert-checked before
374 
375  res.required.particle_scattering_element_coordinates.resize(actualNumPoints * 3);
376  res.optional.particle_scattering_element_composition_whole.resize(actualNumPoints);
377  for (size_t i = 0; i < actualNumPoints; ++i) {
378  size_t crdindex = (3 * i);
379  size_t parserindex = (4 * i);
380  res.required.particle_scattering_element_coordinates[crdindex + 0] = parser_vals[parserindex + 0];
381  res.required.particle_scattering_element_coordinates[crdindex + 1] = parser_vals[parserindex + 1];
382  res.required.particle_scattering_element_coordinates[crdindex + 2] = parser_vals[parserindex + 2];
383  res.optional.particle_scattering_element_composition_whole[i] = static_cast<uint8_t>(parser_vals[parserindex + 3]);
384  }
385  }
386 
387  res.required.particle_id = "";
388 
389  return res;
390  }
391 
392  void writeTextRaw(const std::string &filename, const ShapeDataBasic& p)
393  {
394  using namespace std;
395  std::ofstream out(filename.c_str());
396  std::string generated;
397  std::back_insert_iterator<std::string> sink(generated);
399  {
400  throw(std::invalid_argument("Somehow unable to print the shape points properly."));
401  }
402  out << generated;
403  }
404 
406  const std::string &filename) {
407  // Open the file and copy to a string. Check the first few lines to see if any
408  // alphanumeric characters are present. If there are, treat it as a DDSCAT file.
409  // Otherwise, treat as a raw text file.
410  std::ifstream in(filename.c_str());
411  std::ostringstream so;
412  boost::iostreams::copy(in, so);
413  std::string s = so.str();
414 
415  auto end = s.find_first_of("\n\0");
416  Expects(end != std::string::npos);
417  std::string ssub = s.substr(0,end);
418  auto spos = ssub.find_first_not_of("0123456789. \t\n");
419 
420  if (std::string::npos == spos) // This is a raw text file
421  return readRawText(s.c_str());
422  else return readDDSCAT(s.c_str()); // This is a DDSCAT file
423  //if ((std::string::npos != spos) && (spos < end)) {
424  // return readDDSCAT(s.c_str());
425  //}
426  //else {
427  // return readTextRaw(s.c_str());
428  //}
429  }
430 
431  }
432  }
433 }
std::vector< uint8_t > Int8Data_t
Definition: shape.hpp:12
void writeDDSCAT(const std::string &filename, const ShapeDataBasic &p)
ShapeDataBasic readRawText(const char *iin)
uint8_t particle_scattering_element_coordinates_are_integral
Definition: shape.hpp:21
bool print_shapefile_entries(OutputIterator &sink, Container const &v)
Used in quickly printing shapefile.
STL namespace.
void readDDSCATtextContents(const char *iin, size_t numExpectedPoints, size_t headerEnd, ShapeDataBasic &p)
Read ddscat text contents.
ShapeDataBasic readDDSCAT(const char *in)
ShapeDataBasic readTextFile(const std::string &filename)
ShapeCommonOptionalData optional
Definition: shape.hpp:44
bool parse_shapefile_entries(Iterator first, Iterator last, std::vector< float > &v)
Parses space-separated shapefile entries.
bool print_shapefile_pts(OutputIterator &sink, Container const &v)
void writeTextRaw(const std::string &filename, const ShapeDataBasic &p)
void readHeader(const char *in, std::string &desc, size_t &np, size_t &headerEnd)