icedb  version 0.5.1
Snow particle scattering database API
splitSet.cpp
Go to the documentation of this file.
1 #include <algorithm>
2 #include <string>
3 #include <map>
4 #include <vector>
5 #include <set>
6 #include <boost/exception/all.hpp>
7 #include <boost/math/constants/constants.hpp>
8 #include <boost/tokenizer.hpp>
9 #include <boost/lexical_cast.hpp>
10 #include <boost/algorithm/string/trim.hpp>
11 #include <sstream>
12 #include <iostream>
13 
14 #include "../icedb/splitSet.hpp"
15 #include "../icedb/error.hpp"
16 
17 #pragma warning( disable : 4244 ) // lots of template math involves doubles, and I'm sick of static casting
18 
19 namespace icedb {
20  namespace splitSet {
21 
/// Format a range description as the text "start:interval:end:specializer".
/// @param Tstart       first value of the range
/// @param Tend         last value of the range
/// @param Tinterval    interval (or count) field of the range
/// @param Tspecializer spacing keyword; written verbatim after the last colon
/// @param out          receives the formatted text, replacing any previous contents
template <class T>
void stringifyRange(const T &Tstart, const T &Tend, const T &Tinterval,
    const std::string &Tspecializer, std::string &out)
{
    std::ostringstream formatted;
    // Field order is start, interval, end - the same order used by the range notation.
    formatted << Tstart << ':' << Tinterval << ':' << Tend << ':' << Tspecializer;
    out = formatted.str();
}
32 
// Invokes the macro f once for each of the built-in numeric types listed below.
// Each invocation is followed by its own semicolon.
33 #define DOTYPES(f) f(int); f(float); f(double); f(long); f(long long); \
34  f(unsigned int); f(unsigned long); f(unsigned long long);
35 
// Expands one already-parsed range description into individual values and adds them to
// expanded. Tstart, Tend and Tinterval are converted to double with boost::lexical_cast;
// Tspecializer is lower-cased and selects the spacing rule:
//   ""    - step from start to end in increments of interval
//   "lin" - split [start, end] into `interval` equal cells and take the midpoint of each
//   "log" - as "lin", but the cells are equal in log10 of the value
//   "inv" - as "lin", but the cells are equal in 1/value
//   "cos" - evenly spaced in the cosine of the value; start and end are in degrees
// Every computed value is cast back to T before it is inserted.
// NOTE(review): the embedded listing numbers skip 53, 66, 90, 109 and 157, so each chained
// .add<...>() sequence below has no visible head. Presumably each began with an
// error-raising statement - confirm against the original file.
36  template <class T>
37  void splitSet(
38  const T &Tstart, const T &Tend, const T &Tinterval,
39  const std::string &Tspecializer,
40  std::set<T> &expanded)
41  {
42  using namespace std;
43  double start, end, interval;
44  start = boost::lexical_cast<double>(Tstart);
45  end = boost::lexical_cast<double>(Tend);
46  interval = boost::lexical_cast<double>(Tinterval);
47  std::string specializer(Tspecializer);
48  std::transform(specializer.begin(),specializer.end(),specializer.begin(),::tolower);
// Text form of the request; it is attached to the error details below.
49  std::string srange;
50  stringifyRange(Tstart,Tend,Tinterval,Tspecializer,srange);
51 
// A negative interval is handled here for every specializer.
52  if (interval < 0) {
54  .add<std::string>("range-name", srange)
55  .add<std::string>("specializer-type", specializer)
56  .add<double>("interval", interval)
57  .template add<T>("Tstart", Tstart)
58  .template add<T>("Tend", Tend)
59  .template add<T>("Tinterval", Tinterval);
60  }
61 
62  if (specializer == "")
63  {
// The direction of travel must agree with the sign of the interval.
64  if ((start > end && interval > 0) || (start < end && interval < 0))
65  {
67  .add<std::string>("range-name", srange)
68  .add<std::string>("specializer-type", specializer)
69  .add<double>("interval", interval)
70  .template add<T>("Tstart", Tstart)
71  .template add<T>("Tend", Tend)
72  .template add<T>("Tinterval", Tinterval);
73  }
// The upper bound is padded by interval/100 so that accumulated rounding error does not
// drop the final value.
// NOTE(review): when interval is 0 and start <= end, j never advances and this loop does
// not terminate.
74  for (double j=start;j<=end+(interval/100.0);j+=interval)
75  {
76  if (expanded.count((T) j) == 0)
77  expanded.insert((T) j);
78  }
79  } else if (specializer == "lin") {
80  // Linear spacing
// Here interval is the number of cells, not a step size.
81  double increment = (end - start) / (interval); // so interval of 1 gives midpoint
// A zero-width range contributes its single value.
82  if (!increment) expanded.insert((T) start);
83  for (double j=start+(increment/2.0); j<end+(increment/100.0);j+=increment)
84  {
85  if (expanded.count((T) j) == 0)
86  expanded.insert((T) j);
87  }
88  } else if (specializer == "log") {
// Zero endpoints are singled out before log10 is taken.
89  if (start == 0 || end == 0)
91  .add<std::string>("range-name", srange)
92  .add<std::string>("specializer-type", specializer)
93  .add<double>("interval", interval)
94  .template add<T>("Tstart", Tstart)
95  .template add<T>("Tend", Tend)
96  .template add<T>("Tinterval", Tinterval);
// Work in log10 space, then map each cell midpoint back with pow(10, j).
97  double is = log10( (double) start);
98  double ie = log10( (double) end);
99  double increment = (ie - is) / (interval);
100  if (!increment) expanded.insert((T) start);
101  for (double j=is+(increment/2.0); j<ie+(increment/100.0);j+=increment)
102  {
103  double k = pow((double) 10.0, (double) j);
104  if (expanded.count((T) k) == 0)
105  expanded.insert((T) k);
106  }
107  } else if (specializer == "inv") {
// Zero endpoints are singled out before the reciprocals are taken.
108  if (start == 0 || end == 0)
110  .add<std::string>("range-name", srange)
111  .add<std::string>("specializer-type", specializer)
112  .add<double>("interval", interval)
113  .template add<T>("Tstart", Tstart)
114  .template add<T>("Tend", Tend)
115  .template add<T>("Tinterval", Tinterval);
// Work in 1/x space, iterating from 1/end up to 1/start, then invert each midpoint.
116  double is = 1.0 / start;
117  double ie = 1.0 / end;
118  double increment = (is - ie) / (interval);
119  if (!increment) expanded.insert((T) start);
120  for (double j=ie+(increment/2.0); j<is+(increment/100.0);j+=increment)
121  {
122  double k = (1.0) / j;
123  if (expanded.count((T) k) == 0)
124  expanded.insert((T) k);
125  }
126  } else if (specializer == "cos") {
127  // Linear in cos
128  // start, end are in degrees
129  const double pi = boost::math::constants::pi<double>();
// ai is non-zero when the requested count (truncated to int) is odd.
130  int ai = (int) (interval) % 2;
131  double cs = cos(start * pi / 180.0);
132  double ce = cos(end * pi / 180.0);
// Odd counts include both endpoints (count-1 gaps); even counts use `count` cells.
133  double increment = (ai) ? (ce - cs) / (interval-1) : (ce - cs) / (interval);
134  if (increment == 0) expanded.insert(start);
135  if (increment != increment) // nan check - occurs when selecting only one value, and bounds are the same
136  {
137  expanded.insert(start);
138  return;
139  }
// Always iterate upward in cosine space.
140  if (increment < 0)
141  {
142  increment *= -1.0;
143  std::swap(cs,ce);
144  }
145  // For even n, divide into intervals and use the midpoint of the interval.
146  // For odd n, use the endpoints. Note that the weights for orientations
147  // (not computed here) will be different for the two choices.
148  if (!ai) cs += increment/2.0;
149  for (double j=cs; j<ce+(increment/10.0);j+=increment)
150  {
151  // max and min to avoid j>1 and j<-1 error from rounding
152  double k = (acos((double) max(min(j,1.0),-1.0)) * 180.0 / pi);
153  if (expanded.count((T) k) == 0)
154  expanded.insert((T) k);
155  }
156  } else {
// Reached for any specializer other than "", "lin", "log", "inv" or "cos".
158  .add<std::string>("range-name", srange)
159  .add<std::string>("specializer-type", specializer)
160  .add<double>("interval", interval)
161  .template add<T>("Tstart", Tstart)
162  .template add<T>("Tend", Tend)
163  .template add<T>("Tinterval", Tinterval);
164  }
165  }
166 
// Explicit-instantiation helper for the five-argument splitSet<T> overload
// (start, end, interval, specializer, output set).
167 #define SPEC_SPLITSET_A(T) \
168  template DL_ICEDB void splitSet<T>(const T&, const T&, \
169  const T&, const std::string&, std::set<T> &);
170 
171  //SPEC_SPLITSET_A(int);
172  //SPEC_SPLITSET_A(size_t);
173  //SPEC_SPLITSET_A(float);
174  //SPEC_SPLITSET_A(double);
175 
177 
178 
// Parses one interval string into its components without expanding it.
// The input is split on ':' and '/'. The first three tokens are converted to T, and any
// later token is stored in specializer. If the input contains '/', specializer is preset
// to "range".
// Outputs:
//   one number        -> start = end = that number, interval = 0, num = 1
//   otherwise         -> start = first number, end = last number, and then
//     specializer set -> num = middle number, only when three numbers were given
//     specializer ""  -> interval = middle number (when given), num = (end-start)/interval + 1
// NOTE(review): the line carrying the return type and function name (listing line 180) and
// the first line of the catch body (208) are absent from this listing.
179  template <class T>
181  const std::string &instr,
182  T &start, T &end, T &interval, size_t &num, std::string &specializer)
183  {
184  using namespace std;
185  // Prepare tokenizer
186  typedef boost::tokenizer<boost::char_separator<char> >
187  tokenizer;
188  boost::char_separator<char> seprange(":/");
// NOTE(review): isRange is assigned but never read in the code visible here.
189  bool isRange = false;
190  if (instr.find('/') != string::npos) { isRange = true; specializer = "range"; }
191  tokenizer trange(instr,seprange);
192  vector<T> range;
193  size_t i = 0;
194  for (auto rt = trange.begin(); rt != trange.end(); rt++, i++)
195  {
196  try {
197  string s = *rt;
198  boost::algorithm::trim(s);
// Tokens 0..2 are numeric fields; anything after that is the specializer keyword.
199  if (i < 3)
200  {
201  range.push_back(boost::lexical_cast<T>(s));
202  } else {
203  specializer = s;
204  }
205  }
206  catch (...)
207  {
// NOTE(review): start, end, interval and num are reported here before this function has
// assigned them, so the values shown are whatever the caller passed in.
209  .add<std::string>("range-name", instr)
210  .add<std::string>("specializer-type", specializer)
211  .add<double>("interval", interval)
212  .template add<T>("Tstart", start)
213  .template add<T>("Tend", end)
214  .template add<size_t>("num", num);
215  }
216  }
217  // Look at range. If one element, just add it. If two or
218  // three, calculate the inclusive interval
// NOTE(review): if no numeric token was parsed, range is empty and range[0] below is an
// out-of-bounds access.
219  if (range.size() == 1)
220  {
221  start = range[0];
222  end = range[0];
223  interval = 0;
224  num = 1;
225  } else {
226  start = range[0];
227  end = range[range.size()-1];
228  interval = 0;
229  if (specializer.size())
230  {
// With a specializer, the middle field is a count rather than a step.
231  if (range.size() > 2) num = (size_t) range[1];
232  } else {
233  if (range.size() > 2) interval = range[1];
234  // Linear spacing, starting at start.
// NOTE(review): with only two numbers and no specializer, interval is still 0 here, so
// this expression divides by zero.
235  num = (size_t) ( ( (end - start) / interval) + 1);
236  }
237  }
238  }
239 
// Explicit-instantiation helper for extractInterval<T>.
240 #define SPEC_SPLITSET_INTERVAL(T) \
241  template DL_ICEDB void extractInterval<T>( \
242  const std::string&, T&, T&, T&, size_t&, std::string&);
243 
244  //SPEC_SPLITSET_INTERVAL(int);
245  //SPEC_SPLITSET_INTERVAL(size_t);
246  //SPEC_SPLITSET_INTERVAL(float);
247  //SPEC_SPLITSET_INTERVAL(double);
248 
250 
251 
252 
// Parses a comma-separated list of values and ranges and adds the expanded values to
// expanded.
// For each comma-separated entry:
//   - if the entry is a key in aliases, its mapped text is parsed recursively;
//   - otherwise it is split on ':'. The first three tokens are converted to T and a fourth
//     token is taken as the specializer. A single number is inserted directly. Two or
//     three numbers are passed to the five-argument splitSet overload as start / end, with
//     the middle number as the interval (default 1).
// aliases may be null, in which case no substitution takes place.
// NOTE(review): the first line of the catch body (listing line 298) is absent from this
// listing.
253  template <class T>
254  void splitSet(const std::string &instr, std::set<T> &expanded,
255  const std::map<std::string, std::string> *aliases)
256  {
257  using namespace std;
258  // Prepare tokenizer
259  typedef boost::tokenizer<boost::char_separator<char> >
260  tokenizer;
261  boost::char_separator<char> sep(",");
262  boost::char_separator<char> seprange(":");
263  {
264  tokenizer tcom(instr,sep);
265  for (auto ot = tcom.begin(); ot != tcom.end(); ot++)
266  {
267  // At this junction, do any alias substitution
268  std::string ssubst;
269 
// NOTE(review): defaliases is local to this loop body, yet aliases keeps pointing at it on
// later iterations, after it has been destroyed. Declaring it before the loop would avoid
// the dangling pointer.
270  std::map<std::string, std::string> defaliases;
271  if (!aliases) aliases = &defaliases; // Provides a convenient default
272 
273  if (aliases->count(*ot))
274  {
275  ssubst = aliases->at(*ot);
276  // Recursively call splitSet to handle bundles of aliases
277  splitSet<T>(ssubst, expanded, aliases);
278  } else {
279  // Each comma-separated entry is further split on colons.
280  tokenizer trange(*ot,seprange);
281  vector<T> range;
282  string specializer;
283  size_t i = 0;
284  for (auto rt = trange.begin(); rt != trange.end(); rt++, i++)
285  {
286  try {
287  string s = *rt;
288  boost::algorithm::trim(s);
// Tokens 0..2 are numeric fields; anything after that is the specializer keyword.
289  if (i < 3)
290  {
291  range.push_back(boost::lexical_cast<T>(s));
292  } else {
293  specializer = s;
294  }
295  }
296  catch (...)
297  {
299  .add<std::string>("range-name", instr);
300  }
301  }
302  // Look at range. If one element, just add it. If two or
303  // three, calculate the inclusive interval
304  if (range.size() == 1)
305  {
306  if (expanded.count(range[0]) == 0)
307  expanded.insert(range[0]);
308  } else {
// NOTE(review): if no numeric token was parsed, range is empty and range[0] below is an
// out-of-bounds access.
309  double start, end = 0, interval = 1;
310  start = (double) range[0];
311  end = (double) range[range.size()-1];
312  if (range.size() > 2) interval = (double) range[1];
313 
314  // I'm moving the logic to the other template definition, as it
315  // lets me split stuff without casts back to strings.
// The double values are converted to T at this call, because the overload takes const T&.
316  splitSet<T>(start, end, interval, specializer, expanded);
317  }
318  }
319  }
320  }
321  }
322 
323  template <> void splitSet<std::string>(const std::string &instr, std::set<std::string> &expanded,
324  const std::map<std::string, std::string> *aliases)
325  {
326  using namespace std;
327  // Prepare tokenizer
328  typedef boost::tokenizer<boost::char_separator<char> >
329  tokenizer;
330  boost::char_separator<char> sep(",;");
331 
332  std::string ssubst;
333 
334  std::map<std::string, std::string> defaliases;
335  if (!aliases) aliases = &defaliases; // Provides a convenient default
336 
337  tokenizer tcom(instr,sep);
338  for (auto ot = tcom.begin(); ot != tcom.end(); ot++)
339  {
340  if (aliases->count(*ot))
341  {
342  ssubst = aliases->at(*ot);
343  // Recursively call splitSet to handle bundles of aliases
344  splitSet<std::string>(ssubst, expanded, aliases);
345  } else {
346  if (expanded.count(*ot) == 0)
347  expanded.insert(*ot);
348  }
349  }
350  }
351 
// Explicit-instantiation helper for the string-parsing splitSet<T> overload
// (input text, output set, optional alias table).
352 #define SPEC_SPLITSET(T) \
353  template DL_ICEDB void splitSet<T>(const std::string &instr, std::set<T> &expanded, \
354  const std::map<std::string, std::string> *aliases);
355 
357  //SPEC_SPLITSET(int);
358  //SPEC_SPLITSET(size_t);
359  //SPEC_SPLITSET(float);
360  //SPEC_SPLITSET(double);
361 
362 
363 
364 
/// Splits instr on the character delim and appends every non-empty piece to out.
/// Consecutive, leading and trailing delimiters produce no empty entries.
/// @param instr text to split; it may contain embedded '\0' characters
/// @param out   vector that receives the pieces; it is NOT cleared first
/// @param delim separator character (pass '\0' for null-separated lists)
void splitVector(
    const std::string &instr, std::vector<std::string> &out, char delim)
{
    std::string::size_type tokenBegin = 0;
    while (tokenBegin < instr.size())
    {
        // Find the end of the current piece: the next delimiter, or the end of the text.
        std::string::size_type tokenEnd = instr.find(delim, tokenBegin);
        if (tokenEnd == std::string::npos) tokenEnd = instr.size();

        // A zero-length piece means we are sitting on a delimiter; skip it.
        if (tokenEnd > tokenBegin)
            out.push_back(instr.substr(tokenBegin, tokenEnd - tokenBegin));

        tokenBegin = tokenEnd + 1;
    }
}
389 
/// Splits a '\0'-separated list of "key=value" records and inserts them into out.
///  - A record without '=' is stored as a key with an empty value.
///  - Only the first '=' separates key from value; later ones belong to the value.
///  - Empty records and records with an empty key are skipped.
///  - A key already present in out keeps its existing value.
/// @param instr text holding the records, separated by '\0'
/// @param out   map that receives the pairs; it is not cleared first
void splitNullMap(
    const std::string &instr, std::map<std::string, std::string> &out)
{
    const char *cursor = instr.data();
    const char *const stop = cursor + instr.size();
    while (cursor < stop)
    {
        // The record runs up to the next null character, or to the end of the text.
        const char *const recordEnd = std::find(cursor, stop, '\0');
        const char *const equals = std::find(cursor, recordEnd, '=');

        // equals == cursor covers both an empty record and a record with an empty key.
        if (equals != cursor)
        {
            std::string val;
            if (equals < recordEnd) val.assign(equals + 1, recordEnd);
            out.insert(std::make_pair(std::string(cursor, equals), val));
        }

        // Stop at the end of the text instead of stepping past it.
        if (recordEnd == stop) break;
        cursor = recordEnd + 1;
    }
}
426 
427 
428  template <class T>
429  intervals<T>::intervals(const std::string &s) { if (s.size()) append(s); }
430 
431  template <class T>
432  intervals<T>::intervals(const std::vector<std::string> &s) { append(s); }
433 
434  template <class T>
436 
437  template <class T>
438  void intervals<T>::append(const std::string &instr,
439  const std::map<std::string, std::string> *aliases)
440  {
441  std::vector<std::string> splits;
442  splitVector(instr, splits, ',');
443  std::set<T> vals;
444  for (const auto &s : splits)
445  {
446  std::map<std::string, std::string> defaliases;
447  if (!aliases) aliases = &defaliases; // Provides a convenient default
448 
449  if (aliases->count(s))
450  {
451  std::string ssubst = aliases->at(s);
452  // Recursively call splitSet to handle bundles of aliases
453  append(ssubst, aliases);
454  }
455  else {
456  T start, end, interval;
457  size_t n;
458  std::string specializer;
459  bool isRange = false;
460  extractInterval(s, start, end, interval, n, specializer);
461  if (specializer == "range")
462  {
463  ranges.push_back(std::pair<T, T>(start, end));
464  } else if (start == end) {
465  ranges.push_back(std::pair<T, T>(start, end));
466  } else {
467  splitSet(start, end, interval, specializer, vals);
468  }
469  }
470  }
471  for (const auto &v : vals)
472  {
473  ranges.push_back(std::pair<T, T>(v, v));
474  }
475  }
476 
477  template <class T>
478  void intervals<T>::append(const std::vector<std::string> &s,
479  const std::map<std::string, std::string> *aliases)
480  {
481  for (const auto &str : s) append(str, aliases);
482  }
483 
// Appends all (first, second) pairs held by src to the end of this object's ranges.
// NOTE(review): the line carrying this definition's return type, name and parameter list
// (listing line 485) is absent from this listing, so the signature cannot be confirmed here.
// NOTE(review): presumably src is a different object from *this - a container may not
// insert a range of its own elements into itself; verify against callers.
484  template <class T>
486  {
487  ranges.insert(ranges.end(), src.ranges.begin(), src.ranges.end());
488  }
489  template <class T>
490  bool intervals<T>::inRange(const T& val) const
491  {
492  for (const auto &r : ranges)
493  {
494  if (val >= r.first && val < r.second) return true;
495  if (r.first == r.second) {
496  if (val == r.first) return true;
497  }
498  }
499  return false;
500  }
501  template <class T>
502  bool intervals<T>::isNear(const T& val, const T& linSep, const T& factorSep) const
503  {
504  for (const auto &r : ranges)
505  {
506  T lower = (r.first * (static_cast<T>(1) - factorSep)) - linSep,
507  upper = (r.second * (static_cast<T>(1) + factorSep)) + linSep;
508  if (val >= lower && val < upper) return true;
509  }
510  return false;
511  }
512 
// Explicit-instantiation helper for the whole intervals<T> class template.
513 #define IMPL_INTS(T) template class DL_ICEDB intervals < T >;
515  }
516 }
517 
518 
bool isNear(const T &val, const T &linSep, const T &factorSep) const
Definition: splitSet.cpp:502
void extractInterval(const std::string &instr, T &start, T &end, T &interval, size_t &num, std::string &specializer)
Extracts information from interval notation.
Definition: splitSet.cpp:180
DOTYPES(SPEC_SPLITSET_A)
#define IMPL_INTS(T)
Definition: splitSet.cpp:513
#define SPEC_SPLITSET_INTERVAL(T)
Definition: splitSet.cpp:240
STL namespace.
Class to define and search on intervals.
Definition: splitSet.hpp:67
void splitSet(const std::string &instr, std::set< T > &expanded, const std::map< std::string, std::string > *aliases)
Definition: splitSet.cpp:254
intervals(const std::string &s="")
Definition: splitSet.cpp:429
void splitNullMap(const std::string &instr, std::map< std::string, std::string > &out)
Convenience function to split a null-separated string list into a map of strings. ...
Definition: splitSet.cpp:390
#define ICEDB_throw(x)
Definition: error.hpp:88
void append(const std::string &instr, const std::map< std::string, std::string > *aliases=nullptr)
Definition: splitSet.cpp:438
void splitVector(const std::string &instr, std::vector< std::string > &out, char delim)
Convenience function to split a delimiter-separated string list (the separator is the delim argument) into a vector of strings.
Definition: splitSet.cpp:365
void splitSet(const T &Tstart, const T &Tend, const T &Tinterval, const std::string &Tspecializer, std::set< T > &expanded)
Shortcut that already passes parsed information.
Definition: splitSet.cpp:37
void stringifyRange(const T &Tstart, const T &Tend, const T &Tinterval, const std::string &Tspecializer, std::string &out)
Definition: splitSet.cpp:23
#define SPEC_SPLITSET(T)
Definition: splitSet.cpp:352
#define SPEC_SPLITSET_A(T)
Definition: splitSet.cpp:167
bool inRange(const T &val) const
Definition: splitSet.cpp:490
std::vector< std::pair< T, T > > ranges
Definition: splitSet.hpp:70