You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

477 lines
12 KiB

  1. #include <Python.h>
  2. #include <structmember.h>
  3. #include <endian.h>
  4. /* Somewhat arbitrary, just so we can use fixed sizes for strings
  5. etc. */
  6. static const int MAX_LAYOUT_COUNT = 64;
  7. typedef union {
  8. int8_t i8[8];
  9. uint8_t u8[8];
  10. int16_t i16[4];
  11. uint16_t u16[4];
  12. int32_t i32[2];
  13. uint32_t u32[2];
  14. int64_t i64[1];
  15. uint64_t u64[1];
  16. float f[2];
  17. double d[1];
  18. } union_t;
  19. typedef enum {
  20. LAYOUT_TYPE_NONE,
  21. LAYOUT_TYPE_INT8,
  22. LAYOUT_TYPE_UINT8,
  23. LAYOUT_TYPE_INT16,
  24. LAYOUT_TYPE_UINT16,
  25. LAYOUT_TYPE_INT32,
  26. LAYOUT_TYPE_UINT32,
  27. LAYOUT_TYPE_INT64,
  28. LAYOUT_TYPE_UINT64,
  29. LAYOUT_TYPE_FLOAT32,
  30. LAYOUT_TYPE_FLOAT64,
  31. } layout_type_t;
  32. struct {
  33. char *string;
  34. layout_type_t layout;
  35. int size;
  36. } type_lookup[] = {
  37. { "int8", LAYOUT_TYPE_INT8, 1 },
  38. { "uint8", LAYOUT_TYPE_UINT8, 1 },
  39. { "int16", LAYOUT_TYPE_INT16, 2 },
  40. { "uint16", LAYOUT_TYPE_UINT16, 2 },
  41. { "int32", LAYOUT_TYPE_INT32, 4 },
  42. { "uint32", LAYOUT_TYPE_UINT32, 4 },
  43. { "int64", LAYOUT_TYPE_INT64, 8 },
  44. { "uint64", LAYOUT_TYPE_UINT64, 8 },
  45. { "float32", LAYOUT_TYPE_FLOAT32, 4 },
  46. { "float64", LAYOUT_TYPE_FLOAT64, 8 },
  47. { NULL }
  48. };
  49. typedef struct {
  50. PyObject_HEAD
  51. layout_type_t layout_type;
  52. int layout_count;
  53. int binary_size;
  54. } Rocket;
  55. static void Rocket_dealloc(Rocket *self)
  56. {
  57. self->ob_type->tp_free((PyObject *)self);
  58. }
  59. static PyObject *Rocket_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
  60. {
  61. Rocket *self;
  62. self = (Rocket *)type->tp_alloc(type, 0);
  63. if (!self)
  64. return NULL;
  65. self->layout_type = LAYOUT_TYPE_NONE;
  66. self->layout_count = 0;
  67. self->binary_size = 0;
  68. return (PyObject *)self;
  69. }
  70. static int Rocket_init(Rocket *self, PyObject *args, PyObject *kwds)
  71. {
  72. const char *layout;
  73. static char *kwlist[] = { "layout", NULL };
  74. if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", kwlist, &layout))
  75. return -1;
  76. if (!layout)
  77. return -1;
  78. const char *under;
  79. char *tmp;
  80. under = strchr(layout, '_');
  81. if (!under) {
  82. PyErr_SetString(PyExc_ValueError, "no such layout: "
  83. "badly formatted string");
  84. return -1;
  85. }
  86. self->layout_count = strtoul(under+1, &tmp, 10);
  87. if (self->layout_count < 1 || *tmp != '\0') {
  88. PyErr_SetString(PyExc_ValueError, "no such layout: "
  89. "bad count");
  90. return -1;
  91. }
  92. if (self->layout_count >= MAX_LAYOUT_COUNT) {
  93. PyErr_SetString(PyExc_ValueError, "no such layout: "
  94. "count too high");
  95. return -1;
  96. }
  97. int i;
  98. for (i = 0; type_lookup[i].string; i++)
  99. if (strncmp(layout, type_lookup[i].string, under-layout) == 0)
  100. break;
  101. if (!type_lookup[i].string) {
  102. PyErr_SetString(PyExc_ValueError, "no such layout: "
  103. "bad data type");
  104. return -1;
  105. }
  106. self->layout_type = type_lookup[i].layout;
  107. self->binary_size = 8 + (type_lookup[i].size * self->layout_count);
  108. return 0;
  109. }
  110. FILE *PyFile_AsFile(PyObject *file)
  111. {
  112. PyObject *result;
  113. int fd;
  114. result = PyObject_CallMethod(file, "fileno", NULL);
  115. if (result == NULL)
  116. return NULL;
  117. fd = PyInt_AsLong(result);
  118. if (fd < 0)
  119. return NULL;
  120. return fdopen(fd, "a+b");
  121. }
  122. static inline void write_pyobject(FILE *out, PyObject *val, layout_type_t type)
  123. {
  124. union_t u;
  125. int ret = 0;
  126. switch (type) {
  127. #define CASE(type, pyconvert, pytype, disktype, htole, bytes) \
  128. case LAYOUT_TYPE_##type: \
  129. u.pytype[0] = pyconvert(val); \
  130. if (PyErr_Occurred()) \
  131. return; \
  132. u.disktype[0] = htole(u.disktype[0]); \
  133. ret = fwrite(&u.disktype[0], bytes, 1, out); \
  134. break
  135. CASE(INT8, PyInt_AsLong, i8, u8, , 1);
  136. CASE(UINT8, PyInt_AsLong, u8, u8, , 1);
  137. CASE(INT16, PyInt_AsLong, i16, u16, htole16, 2);
  138. CASE(UINT16, PyInt_AsLong, u16, u16, htole16, 2);
  139. CASE(INT32, PyInt_AsLong, i32, u32, htole32, 4);
  140. CASE(UINT32, PyInt_AsLong, u32, u32, htole32, 4);
  141. CASE(INT64, PyInt_AsLong, i64, u64, htole64, 8);
  142. CASE(UINT64, PyInt_AsLong, u64, u64, htole64, 8);
  143. CASE(FLOAT32, PyFloat_AsDouble, f, u32, htole32, 4);
  144. CASE(FLOAT64, PyFloat_AsDouble, d, u64, htole64, 8);
  145. #undef CASE
  146. default:
  147. PyErr_SetString(PyExc_TypeError, "unknown type");
  148. return;
  149. }
  150. if (ret <= 0)
  151. PyErr_SetFromErrno(PyExc_OSError);
  152. }
  153. static inline void *read_pyobject(FILE *in, layout_type_t type)
  154. {
  155. union_t u;
  156. switch (type) {
  157. #define CASE(type, pyconvert, pytype, disktype, letoh, bytes) \
  158. case LAYOUT_TYPE_##type: \
  159. if (fread(&u.disktype[0], bytes, 1, in) <= 0) \
  160. break; \
  161. u.disktype[0] = letoh(u.disktype[0]); \
  162. return pyconvert(u.pytype[0]); \
  163. break
  164. CASE(INT8, PyInt_FromLong, i8, u8, , 1);
  165. CASE(UINT8, PyInt_FromLong, u8, u8, , 1);
  166. CASE(INT16, PyInt_FromLong, i16, u16, le16toh, 2);
  167. CASE(UINT16, PyInt_FromLong, u16, u16, le16toh, 2);
  168. CASE(INT32, PyInt_FromLong, i32, u32, le32toh, 4);
  169. CASE(UINT32, PyInt_FromLong, u32, u32, le32toh, 4);
  170. CASE(INT64, PyInt_FromLong, i64, u64, le64toh, 8);
  171. CASE(UINT64, PyInt_FromLong, u64, u64, le64toh, 8);
  172. CASE(FLOAT32, PyFloat_FromDouble, f, u32, le32toh, 4);
  173. CASE(FLOAT64, PyFloat_FromDouble, d, u64, le64toh, 8);
  174. #undef CASE
  175. default:
  176. PyErr_SetString(PyExc_TypeError, "unknown type");
  177. return NULL;
  178. }
  179. PyErr_SetFromErrno(PyExc_OSError);
  180. return NULL;
  181. }
  182. static PyObject *Rocket_append_list(Rocket *self, PyObject *args)
  183. {
  184. PyObject *file, *list;
  185. FILE *out;
  186. if (!PyArg_ParseTuple(args, "OO:append_list", &file, &list))
  187. return NULL;
  188. if ((out = PyFile_AsFile(file)) == NULL)
  189. return NULL;
  190. if (!PyList_Check(list)) {
  191. PyErr_SetString(PyExc_TypeError, "need a list");
  192. return NULL;
  193. }
  194. Py_ssize_t count = PyList_Size(list);
  195. Py_ssize_t row;
  196. for (row = 0; row < count; row++) {
  197. PyObject *rowlist = PyList_GetItem(list, row);
  198. if (!PyList_Check(list)) {
  199. PyErr_SetString(PyExc_TypeError, "rows must be lists");
  200. fflush(out);
  201. return NULL;
  202. }
  203. if (PyList_Size(rowlist) != self->layout_count + 1) {
  204. PyErr_SetString(PyExc_TypeError, "short row list");
  205. fflush(out);
  206. return NULL;
  207. }
  208. /* Extract and write timestamp */
  209. write_pyobject(out, PyList_GetItem(rowlist, 0),
  210. LAYOUT_TYPE_FLOAT64);
  211. if (PyErr_Occurred()) {
  212. fflush(out);
  213. return NULL;
  214. }
  215. /* Extract and write values */
  216. int i;
  217. for (i = 0; i < self->layout_count; i++) {
  218. write_pyobject(out, PyList_GetItem(rowlist, i+1),
  219. self->layout_type);
  220. if (PyErr_Occurred()) {
  221. fflush(out);
  222. return NULL;
  223. }
  224. }
  225. }
  226. fflush(out);
  227. /* All done */
  228. Py_INCREF(Py_None);
  229. return Py_None;
  230. }
  231. static int _extract_handle_params(PyObject *args, FILE **file, long *count)
  232. {
  233. PyObject *pyfile, *pyoffset, *pycount;
  234. long offset;
  235. if (!PyArg_ParseTuple(args, "OOO",
  236. &pyfile, &pyoffset, &pycount))
  237. return -1;
  238. if ((*file = PyFile_AsFile(pyfile)) == NULL)
  239. return -1;
  240. offset = PyLong_AsLong(pyoffset);
  241. if (PyErr_Occurred())
  242. return -1;
  243. *count = PyLong_AsLong(pycount);
  244. if (PyErr_Occurred())
  245. return -1;
  246. /* Seek to target location */
  247. if (fseek(*file, offset, SEEK_SET) < 0) {
  248. PyErr_SetFromErrno(PyExc_OSError);
  249. return -1;
  250. }
  251. return 0;
  252. }
  253. static PyObject *Rocket_extract_list(Rocket *self, PyObject *args)
  254. {
  255. FILE *in;
  256. long count;
  257. if (_extract_handle_params(args, &in, &count) < 0)
  258. return NULL;
  259. /* Make a list to return */
  260. PyObject *retlist = PyList_New(0);
  261. if (!retlist)
  262. return NULL;
  263. /* Read data into new Python lists */
  264. int row;
  265. for (row = 0; row < count; row++)
  266. {
  267. PyObject *rowlist = PyList_New(self->layout_count + 1);
  268. if (!rowlist) {
  269. Py_DECREF(retlist);
  270. return NULL;
  271. }
  272. /* Timestamp */
  273. PyObject *entry = read_pyobject(in, LAYOUT_TYPE_FLOAT64);
  274. if (!entry || (PyList_SetItem(rowlist, 0, entry) < 0)) {
  275. Py_DECREF(rowlist);
  276. Py_DECREF(retlist);
  277. return NULL;
  278. }
  279. /* Data */
  280. int i;
  281. for (i = 0; i < self->layout_count; i++) {
  282. PyObject *ent = read_pyobject(in, self->layout_type);
  283. if (!ent || (PyList_SetItem(rowlist, i+1, ent) < 0)) {
  284. Py_DECREF(rowlist);
  285. Py_DECREF(retlist);
  286. return NULL;
  287. }
  288. }
  289. /* Add row to return value */
  290. if (PyList_Append(retlist, rowlist) < 0) {
  291. Py_DECREF(rowlist);
  292. Py_DECREF(retlist);
  293. return NULL;
  294. }
  295. Py_DECREF(rowlist);
  296. }
  297. return retlist;
  298. }
  299. static PyObject *Rocket_extract_string(Rocket *self, PyObject *args)
  300. {
  301. FILE *in;
  302. long count;
  303. if (_extract_handle_params(args, &in, &count) < 0)
  304. return NULL;
  305. char *str = NULL, *new;
  306. long len_alloc = 0;
  307. long len = 0;
  308. int ret;
  309. /* min space free in string (and the maximum length of one
  310. line); this is generous */
  311. const int min_free = 32 * MAX_LAYOUT_COUNT;
  312. /* how much to allocate at once */
  313. const int alloc_size = 1048576;
  314. int row, i;
  315. union_t u;
  316. for (row = 0; row < count; row++) {
  317. /* Make sure there's space for a line */
  318. if ((len_alloc - len) < min_free) {
  319. /* grow by 1 meg at a time */
  320. len_alloc += alloc_size;
  321. new = realloc(str, len_alloc);
  322. if (new == NULL)
  323. goto err;
  324. str = new;
  325. }
  326. /* Read and print timestamp */
  327. if (fread(&u.u64[0], 8, 1, in) != 1)
  328. goto err;
  329. /* Timestamps are always printed to the microsecond */
  330. ret = sprintf(&str[len], "%.6f", u.d[0]);
  331. if (ret <= 0)
  332. goto err;
  333. len += ret;
  334. /* Read and print values */
  335. switch (self->layout_type) {
  336. #define CASE(type, fmt, fmttype, disktype, letoh, bytes) \
  337. case LAYOUT_TYPE_##type: \
  338. /* read and format in a loop */ \
  339. for (i = 0; i < self->layout_count; i++) { \
  340. if (fread(&u.disktype[0], bytes, \
  341. 1, in) < 0) \
  342. goto err; \
  343. u.disktype[0] = letoh(u.disktype[0]); \
  344. ret = sprintf(&str[len], " " fmt, \
  345. u.fmttype[0]); \
  346. if (ret <= 0) \
  347. goto err; \
  348. len += ret; \
  349. } \
  350. break
  351. CASE(INT8, "%hhd", i8, u8, , 1);
  352. CASE(UINT8, "%hhu", u8, u8, , 1);
  353. CASE(INT16, "%hd", i16, u16, le16toh, 2);
  354. CASE(UINT16, "%hu", u16, u16, le16toh, 2);
  355. CASE(INT32, "%d", i32, u32, le32toh, 4);
  356. CASE(UINT32, "%u", u32, u32, le32toh, 4);
  357. CASE(INT64, "%ld", i64, u64, le64toh, 8);
  358. CASE(UINT64, "%lu", u64, u64, le64toh, 8);
  359. /* These next two are a bit debatable. floats
  360. are 6-9 significant figures, doubles are
  361. 15-19. This matches the old prep format,
  362. for float32. */
  363. CASE(FLOAT32, "%.6e", f, u32, le32toh, 4);
  364. CASE(FLOAT64, "%.16e", d, u64, le64toh, 8);
  365. #undef CASE
  366. default:
  367. PyErr_SetString(PyExc_TypeError, "unknown type");
  368. if (str) free(str);
  369. return NULL;
  370. }
  371. str[len++] = '\n';
  372. }
  373. PyObject *pystr = PyString_FromStringAndSize(str, len);
  374. free(str);
  375. return pystr;
  376. err:
  377. if (str) free(str);
  378. PyErr_SetFromErrno(PyExc_OSError);
  379. return NULL;
  380. }
  381. static PyMemberDef Rocket_members[] = {
  382. { "binary_size", T_INT, offsetof(Rocket, binary_size), 0,
  383. "binary size per row" },
  384. { NULL },
  385. };
  386. static PyMethodDef Rocket_methods[] = {
  387. { "append_list", (PyCFunction)Rocket_append_list, METH_VARARGS,
  388. "Append the list data to the file" },
  389. { "extract_list", (PyCFunction)Rocket_extract_list, METH_VARARGS,
  390. "Extract count rows of data from the file at offset offset. "
  391. "Return a list of lists [[row],[row],...]" },
  392. { "extract_string", (PyCFunction)Rocket_extract_string, METH_VARARGS,
  393. "Extract count rows of data from the file at offset offset. "
  394. "Return an ascii formatted string according to the layout" },
  395. { NULL },
  396. };
  397. static PyTypeObject RocketType = {
  398. PyObject_HEAD_INIT(NULL)
  399. .tp_name = "rocket.Rocket",
  400. .tp_basicsize = sizeof(Rocket),
  401. .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
  402. .tp_new = Rocket_new,
  403. .tp_dealloc = (destructor)Rocket_dealloc,
  404. .tp_init = (initproc)Rocket_init,
  405. .tp_methods = Rocket_methods,
  406. .tp_members = Rocket_members,
  407. .tp_doc = ("C implementation of the \"rocket\" data parsing "
  408. "interface, which translates between the binary "
  409. "format on disk and the ASCII or Python list "
  410. "format used when communicating with the rest of "
  411. "the system.")
  412. };
  413. static PyMethodDef module_methods[] = {
  414. { NULL },
  415. };
  416. PyMODINIT_FUNC
  417. initrocket(void)
  418. {
  419. PyObject *module;
  420. RocketType.tp_new = PyType_GenericNew;
  421. if (PyType_Ready(&RocketType) < 0)
  422. return;
  423. module = Py_InitModule3("rocket", module_methods,
  424. "Rocket data parsing and formatting module");
  425. Py_INCREF(&RocketType);
  426. PyModule_AddObject(module, "Rocket", (PyObject *)&RocketType);
  427. return;
  428. }