Compare commits
738 Commits
nilmdb-ori
...
master
Author | SHA1 | Date | |
---|---|---|---|
517b237636 | |||
07f138e0f4 | |||
7538c6201b | |||
4d9a106ca1 | |||
e90a79ddad | |||
7056c5b4ec | |||
df4e7f0967 | |||
b6bba16505 | |||
d4003d0d34 | |||
759492298a | |||
b5f6fcc253 | |||
905e325ded | |||
648b6f4b70 | |||
7f8a2c7027 | |||
276fbc652a | |||
10b34f5937 | |||
83daeb148a | |||
d65f00e8b2 | |||
71dc01c9a7 | |||
bcd21b3498 | |||
a1dee0e6f2 | |||
99ac47cf0d | |||
4cdaef51c1 | |||
88466dcafe | |||
8dfb8da15c | |||
6cc1f6b7b2 | |||
8dc36c2d37 | |||
3738430103 | |||
a41111b045 | |||
85f822e1c4 | |||
0222dfebf0 | |||
70914690c1 | |||
10400f2b07 | |||
56153ff7ad | |||
671f87b047 | |||
2f2faeeab7 | |||
2ed544bd30 | |||
6821b2a97b | |||
b20bb92988 | |||
699de7b11f | |||
ea67e45be9 | |||
ca440a42bd | |||
4ff4b263b4 | |||
79e544c733 | |||
9acf99ff25 | |||
4958a5ab2e | |||
f2d89e2da5 | |||
1952f245c0 | |||
7cbc0c11c3 | |||
9f2651c35e | |||
9126980ed4 | |||
ea051c85b3 | |||
d8294469cf | |||
96eadb0577 | |||
fb524c649f | |||
19a34a07a4 | |||
d8df6f515f | |||
90ee127c87 | |||
0b631b7dea | |||
f587518adb | |||
efbb2665fe | |||
544413018c | |||
322b0ec423 | |||
f3833d9b20 | |||
735c8497af | |||
7252e40c2d | |||
caa5604d81 | |||
6624e8dab6 | |||
d907638858 | |||
39e66fe38c | |||
ba915bb290 | |||
3f0b8e50a2 | |||
f93edc469c | |||
087fb39475 | |||
8b4acf41d6 | |||
32a76ccf3f | |||
5f9367bdd3 | |||
5848d03507 | |||
36dc448f02 | |||
2764283f59 | |||
2d0c3f7868 | |||
cadba9fbba | |||
2d200a86c9 | |||
640c1bc95e | |||
b574fc86f4 | |||
02ee18c410 | |||
d1e241a213 | |||
c5c7f638e7 | |||
a1218fd20b | |||
c58a933d21 | |||
7874e1ebfa | |||
79b410a85b | |||
6645395924 | |||
beb3eadd38 | |||
edf4568e8f | |||
a962258b2a | |||
fa011559c1 | |||
349eec3942 | |||
99500f3a88 | |||
54eccb17aa | |||
cc8ac74a37 | |||
3be904d158 | |||
5d9fc5500c | |||
57751f5b32 | |||
1c005518d8 | |||
3279f7ef2c | |||
a2e124f444 | |||
6d673bd2be | |||
613a3185e3 | |||
c83ee65cf7 | |||
113633459d | |||
41abf53085 | |||
fef3e1d31e | |||
02db87eee6 | |||
ad85c3dd29 | |||
0e6ccd687b | |||
85d4c419fd | |||
159278066c | |||
b69358a185 | |||
e82ef60e2e | |||
911d9bc284 | |||
752a9b36ae | |||
97d17de8ad | |||
5da7e6558e | |||
1928caa1d7 | |||
5db034432c | |||
55119a3e07 | |||
a9eff10dbf | |||
0f5c1c0db6 | |||
d17365ca37 | |||
8125d9c840 | |||
ba55ad82f0 | |||
45c81d2019 | |||
78cfda32e3 | |||
3658d3876b | |||
022b50950f | |||
e5efbadc8e | |||
74f633c9da | |||
ab9a327130 | |||
da72fc9777 | |||
a01cb4132d | |||
7c3da2fe44 | |||
f0e06dc436 | |||
ddc0eb4264 | |||
0a22db3965 | |||
8bb8f068de | |||
416902097d | |||
f5276e9fc8 | |||
c47f28f93a | |||
63b5f99b90 | |||
7d7b89b52f | |||
8d249273c6 | |||
abe431c663 | |||
ccf1f695af | |||
06f7390c9e | |||
6de77a08f1 | |||
8db9771c20 | |||
04f815a24b | |||
6868f5f126 | |||
ca0943ec19 | |||
68addb4e4a | |||
68c33b1f14 | |||
8dd8741100 | |||
8e6341ae5d | |||
422b1e2df2 | |||
0f745b3047 | |||
71cd7ed9b7 | |||
a79d6104d5 | |||
8e8ec59e30 | |||
b89b945a0f | |||
bd7bdb2eb8 | |||
840cd2fd13 | |||
bbd59c8b50 | |||
405c110fd7 | |||
274adcd856 | |||
a1850c9c2c | |||
6cd28b67b1 | |||
d6d215d53d | |||
e02143ddb2 | |||
e275384d03 | |||
a6a67ec15c | |||
fc43107307 | |||
90633413bb | |||
c7c3aff0fb | |||
e2347c954e | |||
222a5c6c53 | |||
1ca2c143e5 | |||
b5df575c79 | |||
2768a5ad15 | |||
a105543c38 | |||
309f38d0ed | |||
9a27b6ef6a | |||
99532cf9e0 | |||
dfdd0e5c74 | |||
9a2699adfc | |||
9bbb95b18b | |||
6bbed322c5 | |||
2317894355 | |||
539c92226c | |||
77c766d85d | |||
49d04db1d6 | |||
ea838d05ae | |||
f2a48bdb2a | |||
6d14e0b8aa | |||
b31b9327b9 | |||
b98ff1331a | |||
00e6ba1124 | |||
01029230c9 | |||
ecc4e5ef9d | |||
23f31c472b | |||
a1e2746360 | |||
1c40d59a52 | |||
bfb09a189f | |||
416a499866 | |||
637d193807 | |||
b7fa5745ce | |||
0104c8edd9 | |||
cf3b8e787d | |||
83d022016c | |||
43b740ecaa | |||
4ce059b920 | |||
99a4228285 | |||
230ec72609 | |||
d36ece3767 | |||
231963538e | |||
b4d6aad6de | |||
e95142eabf | |||
d21c3470bc | |||
7576883f49 | |||
cc211542f8 | |||
8292dcf70b | |||
b362fd37f6 | |||
41ec13ee17 | |||
efa9aa9097 | |||
d9afb48f45 | |||
d1140e0f16 | |||
6091e44561 | |||
e233ba790f | |||
f0304b4c00 | |||
60594ca58e | |||
c7f2df4abc | |||
5b7409f802 | |||
06038062a2 | |||
ae9fe89759 | |||
04def60021 | |||
9ce0f69dff | |||
90c3be91c4 | |||
ebccfb3531 | |||
e006f1d02e | |||
5292319802 | |||
173121ca87 | |||
26bab031bd | |||
b5fefffa09 | |||
dccb3e370a | |||
95ca55aa7e | |||
e01813f29d | |||
7f41e117a2 | |||
dd5fc806e5 | |||
f8ca8d31e6 | |||
ed89d803f0 | |||
3d24092cd2 | |||
304bb43d85 | |||
59a79a30a5 | |||
c0d450d39e | |||
6f14d609b2 | |||
77ef87456f | |||
32d6af935c | |||
6af3a6fc41 | |||
f8a06fb3b7 | |||
e790bb9e8a | |||
89be6f5931 | |||
4cdef3285d | |||
bcd82c4d59 | |||
caf63ab01f | |||
2d72891162 | |||
cda2ac3e77 | |||
57d3d60f6a | |||
d6b5befe76 | |||
7429c1788d | |||
0ef71c193b | |||
4a50dd015e | |||
22274550ab | |||
4f06d6ae68 | |||
c54d8041c3 | |||
52ae397d7d | |||
d05b6f6348 | |||
049375d30e | |||
88eb0123f5 | |||
a547ddbbba | |||
28e72fd53e | |||
f63107b334 | |||
955d7aa871 | |||
b8d2cf1b78 | |||
7c465730de | |||
aca130272d | |||
76e5e9883f | |||
fb4f4519ff | |||
30328714a7 | |||
759466de4a | |||
d3efb829b5 | |||
90b96799ac | |||
56679ad770 | |||
b5541722c2 | |||
aaea105861 | |||
e6a081d639 | |||
1835d03412 | |||
c7a712d8d8 | |||
20d315b4f7 | |||
a44a5e3135 | |||
039b2a0557 | |||
cd1dfe7dcd | |||
fb35517dfa | |||
b9f0b35bbe | |||
b1b09f8cd0 | |||
d467df7980 | |||
09bc7eb48c | |||
b77f07a4cd | |||
59f0076306 | |||
83bc5bc775 | |||
6b1dfec828 | |||
d827f41fa5 | |||
7eca587fdf | |||
a351bc1b10 | |||
1d61d61a81 | |||
755255030b | |||
8e79998e95 | |||
9f914598c2 | |||
0468b04538 | |||
232a3876c2 | |||
1c27dd72d6 | |||
de5e474001 | |||
0fc092779d | |||
7abfdfbf3e | |||
92724d10ba | |||
1d7acbf916 | |||
ea3ea487bc | |||
69ad8c4842 | |||
0047e0360a | |||
1ac6abdad0 | |||
65f09f793c | |||
84e21ff467 | |||
11b228f77a | |||
7860a6aefb | |||
454e561d69 | |||
fe91ff59a3 | |||
64c24a00d6 | |||
58c0ae72f6 | |||
c5f079f61f | |||
16f23f4a91 | |||
b0f12d55dd | |||
8a648c1b97 | |||
2d45466f66 | |||
c6a0e6e96f | |||
79755dc624 | |||
f260f2c83d | |||
14402005bf | |||
0d372fb878 | |||
5eac924118 | |||
0b75da7a8f | |||
2dfc94b566 | |||
e318888a06 | |||
7c95934cc2 | |||
96df9d8323 | |||
31e2c7c8b4 | |||
2a725ee13f | |||
eb8037ee3c | |||
fadb84d703 | |||
9d0d2415be | |||
130dae0734 | |||
402234dfc3 | |||
4406d51a98 | |||
9b6de6ecb7 | |||
c512631184 | |||
19d27c31bc | |||
28310fe886 | |||
1ccc2bce7e | |||
00237e30b2 | |||
521ff88f7c | |||
64897a1dd1 | |||
41ce8480bb | |||
204a6ecb15 | |||
5db3b186a4 | |||
fe640cf421 | |||
ca67c79fe4 | |||
8917bcd4bf | |||
a75ec98673 | |||
e476338d61 | |||
d752b882f2 | |||
ade27773e6 | |||
0c1a1d2388 | |||
e3f335dfe5 | |||
7a191c0ebb | |||
55bf11e393 | |||
e90dcd10f3 | |||
7d44f4eaa0 | |||
f541432d44 | |||
aa4e32f78a | |||
2bc1416c00 | |||
68bbbf757d | |||
3df96fdfdd | |||
740ab76eaf | |||
ce13a47fea | |||
50a4a60786 | |||
14afa02db6 | |||
cc990d6ce4 | |||
0f5162e0c0 | |||
b26cd52f8c | |||
236d925a1d | |||
a4a4bc61ba | |||
3d82888580 | |||
749b878904 | |||
f396e3934c | |||
dd7594b5fa | |||
4ac1beee6d | |||
8c0ce736d8 | |||
8858c9426f | |||
9123ccb583 | |||
5dce851bef | |||
5b0441de6b | |||
317c53ab6f | |||
7db4411462 | |||
422317850e | |||
965537d8cb | |||
0dcdec5949 | |||
7fce305a1d | |||
dfbbe23512 | |||
7761a91242 | |||
9b06e46bf1 | |||
171e6f1871 | |||
1431e41d16 | |||
a49c655816 | |||
30e3ffc0e9 | |||
db7211c3a9 | |||
c6d57cf5c3 | |||
ca5253ddee | |||
e19da84b2e | |||
3e8e3542fd | |||
2f7365412d | |||
bba9ad131e | |||
ee24380d1f | |||
bfcd91acf8 | |||
d97291d4d3 | |||
a61fbbcf45 | |||
5adc8fd0a7 | |||
251a486c28 | |||
1edb96a0bd | |||
52e674a192 | |||
e241c13bf1 | |||
b53ff31212 | |||
2045e89f24 | |||
841b2dab5c | |||
d634f7d3cf | |||
1593e181a3 | |||
8e781506de | |||
f6a2c7620a | |||
6c30e5ab2f | |||
810eac4e61 | |||
d9bb3ab7ab | |||
21d0e90bd9 | |||
f071d749ce | |||
d95c354595 | |||
9bcd8183f6 | |||
5c531d8273 | |||
3fe3e2ca95 | |||
f01e781469 | |||
e6180a5a81 | |||
a9d31b46ed | |||
b01f23ed99 | |||
842bf21411 | |||
750d9e3c38 | |||
3b90318f83 | |||
1fb37604d3 | |||
018ecab310 | |||
6a1d6017e2 | |||
e7406f8147 | |||
f316026592 | |||
a8db747768 | |||
727af94722 | |||
6c89659df7 | |||
58c7c8f6ff | |||
225003f412 | |||
40b966aef2 | |||
294ec6988b | |||
fad23ebb22 | |||
b226dc4337 | |||
e7af863017 | |||
af6ce5b79c | |||
0a6fc943e2 | |||
67c6e178e1 | |||
9bf213707c | |||
5cd7899e98 | |||
ceec5fb9b3 | |||
85be497edb | |||
bd1b7107af | |||
b8275f108d | |||
2820ff9758 | |||
a015de893d | |||
b7f746e66d | |||
40cf4941f0 | |||
8a418ceb3e | |||
0312b6eb07 | |||
077f197d24 | |||
62354b4dce | |||
5970cd85cf | |||
4f6a742e6c | |||
87b43e5d04 | |||
f0c2a64ae3 | |||
e5d3deb6fe | |||
d321058b48 | |||
cea83140c0 | |||
7807d6caf0 | |||
3d0fad3c2a | |||
fe3b087435 | |||
bcefe52298 | |||
f88c148ccc | |||
4a47b1d04a | |||
80da937cb7 | |||
c81972e66e | |||
b09362fde1 | |||
b7688844fa | |||
3d212e7592 | |||
7aedfdf9c3 | |||
ebd4f74959 | |||
ebe2fbab92 | |||
4831a0cae1 | |||
07192c6ffb | |||
09d325e8ab | |||
11b0293d5f | |||
493bbed82c | |||
3bc25daaab | |||
40a3bc4bc3 | |||
c083d63c96 | |||
0221e3ea21 | |||
f5fd2b064e | |||
06e91a6a98 | |||
41b3f3c018 | |||
842076fef4 | |||
10d58f6a47 | |||
e2464efc12 | |||
1beae5024e | |||
c7c65b6542 | |||
f41ff0a6e8 | |||
389c1d189f | |||
487298986e | |||
d4cd045c48 | |||
3816645313 | |||
83b937c720 | |||
b3e6e8976f | |||
c890ea93cb | |||
84c68c6913 | |||
6f1e6fe232 | |||
b0d76312d1 | |||
19c846c71c | |||
f355c73209 | |||
173014ba19 | |||
24d4752bc3 | |||
a85b273e2e | |||
7f73b4b304 | |||
f3eb6d1b79 | |||
9082cc9f44 | |||
bf64a40472 | |||
32dbeebc09 | |||
66ddc79b15 | |||
7a8bd0bf41 | |||
ee552de740 | |||
6d1fb61573 | |||
f094529e66 | |||
5fecec2a4c | |||
85bb46f45c | |||
17c329fd6d | |||
437e1b425a | |||
c0f87db3c1 | |||
a9c5c19e30 | |||
f39567b2bc | |||
99ec0f4946 | |||
f5c60f68dc | |||
bdef0986d6 | |||
c396c4dac8 | |||
0b443f510b | |||
66fa6f3824 | |||
875fbe969f | |||
e35e85886e | |||
7211217f40 | |||
d34b980516 | |||
6aee52d980 | |||
090c8d5315 | |||
1042ff9f4b | |||
bc687969c1 | |||
de27bd3f41 | |||
4dcf713d0e | |||
f9dea53c24 | |||
6cedd7c327 | |||
6278d32f7d | |||
991039903c | |||
ea3e92be3f | |||
56c3306b38 | |||
cb6561c151 | |||
407aedcd20 | |||
bf8ff66c77 | |||
82f753efb1 | |||
e950794866 | |||
cb7c0cf83e | |||
33cb7031a3 | |||
33492fc851 | |||
5101522025 | |||
5130ab7e6a | |||
27024fb38e | |||
ff4e934bef | |||
a1d09fb5fc | |||
aefaac2405 | |||
e96cff4fc5 | |||
97bec3b1ee | |||
27f8dcf06d | |||
cb97ad3d2c | |||
8a7b3b5f95 | |||
11cc124019 | |||
e2daeb5e54 | |||
cbc7c5125d | |||
27fd9d54f9 | |||
e5e7ae9eda | |||
315bc57ac3 | |||
3b0b9175d6 | |||
e570800396 | |||
c327378373 | |||
aaffd61e4e | |||
b32edb1ed6 | |||
624980a47b | |||
3f436e6dfd | |||
d647ea7eee | |||
9ff30f8c1d | |||
e3be1a1d8a | |||
f63e58f2d9 | |||
a05a026bc7 | |||
1d875b1f1f | |||
f4f2493b59 | |||
4501da6edc | |||
36045fe53b | |||
7eef39d5fd | |||
de7f78a43b | |||
fab3567d74 | |||
6d6514d5c3 | |||
b67fe79e47 | |||
064b4bf715 | |||
e08be4c2a8 | |||
0276810776 | |||
565d0e98a9 | |||
47245df9bd | |||
c07670ac3e | |||
37b4376b4c | |||
3b52ecafa3 | |||
ac32647fac | |||
4e143dfa18 | |||
b84ffddd50 | |||
1531114677 | |||
ab4c1f0925 | |||
f7149e48e8 | |||
055cfa12b2 | |||
5cb03cd6ef | |||
70bcc6d2b3 | |||
43d3daa840 | |||
7dfa288270 | |||
881b9a7bd1 | |||
e85acdd20c | |||
d725ed1771 | |||
d58a27e2bf | |||
f3b0dfabea | |||
cccaec326a | |||
a3f444eb25 | |||
277b0c1d00 | |||
7bba4a80d9 | |||
f29d38d9d9 | |||
b4a0288a39 | |||
e63ab23d20 | |||
776279a4e6 | |||
7a9012c3e9 | |||
8c619cfde5 | |||
cc4e3bdb76 | |||
3dfd187710 | |||
cf66eca42c | |||
1046d0c47b | |||
d940aabf66 | |||
e1bf680d13 | |||
18720db594 | |||
19c70bf887 | |||
a672119dd0 | |||
f721e41f2b | |||
853639f390 | |||
ba11e4467f | |||
17073905f7 | |||
3b696e2a03 | |||
dd5658f987 | |||
54847b0710 | |||
b72e276eb8 | |||
0808ed5bd8 | |||
ec25eac697 | |||
09340d543d | |||
913883be3a | |||
8cc1dff0b8 | |||
5b2d52b8bc | |||
22ef82b59c | |||
a235c94c02 | |||
795d2ac7cf | |||
1c4efb92c6 | |||
56a1770f45 | |||
b766aef257 | |||
3e5d0ef87d | |||
a26468c059 | |||
0099a41fd8 | |||
b1baacf272 | |||
8afaf8f329 | |||
610860c379 | |||
c076af64af | |||
d0435cbf91 | |||
ace199ffa8 | |||
5b89fa7ccf | |||
d8929467eb | |||
ba66668fff | |||
1e1c7fa9c4 | |||
9b64a18daf | |||
398d382dac | |||
53e3c44186 | |||
4395f68662 | |||
c5ec07a661 | |||
4ca726439b | |||
72deddcd9b | |||
6812a28131 | |||
7933775462 | |||
3b9d84d689 | |||
b5e3b22558 | |||
93d7088af3 | |||
9f4d88cc8f | |||
8da7de817c | |||
acdea5f00b | |||
6c9cf198d7 | |||
5fa3ff9610 | |||
35a801d997 | |||
a3634c468f | |||
dfaeddefa2 |
11
.coveragerc
Normal file
11
.coveragerc
Normal file
|
@ -0,0 +1,11 @@
|
|||
# -*- conf -*-
|
||||
|
||||
[run]
|
||||
branch = True
|
||||
|
||||
[report]
|
||||
exclude_lines =
|
||||
pragma: no cover
|
||||
if 0:
|
||||
omit = nilmdb/scripts,nilmdb/_version.py,nilmdb/fsck
|
||||
show_missing = True
|
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
nilmdb/_version.py export-subst
|
24
.gitignore
vendored
Normal file
24
.gitignore
vendored
Normal file
|
@ -0,0 +1,24 @@
|
|||
# Tests
|
||||
tests/*testdb/
|
||||
.coverage
|
||||
db/
|
||||
|
||||
# Compiled / cythonized files
|
||||
README.html
|
||||
docs/*.html
|
||||
build/
|
||||
*.pyc
|
||||
nilmdb/server/interval.c
|
||||
nilmdb/server/layout.c
|
||||
nilmdb/server/rbtree.c
|
||||
*.so
|
||||
|
||||
# Setup junk
|
||||
dist/
|
||||
nilmdb.egg-info/
|
||||
venv/
|
||||
.eggs/
|
||||
|
||||
# Misc
|
||||
timeit*out
|
||||
|
29
MANIFEST.in
Normal file
29
MANIFEST.in
Normal file
|
@ -0,0 +1,29 @@
|
|||
# Root
|
||||
include README.txt
|
||||
include setup.cfg
|
||||
include setup.py
|
||||
include versioneer.py
|
||||
include Makefile
|
||||
include .coveragerc
|
||||
include .pylintrc
|
||||
include requirements.txt
|
||||
|
||||
# Cython files -- include .pyx source, but not the generated .c files
|
||||
# (Downstream systems must have cython installed in order to build)
|
||||
recursive-include nilmdb/server *.pyx *.pyxdep *.pxd
|
||||
exclude nilmdb/server/interval.c
|
||||
exclude nilmdb/server/rbtree.c
|
||||
|
||||
# Version
|
||||
include nilmdb/_version.py
|
||||
|
||||
# Tests
|
||||
recursive-include tests *.py
|
||||
recursive-include tests/data *
|
||||
include tests/test.order
|
||||
|
||||
# Docs
|
||||
recursive-include docs Makefile *.md
|
||||
|
||||
# Extras
|
||||
recursive-include extras *
|
52
Makefile
52
Makefile
|
@ -1,2 +1,50 @@
|
|||
all:
|
||||
nosetests
|
||||
# By default, run the tests.
|
||||
all: test
|
||||
|
||||
version:
|
||||
python3 setup.py version
|
||||
|
||||
build:
|
||||
python3 setup.py build_ext --inplace
|
||||
|
||||
dist: sdist
|
||||
sdist:
|
||||
python3 setup.py sdist
|
||||
|
||||
install:
|
||||
python3 setup.py install
|
||||
|
||||
develop:
|
||||
python3 setup.py develop
|
||||
|
||||
docs:
|
||||
make -C docs
|
||||
|
||||
ctrl: flake
|
||||
flake:
|
||||
flake8 nilmdb
|
||||
lint:
|
||||
pylint3 --rcfile=setup.cfg nilmdb
|
||||
|
||||
test:
|
||||
ifneq ($(INSIDE_EMACS),)
|
||||
# Use the slightly more flexible script
|
||||
python3 setup.py build_ext --inplace
|
||||
python3 tests/runtests.py
|
||||
else
|
||||
# Let setup.py check dependencies, build stuff, and run the test
|
||||
python3 setup.py nosetests
|
||||
endif
|
||||
|
||||
clean::
|
||||
find . -name '*.pyc' -o -name '__pycache__' -print0 | xargs -0 rm -rf
|
||||
rm -f .coverage
|
||||
rm -rf tests/*testdb*
|
||||
rm -rf nilmdb.egg-info/ build/ nilmdb/server/*.so
|
||||
make -C docs clean
|
||||
|
||||
gitclean::
|
||||
git clean -dXf
|
||||
|
||||
.PHONY: all version build dist sdist install docs test
|
||||
.PHONY: ctrl lint flake clean gitclean
|
||||
|
|
40
README.md
Normal file
40
README.md
Normal file
|
@ -0,0 +1,40 @@
|
|||
# nilmdb: Non-Intrusive Load Monitor Database
|
||||
by Jim Paris <jim@jtan.com>
|
||||
|
||||
NilmDB requires Python 3.8 or newer.
|
||||
|
||||
## Prerequisites:
|
||||
|
||||
# Runtime and build environments
|
||||
sudo apt install python3 python3-dev python3-venv python3-pip
|
||||
|
||||
# Create a new Python virtual environment to isolate deps.
|
||||
python3 -m venv ../venv
|
||||
source ../venv/bin/activate # run "deactivate" to leave
|
||||
|
||||
# Install all Python dependencies
|
||||
pip3 install -r requirements.txt
|
||||
|
||||
## Test:
|
||||
|
||||
python3 setup.py nosetests
|
||||
|
||||
## Install:
|
||||
|
||||
Install it into the virtual environment
|
||||
|
||||
python3 setup.py install
|
||||
|
||||
If you want to instead install it system-wide, you will also need to
|
||||
install the requirements system-wide:
|
||||
|
||||
sudo pip3 install -r requirements.txt
|
||||
sudo python3 setup.py install
|
||||
|
||||
## Usage:
|
||||
|
||||
nilmdb-server --help
|
||||
nilmdb-fsck --help
|
||||
nilmtool --help
|
||||
|
||||
See docs/wsgi.md for info on setting up a WSGI application in Apache.
|
|
@ -1,4 +0,0 @@
|
|||
To install,
|
||||
|
||||
python seutp.py install
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
from nilmdb import Interval
|
||||
from optparse import OptionParser
|
||||
import sys
|
||||
|
||||
version = "1.0"
|
||||
|
||||
parser = OptionParser()
|
||||
parser.add_option("-d", "--db", dest="database", metavar="DATABASE",
|
||||
help="location of sqlite database")
|
||||
parser.add_option("-V", "--version", dest="version", default=False, action="store_true",
|
||||
help="print version then exit")
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
if (options.version):
|
||||
print "This script version: " + version
|
||||
sys.exit(0)
|
||||
|
||||
if options.database is None:
|
||||
print "Error: database is mandatory"
|
||||
sys.exit(1)
|
||||
|
||||
print "Database is " + options.database
|
||||
|
9
docs/Makefile
Normal file
9
docs/Makefile
Normal file
|
@ -0,0 +1,9 @@
|
|||
ALL_DOCS = $(wildcard *.md)
|
||||
|
||||
all: $(ALL_DOCS:.md=.html)
|
||||
|
||||
%.html: %.md
|
||||
pandoc -s $< > $@
|
||||
|
||||
clean:
|
||||
rm -f *.html
|
5
docs/TODO.md
Normal file
5
docs/TODO.md
Normal file
|
@ -0,0 +1,5 @@
|
|||
- Documentation
|
||||
|
||||
- Machine-readable information in OverflowError, parser errors.
|
||||
Maybe subclass `cherrypy.HTTPError` and override `set_response`
|
||||
to add another JSON field?
|
469
docs/design.md
Normal file
469
docs/design.md
Normal file
|
@ -0,0 +1,469 @@
|
|||
Structure
|
||||
---------
|
||||
nilmdb.nilmdb is the NILM database interface. A nilmdb.BulkData
|
||||
interface stores data in flat files, and a SQL database tracks
|
||||
metadata and ranges.
|
||||
|
||||
Access to the nilmdb must be single-threaded. This is handled with
|
||||
the nilmdb.serializer class. In the future this could probably
|
||||
be turned into a per-path serialization.
|
||||
|
||||
nilmdb.server is a HTTP server that provides an interface to talk,
|
||||
through the serialization layer, to the nilmdb object.
|
||||
|
||||
nilmdb.client is a HTTP client that connects to this.
|
||||
|
||||
Sqlite performance
|
||||
------------------
|
||||
|
||||
Committing a transaction in the default sync mode (PRAGMA synchronous=FULL)
|
||||
takes about 125msec. sqlite3 will commit transactions at 3 times:
|
||||
|
||||
1. explicit con.commit()
|
||||
|
||||
2. between a series of DML commands and non-DML commands, e.g.
|
||||
after a series of INSERT, SELECT, but before a CREATE TABLE or
|
||||
PRAGMA.
|
||||
|
||||
3. at the end of an explicit transaction, e.g. "with self.con as con:"
|
||||
|
||||
To speed up testing, or if this transaction speed becomes an issue,
|
||||
the sync=False option to NilmDB will set PRAGMA synchronous=OFF.
|
||||
|
||||
|
||||
Inserting streams
|
||||
-----------------
|
||||
|
||||
We need to send the contents of "data" as POST. Do we need chunked
|
||||
transfer?
|
||||
|
||||
- Don't know the size in advance, so we would need to use chunked if
|
||||
we send the entire thing in one request.
|
||||
- But we shouldn't send one chunk per line, so we need to buffer some
|
||||
anyway; why not just make new requests?
|
||||
- Consider the infinite-streaming case, we might want to send it
|
||||
immediately? Not really -- server still should do explicit inserts
|
||||
of fixed-size chunks.
|
||||
- Even chunked encoding needs the size of each chunk beforehand, so
|
||||
everything still gets buffered. Just a tradeoff of buffer size.
|
||||
|
||||
Before timestamps are added:
|
||||
|
||||
- Raw data is about 440 kB/s (9 channels)
|
||||
- Prep data is about 12.5 kB/s (1 phase)
|
||||
- How do we know how much data to send?
|
||||
|
||||
- Remember that we can only do maybe 8-50 transactions per second on
|
||||
the sqlite database. So if one block of inserted data is one
|
||||
transaction, we'd need the raw case to be around 64kB per request,
|
||||
ideally more.
|
||||
- Maybe use a range, based on how long it's taking to read the data
|
||||
- If no more data, send it
|
||||
- If data > 1 MB, send it
|
||||
- If more than 10 seconds have elapsed, send it
|
||||
- Should those numbers come from the server?
|
||||
|
||||
Converting from ASCII to PyTables:
|
||||
|
||||
- For each row getting added, we need to set attributes on a PyTables
|
||||
Row object and call table.append(). This means that there isn't a
|
||||
particularly efficient way of converting from ascii.
|
||||
- Could create a function like nilmdb.layout.Layout("foo").fillRow(asciiline)
|
||||
- But this means we're doing parsing on the serialized side
|
||||
- Let's keep parsing on the threaded server side so we can detect
|
||||
errors better, and not block the serialized nilmdb for a slow
|
||||
parsing process.
|
||||
- Client sends ASCII data
|
||||
- Server converts this ASCII data to a list of values
|
||||
- Maybe:
|
||||
|
||||
# threaded side creates this object
|
||||
parser = nilmdb.layout.Parser("layout_name")
|
||||
# threaded side parses and fills it with data
|
||||
parser.parse(textdata)
|
||||
# serialized side pulls out rows
|
||||
for n in xrange(parser.nrows):
|
||||
parser.fill_row(rowinstance, n)
|
||||
table.append()
|
||||
|
||||
|
||||
Inserting streams, inside nilmdb
|
||||
--------------------------------
|
||||
|
||||
- First check that the new stream doesn't overlap.
|
||||
- Get minimum timestamp, maximum timestamp from data parser.
|
||||
- (extend parser to verify monotonicity and track extents)
|
||||
- Get all intervals for this stream in the database
|
||||
- See if new interval overlaps any existing ones
|
||||
- If so, bail
|
||||
- Question: should we cache intervals inside NilmDB?
|
||||
- Assume database is fast for now, and always rebuild from DB.
|
||||
- Can add a caching layer later if we need to.
|
||||
- `stream_get_ranges(path)` -> return IntervalSet?
|
||||
|
||||
Speed
|
||||
-----
|
||||
|
||||
- First approach was quadratic. Adding four hours of data:
|
||||
|
||||
$ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-110000 /bpnilm/1/raw
|
||||
real 24m31.093s
|
||||
$ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-120001 /bpnilm/1/raw
|
||||
real 43m44.528s
|
||||
$ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-130002 /bpnilm/1/raw
|
||||
real 93m29.713s
|
||||
$ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-140003 /bpnilm/1/raw
|
||||
real 166m53.007s
|
||||
|
||||
- Disabling pytables indexing didn't help:
|
||||
|
||||
real 31m21.492s
|
||||
real 52m51.963s
|
||||
real 102m8.151s
|
||||
real 176m12.469s
|
||||
|
||||
- Server RAM usage is constant.
|
||||
|
||||
- Speed problems were due to IntervalSet speed, of parsing intervals
|
||||
from the database and adding the new one each time.
|
||||
|
||||
- First optimization is to cache result of `nilmdb:_get_intervals`,
|
||||
which gives the best speedup.
|
||||
|
||||
- Also switched to internally using bxInterval from bx-python package.
|
||||
Speed of `tests/test_interval:TestIntervalSpeed` is pretty decent
|
||||
and seems to be growing logarithmically now. About 85μs per insertion
|
||||
for inserting 131k entries.
|
||||
|
||||
- Storing the interval data in SQL might be better, with a scheme like:
|
||||
http://www.logarithmic.net/pfh/blog/01235197474
|
||||
|
||||
- Next slowdown target is nilmdb.layout.Parser.parse().
|
||||
- Rewrote parsers using cython and sscanf
|
||||
- Stats (rev 10831), with `_add_interval` disabled
|
||||
|
||||
layout.pyx.Parser.parse:128 6303 sec, 262k calls
|
||||
layout.pyx.parse:63 13913 sec, 5.1g calls
|
||||
numpy:records.py.fromrecords:569 7410 sec, 262k calls
|
||||
|
||||
- Probably OK for now.
|
||||
|
||||
- After all updates, now takes about 8.5 minutes to insert an hour of
|
||||
data, constant after adding 171 hours (4.9 billion data points)
|
||||
|
||||
- Data set size: 98 gigs = 20 bytes per data point.
|
||||
6 uint16 data + 1 uint32 timestamp = 16 bytes per point
|
||||
So compression must be off -- will retry with compression forced on.
|
||||
|
||||
IntervalSet speed
|
||||
-----------------
|
||||
- Initial implementation was pretty slow, even with binary search in
|
||||
sorted list
|
||||
|
||||
- Replaced with bxInterval; now takes about log n time for an insertion
|
||||
- TestIntervalSpeed with range(17,18) and profiling
|
||||
- 85 μs each
|
||||
- 131072 calls to `__iadd__`
|
||||
- 131072 to bx.insert_interval
|
||||
- 131072 to bx.insert:395
|
||||
- 2355835 to bx.insert:106 (18x as many?)
|
||||
|
||||
- Tried blist too, worse than bxinterval.
|
||||
|
||||
- Might be algorithmic improvements to be made in Interval.py,
|
||||
like in `__and__`
|
||||
|
||||
- Replaced again with rbtree. Seems decent. Numbers are time per
|
||||
insert for 2**17 insertions, followed by total wall time and RAM
|
||||
usage for running "make test" with `test_rbtree` and `test_interval`
|
||||
with range(5,20):
|
||||
- old values with bxinterval:
|
||||
20.2 μs, total 20 s, 177 MB RAM
|
||||
- rbtree, plain python:
|
||||
97 μs, total 105 s, 846 MB RAM
|
||||
- rbtree converted to cython:
|
||||
26 μs, total 29 s, 320 MB RAM
|
||||
- rbtree and interval converted to cython:
|
||||
8.4 μs, total 12 s, 134 MB RAM
|
||||
|
||||
- Would like to move Interval itself back to Python so other
|
||||
non-cythonized code like client code can use it more easily.
|
||||
Testing speed with just `test_interval` being tested, with
|
||||
`range(5,22)`, using `/usr/bin/time -v python tests/runtests.py`,
|
||||
times recorded for 2097152:
|
||||
- 52ae397 (Interval in cython):
|
||||
12.6133 μs each, ratio 0.866533, total 47 sec, 399 MB RAM
|
||||
- 9759dcf (Interval in python):
|
||||
21.2937 μs each, ratio 1.462870, total 83 sec, 1107 MB RAM
|
||||
That's a huge difference! Instead, will keep Interval and DBInterval
|
||||
cythonized inside nilmdb, and just have an additional copy in
|
||||
nilmdb.utils for clients to use.
|
||||
|
||||
Layouts
|
||||
-------
|
||||
Current/old design has specific layouts: RawData, PrepData, RawNotchedData.
|
||||
Let's get rid of this entirely and switch to simpler data types that are
|
||||
just collections and counts of a single type. We'll still use strings
|
||||
to describe them, with format:
|
||||
|
||||
type_count
|
||||
|
||||
where type is "uint16", "float32", or "float64", and count is an integer.
|
||||
|
||||
nilmdb.layout.named() will parse these strings into the appropriate
|
||||
handlers. For compatibility:
|
||||
|
||||
"RawData" == "uint16_6"
|
||||
"RawNotchedData" == "uint16_9"
|
||||
"PrepData" == "float32_8"
|
||||
|
||||
|
||||
BulkData design
|
||||
---------------
|
||||
|
||||
BulkData is a custom bulk data storage system that was written to
|
||||
replace PyTables. The general structure is a `data` subdirectory in
|
||||
the main NilmDB directory. Within `data`, paths are created for each
|
||||
created stream. These locations are called tables. For example,
|
||||
tables might be located at
|
||||
|
||||
nilmdb/data/newton/raw/
|
||||
nilmdb/data/newton/prep/
|
||||
nilmdb/data/cottage/raw/
|
||||
|
||||
Each table contains:
|
||||
|
||||
- An unchanging `_format` file (Python pickle format) that describes
|
||||
parameters of how the data is broken up, like files per directory,
|
||||
rows per file, and the binary data format
|
||||
|
||||
- Hex named subdirectories (`"%04x"`, although more than 65536 can exist)
|
||||
|
||||
- Hex named files within those subdirectories, like:
|
||||
|
||||
/nilmdb/data/newton/raw/000b/010a
|
||||
|
||||
The data format of these files is raw binary, interpreted by the
|
||||
Python `struct` module according to the format string in the
|
||||
`_format` file.
|
||||
|
||||
- Same as above, with `.removed` suffix, is an optional file (Python
|
||||
pickle format) containing a list of row numbers that have been
|
||||
logically removed from the file. If this range covers the entire
|
||||
file, the entire file will be removed.
|
||||
|
||||
- Note that the `bulkdata.nrows` variable is calculated once in
|
||||
`BulkData.__init__()`, and only ever incremented during use. Thus,
|
||||
even if all data is removed, `nrows` can remain high. However, if
|
||||
the server is restarted, the newly calculated `nrows` may be lower
|
||||
than in a previous run due to deleted data. To be specific, this
|
||||
sequence of events:
|
||||
|
||||
- insert data
|
||||
- remove all data
|
||||
- insert data
|
||||
|
||||
will result in having different row numbers in the database, and
|
||||
differently numbered files on the filesystem, than the sequence:
|
||||
|
||||
- insert data
|
||||
- remove all data
|
||||
- restart server
|
||||
- insert data
|
||||
|
||||
This is okay! Everything should remain consistent both in the
|
||||
`BulkData` and `NilmDB`. Not attempting to readjust `nrows` during
|
||||
deletion makes the code quite a bit simpler.
|
||||
|
||||
- Similarly, data files are never truncated shorter. Removing data
|
||||
from the end of the file will not shorten it; it will only be
|
||||
deleted when it has been fully filled and all of the data has been
|
||||
subsequently removed.
|
||||
|
||||
|
||||
Rocket
|
||||
------
|
||||
|
||||
Original design had the nilmdb.nilmdb thread (through bulkdata)
|
||||
convert from on-disk layout to a Python list, and then the
|
||||
nilmdb.server thread (from cherrypy) converts to ASCII. For at least
|
||||
the extraction side of things, it's easy to pass the bulkdata a layout
|
||||
name instead, and have it convert directly from on-disk to ASCII
|
||||
format, because this conversion can then be shoved into a C module.
|
||||
This module, which provides a means for converting directly from
|
||||
on-disk format to ASCII or Python lists, is the "rocket" interface.
|
||||
Python is still used to manage the files and figure out where the
|
||||
data should go; rocket just puts binary data directly in or out of
|
||||
those files at specified locations.
|
||||
|
||||
Before rocket, testing speed with uint16_6 data, with an end-to-end
|
||||
test (extracting data with nilmtool):
|
||||
|
||||
- insert: 65 klines/sec
|
||||
- extract: 120 klines/sec
|
||||
|
||||
After switching to the rocket design, but using the Python version
|
||||
(pyrocket):
|
||||
|
||||
- insert: 57 klines/sec
|
||||
- extract: 120 klines/sec
|
||||
|
||||
After switching to a C extension module (rocket.c)
|
||||
|
||||
- insert: 74 klines/sec through insert.py; 99.6 klines/sec through nilmtool
|
||||
- extract: 335 klines/sec
|
||||
|
||||
After client block updates (described below):
|
||||
|
||||
- insert: 180 klines/sec through nilmtool (pre-timestamped)
|
||||
- extract: 390 klines/sec through nilmtool
|
||||
|
||||
Using "insert --timestamp" or "extract --bare" cuts the speed in half.
|
||||
|
||||
Blocks versus lines
|
||||
-------------------
|
||||
|
||||
Generally want to avoid parsing the bulk of the data as lines if
|
||||
possible, and transfer things in bigger blocks at once.
|
||||
|
||||
Current places where we use lines:
|
||||
|
||||
- All data returned by `client.stream_extract`, since it comes from
|
||||
`httpclient.get_gen`, which iterates over lines. Not sure if this
|
||||
should be changed, because a `nilmtool extract` is just about the
|
||||
same speed as `curl -q .../stream/extract`!
|
||||
|
||||
- `client.StreamInserter.insert_iter` and
|
||||
`client.StreamInserter.insert_line`, which should probably get
|
||||
replaced with block versions. There's no real need to keep
|
||||
updating the timestamp every time we get a new line of data.
|
||||
|
||||
- Finished. Just a single insert() that takes any length string and
|
||||
does very little processing until it's time to send it to the
|
||||
server.
|
||||
|
||||
Timestamps
|
||||
----------
|
||||
|
||||
Timestamps are currently double-precision floats (64 bit). Since the
|
||||
mantissa is 53-bit, this can only represent about 15-17 significant
|
||||
figures, and microsecond Unix timestamps like 1222333444.000111 are
|
||||
already 16 significant figures. Rounding is therefore an issue;
|
||||
it's hard to be sure that converting from ASCII, then back to ASCII,
|
||||
will always give the same result.
|
||||
|
||||
Also, if the client provides a floating point value like 1.9999999999,
|
||||
we need to be careful that we don't store it as 1.9999999999 but later
|
||||
print it as 2.000000, because then round-trips change the data.
|
||||
|
||||
Possible solutions:
|
||||
|
||||
- When the client provides a floating point value to the server,
|
||||
always round to the 6th decimal digit before verifying & storing.
|
||||
Good for compatibility and simplicity. But still might have rounding
|
||||
issues, and clients will also need to round when doing their own
|
||||
verification. Having every piece of code need to know which digit
|
||||
to round at is not ideal.
|
||||
|
||||
- Always store int64 timestamps on the server, representing
|
||||
microseconds since epoch. int64 timestamps are used in all HTTP
|
||||
parameters, in insert/extract ASCII strings, client API, commandline
|
||||
raw timestamps, etc. Pretty big change.
|
||||
|
||||
This is what we'll go with...
|
||||
|
||||
- Client programs that interpret the timestamps as doubles instead
|
||||
of ints will remain accurate until 2^53 microseconds, or year
|
||||
2255.
|
||||
|
||||
- On insert, maybe it's OK to send floating point microsecond values
|
||||
(1234567890123456.0), just to cope with clients that want to print
|
||||
everything as a double. Server could try parsing as int64, and if
|
||||
that fails, parse as double and truncate to int64. However, this
|
||||
wouldn't catch imprecise inputs like "1.23456789012e+15". But
|
||||
maybe that can just be ignored; it's likely to cause a
|
||||
non-monotonic error at the client.
|
||||
|
||||
- Timestamps like 1234567890.123456 never show up anywhere, except
|
||||
for interfacing to datetime_tz etc. Command line "raw timestamps"
|
||||
are always printed as int64 values, and a new format
|
||||
"@1234567890123456" is added to the parser for specifying them
|
||||
exactly.
|
||||
|
||||
Binary interface
|
||||
----------------
|
||||
|
||||
The ASCII interface is too slow for high-bandwidth processing, like
|
||||
sinefits, prep, etc. A binary interface was added so that you can
|
||||
extract the raw binary out of the bulkdata storage. This binary is
|
||||
a little-endian format, e.g. in C a uint16_6 stream would be:
|
||||
|
||||
#include <endian.h>
|
||||
#include <stdint.h>
|
||||
struct {
|
||||
int64_t timestamp_le;
|
||||
uint16_t data_le[6];
|
||||
} __attribute__((packed));
|
||||
|
||||
Remember to byteswap (with e.g. `le64toh` / `le16toh` from `<endian.h>` in C)!
|
||||
|
||||
This interface is used by the new `nilmdb.client.numpyclient.NumpyClient`
|
||||
class, which is a subclass of the normal `nilmdb.client.client.Client`
|
||||
and has all of the same functions. It adds three new functions:
|
||||
|
||||
- `stream_extract_numpy` to extract data as a Numpy array
|
||||
|
||||
- `stream_insert_numpy` to insert data as a Numpy array
|
||||
|
||||
- `stream_insert_numpy_context` is the context manager for
|
||||
incrementally inserting data
|
||||
|
||||
It is significantly faster! It is about 20 times faster to decimate a
|
||||
stream with `nilm-decimate` when the filter code is using the new
|
||||
binary/numpy interface.
|
||||
|
||||
|
||||
WSGI interface & chunked requests
|
||||
---------------------------------
|
||||
|
||||
mod_wsgi requires "WSGIChunkedRequest On" to handle
|
||||
"Transfer-encoding: Chunked" requests. However, `/stream/insert`
|
||||
doesn't handle this correctly right now, because:
|
||||
|
||||
- The `cherrypy.request.body.read()` call needs to be fixed for chunked requests
|
||||
|
||||
- We don't want to just buffer endlessly in the server, and it will
|
||||
require some thought on how to handle data in chunks (what to do about
|
||||
interval endpoints).
|
||||
|
||||
It is probably better to just keep the endpoint management on the client
|
||||
side, so leave "WSGIChunkedRequest off" for now.
|
||||
|
||||
|
||||
Unicode & character encoding
|
||||
----------------------------
|
||||
|
||||
Stream data is passed back and forth as raw `bytes` objects in most
|
||||
places, including the `nilmdb.client` and command-line interfaces.
|
||||
This is done partially for performance reasons, and partially to
|
||||
support the binary insert/extract options, where character-set encoding
|
||||
would not apply.
|
||||
|
||||
For the HTTP server, the raw bytes transferred over HTTP are interpreted
|
||||
as follows:
|
||||
- For `/stream/insert`, the client-provided `Content-Type` is ignored,
|
||||
and the data is read as if it were `application/octet-stream`.
|
||||
- For `/stream/extract`, the returned data is `application/octet-stream`.
|
||||
- All other endpoints communicate via JSON, which is specified to always
|
||||
be encoded as UTF-8. This includes:
|
||||
- `/version`
|
||||
- `/dbinfo`
|
||||
- `/stream/list`
|
||||
- `/stream/create`
|
||||
- `/stream/destroy`
|
||||
- `/stream/rename`
|
||||
- `/stream/get_metadata`
|
||||
- `/stream/set_metadata`
|
||||
- `/stream/update_metadata`
|
||||
- `/stream/remove`
|
||||
- `/stream/intervals`
|
32
docs/wsgi.md
Normal file
32
docs/wsgi.md
Normal file
|
@ -0,0 +1,32 @@
|
|||
WSGI Application in Apache
|
||||
--------------------------
|
||||
|
||||
Install `apache2` and `libapache2-mod-wsgi`
|
||||
|
||||
We'll set up the database server at URL `http://myhost.com/nilmdb`.
|
||||
The database will be stored in `/home/nilm/db`, and the process will
|
||||
run as user `nilm`, group `nilm`.
|
||||
|
||||
First, create a WSGI script `/home/nilm/nilmdb.wsgi` containing:
|
||||
|
||||
import nilmdb.server
|
||||
application = nilmdb.server.wsgi_application("/home/nilm/db", "/nilmdb")
|
||||
|
||||
The first parameter is the local filesystem path, and the second
|
||||
parameter is the path part of the URL.
|
||||
|
||||
Then, set up Apache with a configuration like:
|
||||
|
||||
<VirtualHost>
|
||||
WSGIScriptAlias /nilmdb /home/nilm/nilmdb.wsgi
|
||||
WSGIDaemonProcess nilmdb-procgroup threads=32 user=nilm group=nilm
|
||||
<Location /nilmdb>
|
||||
WSGIProcessGroup nilmdb-procgroup
|
||||
WSGIApplicationGroup nilmdb-appgroup
|
||||
|
||||
# Access control example:
|
||||
Order deny,allow
|
||||
Deny from all
|
||||
Allow from 1.2.3.4
|
||||
</Location>
|
||||
</VirtualHost>
|
50
extras/fix-oversize-files.py
Normal file
50
extras/fix-oversize-files.py
Normal file
|
@ -0,0 +1,50 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import pickle
|
||||
import argparse
|
||||
import fcntl
|
||||
import re
|
||||
from nilmdb.client.numpyclient import layout_to_dtype
|
||||
|
||||
# Command-line tool: find bulk data files that are larger than the
# maximum size implied by their table's `_format` file, report them,
# and (with --yes) truncate them back to that maximum size.
parser = argparse.ArgumentParser(
    description = """
    Fix database corruption where binary writes caused too much data to be
    written to the file. Truncates files to the correct length. This was
    fixed by b98ff1331a515ad47fd3203615e835b529b039f9.
    """)
parser.add_argument("path", action="store", help='Database root path')
parser.add_argument("-y", "--yes", action="store_true", help='Fix them')
args = parser.parse_args()

lock = os.path.join(args.path, "data.lock")
with open(lock, "w") as f:
    # Take an exclusive, non-blocking lock on data.lock.  All of the
    # work below stays inside this `with` block so that the lock is
    # held until the script has finished touching the data files.
    fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)

    # Maps each table directory (one containing `_format`) to the
    # maximum allowed size, in bytes, of a data file in that table.
    fix = {}

    for (path, dirs, files) in os.walk(args.path):
        if "_format" in files:
            # Pickle data is binary; on Python 3 the file must be
            # opened in "rb" mode or pickle.load() raises TypeError.
            # NOTE: pickle.load can execute arbitrary code from a
            # malicious file; only run this on a trusted database.
            with open(os.path.join(path, "_format"), "rb") as format_file:
                fmt = pickle.load(format_file)
            rowsize = layout_to_dtype(fmt["layout"]).itemsize
            maxsize = rowsize * fmt["rows_per_file"]
            fix[path] = maxsize
            if maxsize < 128000000: # sanity check
                raise Exception("bad maxsize " + str(maxsize))

    for fixpath in fix:
        for (path, dirs, files) in os.walk(fixpath):
            for fn in files:
                # Only consider hex-named data files; this skips
                # `_format`, `*.removed`, and anything else.
                if not re.match("^[0-9a-f]{4,}$", fn):
                    continue
                fn = os.path.join(path, fn)
                size = os.path.getsize(fn)
                maxsize = fix[fixpath]
                if size > maxsize:
                    diff = size - maxsize
                    print(diff, "too big:", fn)
                    if args.yes:
                        with open(fn, "a+") as dbfile:
                            dbfile.truncate(maxsize)
|
20
extras/nilmtool-bash-completion.sh
Normal file
20
extras/nilmtool-bash-completion.sh
Normal file
|
@ -0,0 +1,20 @@
|
|||
# To enable bash completion:
|
||||
#
|
||||
# 1. Ensure python-argcomplete is installed:
|
||||
# pip install argcomplete
|
||||
# 2. Source this file:
|
||||
# . nilmtool-bash-completion.sh
|
||||
|
||||
_nilmtool_argcomplete() {
    # Split the completion output on vertical tab (octal 013).
    local IFS=$'\013'
    # Run the command being completed ("$1") with _ARGCOMPLETE=1 and the
    # current completion state in its environment.  File descriptors 8
    # and 9 are duplicates of the original stdout/stderr, while the
    # command's own stdout and stderr are sent to /dev/null.
    COMPREPLY=( $(IFS="$IFS" \
                  COMP_LINE="$COMP_LINE" \
                  COMP_WORDBREAKS="$COMP_WORDBREAKS" \
                  COMP_POINT="$COMP_POINT" \
                  _ARGCOMPLETE=1 \
                  "$1" 8>&1 9>&2 1>/dev/null 2>/dev/null) )
    # If the command failed, discard whatever was collected.
    [[ $? == 0 ]] || unset COMPREPLY
}
|
||||
complete -o nospace -F _nilmtool_argcomplete nilmtool
|
|
@ -1,2 +1,5 @@
|
|||
from nilmdb.interval import *
|
||||
from nilmdb.fileinterval import *
|
||||
"""Main NilmDB import"""
|
||||
|
||||
from ._version import get_versions
|
||||
__version__ = get_versions()['version']
|
||||
del get_versions
|
||||
|
|
520
nilmdb/_version.py
Normal file
520
nilmdb/_version.py
Normal file
|
@ -0,0 +1,520 @@
|
|||
|
||||
# This file helps to compute a version number in source trees obtained from
|
||||
# git-archive tarball (such as those provided by githubs download-from-tag
|
||||
# feature). Distribution tarballs (built by setup.py sdist) and build
|
||||
# directories (produced by setup.py build) will contain a much shorter file
|
||||
# that just contains the computed version number.
|
||||
|
||||
# This file is released into the public domain. Generated by
|
||||
# versioneer-0.18 (https://github.com/warner/python-versioneer)
|
||||
|
||||
"""Git implementation of _version.py."""
|
||||
|
||||
import errno
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def get_keywords():
    """Return the git-archive keyword strings as a dict.

    The result has the keys "refnames", "full" and "date".
    """
    # git replaces these strings during git-archive.  setup.py and
    # versioneer.py grep for the variable names, so each assignment
    # must stay on a line of its own.  _version.py itself only calls
    # get_keywords().
    git_refnames = "$Format:%d$"
    git_full = "$Format:%H$"
    git_date = "$Format:%ci$"
    return {
        "refnames": git_refnames,
        "full": git_full,
        "date": git_date,
    }
|
||||
|
||||
|
||||
class VersioneerConfig:
    """Plain attribute holder for the Versioneer configuration values."""
|
||||
|
||||
|
||||
def get_config():
    """Build and return a VersioneerConfig filled with this project's values."""
    # These values are filled in when 'setup.py versioneer' creates
    # _version.py.
    settings = (
        ("VCS", "git"),
        ("style", "pep440"),
        ("tag_prefix", "nilmdb-"),
        ("parentdir_prefix", "nilmdb-"),
        ("versionfile_source", "nilmdb/_version.py"),
        ("verbose", False),
    )
    cfg = VersioneerConfig()
    for attribute, value in settings:
        setattr(cfg, attribute, value)
    return cfg
|
||||
|
||||
|
||||
class NotThisMethod(Exception):
    """Raised when a version-discovery method does not apply here."""
|
||||
|
||||
|
||||
LONG_VERSION_PY = {}
|
||||
HANDLERS = {}
|
||||
|
||||
|
||||
def register_vcs_handler(vcs, method):  # decorator
    """Return a decorator that registers a function in HANDLERS[vcs][method]."""
    def decorate(f):
        """Record f under HANDLERS[vcs][method] and hand it back unchanged."""
        # setdefault creates the per-VCS dict the first time it is needed
        HANDLERS.setdefault(vcs, {})[method] = f
        return f
    return decorate
|
||||
|
||||
|
||||
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
                env=None):
    """Run the first launchable executable from `commands` with `args`.

    `commands` must be a list of candidate executable names; each is
    tried in turn until one can be started.  Returns a tuple
    (stdout, returncode):

    - (None, None) if no candidate could be started
    - (None, returncode) if the process exited with a non-zero status
    - (stripped stdout text, 0) on success
    """
    assert isinstance(commands, list)
    stderr_target = subprocess.PIPE if hide_stderr else None
    proc = None
    for candidate in commands:
        argv = [candidate] + args
        dispcmd = str(argv)
        try:
            # remember shell=False, so use git.cmd on windows, not just git
            proc = subprocess.Popen(argv, cwd=cwd, env=env,
                                    stdout=subprocess.PIPE,
                                    stderr=stderr_target)
        except EnvironmentError as err:
            if err.errno == errno.ENOENT:
                # this candidate does not exist; try the next one
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(err)
            return None, None
        break
    else:
        # every candidate failed with ENOENT
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None

    output = proc.communicate()[0].strip()
    if sys.version_info[0] >= 3:
        output = output.decode()
    status = proc.returncode
    if status != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
            print("stdout was %s" % output)
        return None, status
    return output, status
|
||||
|
||||
|
||||
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Derive the version from the name of a parent directory.

    Source tarballs conventionally unpack into a directory whose name
    is the project name followed by a version string.  `root` and up to
    two directory levels above it are checked for a basename starting
    with `parentdir_prefix`; the rest of that basename is the version.

    Raises NotThisMethod if none of the three directories match.
    """
    tried = []
    candidate = root

    for _ in range(3):
        name = os.path.basename(candidate)
        if name.startswith(parentdir_prefix):
            return {"version": name[len(parentdir_prefix):],
                    "full-revisionid": None,
                    "dirty": False, "error": None, "date": None}
        tried.append(candidate)
        candidate = os.path.dirname(candidate)  # up a level

    if verbose:
        print("Tried directories %s but none started with prefix %s" %
              (str(tried), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
|
||||
|
||||
|
||||
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract the git keyword values from the given _version.py file.

    Returns a dict that may contain "refnames", "full" and "date",
    depending on which assignments were found.  If the file cannot be
    read, whatever was collected so far (possibly nothing) is returned.
    """
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    wanted = (
        ("git_refnames =", "refnames"),
        ("git_full =", "full"),
        ("git_date =", "date"),
    )
    keywords = {}
    try:
        # `with` guarantees the file is closed even if reading fails
        with open(versionfile_abs, "r") as f:
            for line in f:
                stripped = line.strip()
                for prefix, key in wanted:
                    if stripped.startswith(prefix):
                        mo = re.search(r'=\s*"(.*)"', line)
                        if mo:
                            keywords[key] = mo.group(1)
    except EnvironmentError:
        # deliberate best-effort: an unreadable file just means that
        # fewer (or no) keywords are returned
        pass
    return keywords
|
||||
|
||||
|
||||
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Compute version information from expanded git-archive keywords.

    Raises NotThisMethod if `keywords` is empty or the keywords were
    never expanded.  Otherwise returns a version dict; if no ref starts
    with `tag_prefix`, the version is "0+unknown" with an error set.
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")

    date = keywords.get("date")
    if date is not None:
        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")

    refs = set(name.strip() for name in refnames.strip("()").split(","))

    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = set(name[len(TAG):] for name in refs if name.startswith(TAG))
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = set(name for name in refs if re.search(r'\d', name))
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))

    revision = keywords["full"]
    # sorting will prefer e.g. "2.0" over "2.0rc1"
    for candidate in sorted(tags):
        if not candidate.startswith(tag_prefix):
            continue
        version = candidate[len(tag_prefix):]
        if verbose:
            print("picking %s" % version)
        return {"version": version,
                "full-revisionid": revision.strip(),
                "dirty": False, "error": None,
                "date": date}

    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": revision.strip(),
            "dirty": False, "error": "no suitable tags", "date": None}
|
||||
|
||||
|
||||
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    Returns a dict of "pieces" ("long", "short", "error", "dirty",
    "closest-tag", "distance", "date").  If the describe output cannot
    be parsed, or the tag lacks `tag_prefix`, the dict is returned early
    with "error" set.  Raises NotThisMethod if `root` is not under git
    control or if any of the git commands fails.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
                          hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
                                          "--always", "--long",
                                          "--match", "%s*" % tag_prefix],
                                   cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%s'"
                               % describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
                               % (full_tag, tag_prefix))
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
                                    cwd=root)
        if count_out is None:
            # run_command returns None on failure; report it as "this
            # method doesn't apply" instead of crashing in int(None)
            raise NotThisMethod("'git rev-list' failed")
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date_out, rc = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
                               cwd=root)
    if date_out is None:
        raise NotThisMethod("'git show' failed")
    # Use only the last line.  Previous lines may contain GPG signature
    # information (presumably when log.showSignature is enabled -- TODO
    # confirm against git documentation).
    date_lines = date_out.strip().splitlines()
    date = date_lines[-1] if date_lines else ""
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces
|
||||
|
||||
|
||||
def plus_or_dot(pieces):
    """Return "." if the closest tag already contains a "+", else "+".

    A missing "closest-tag" key and a "closest-tag" of None are both
    treated as an empty tag, so "+" is returned for them.
    """
    # `or ""` covers the key being present with value None, which
    # .get()'s default alone does not handle.
    closest_tag = pieces.get("closest-tag") or ""
    if "+" in closest_tag:
        return "."
    return "+"
|
||||
|
||||
|
||||
def render_pep440(pieces):
    """Render as TAG[+DISTANCE.gHEX[.dirty]].

    A tagged build that has since been modified renders as
    TAG+0.gHEX.dirty.

    Exceptions:
    1: no tags. The result is 0+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        parts = ["0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])]
        if pieces["dirty"]:
            parts.append(".dirty")
        return "".join(parts)

    parts = [tag]
    if pieces["distance"] or pieces["dirty"]:
        parts.append(plus_or_dot(pieces))
        parts.append("%d.g%s" % (pieces["distance"], pieces["short"]))
        if pieces["dirty"]:
            parts.append(".dirty")
    return "".join(parts)
|
||||
|
||||
|
||||
def render_pep440_pre(pieces):
    """Render as TAG[.post.devDISTANCE]; dirtiness is not shown.

    Exceptions:
    1: no tags. The result is 0.post.devDISTANCE
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        return "0.post.dev%d" % pieces["distance"]
    if pieces["distance"]:
        return tag + ".post.dev%d" % pieces["distance"]
    return tag
|
||||
|
||||
|
||||
def render_pep440_post(pieces):
    """Render as TAG[.postDISTANCE[.dev0]+gHEX].

    ".dev0" marks a dirty tree.  Note that .dev0 sorts backwards (a
    dirty tree appears "older" than the corresponding clean one), but
    dirty trees should not be released anyway.

    Exceptions:
    1: no tags. The result is 0.postDISTANCE[.dev0]+gHEX
    """
    dirty_marker = ".dev0"
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        out = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            out += dirty_marker
        return out + "+g%s" % pieces["short"]

    out = tag
    if pieces["distance"] or pieces["dirty"]:
        out += ".post%d" % pieces["distance"]
        if pieces["dirty"]:
            out += dirty_marker
        out += plus_or_dot(pieces)
        out += "g%s" % pieces["short"]
    return out
|
||||
|
||||
|
||||
def render_pep440_old(pieces):
    """Render as TAG[.postDISTANCE[.dev0]].

    ".dev0" marks a dirty tree.

    Exceptions:
    1: no tags. The result is 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag:
        out = tag
        needs_suffix = pieces["distance"] or pieces["dirty"]
    else:
        # exception #1: the ".postN" suffix is unconditional
        out = "0"
        needs_suffix = True

    if needs_suffix:
        out += ".post%d" % pieces["distance"]
        if pieces["dirty"]:
            out += ".dev0"
    return out
|
||||
|
||||
|
||||
def render_git_describe(pieces):
    """Render as TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. The result is HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        out = pieces["short"]
    elif pieces["distance"]:
        out = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        out = tag
    return out + "-dirty" if pieces["dirty"] else out
|
||||
|
||||
|
||||
def render_git_describe_long(pieces):
    """Render as TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always --long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. The result is HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        out = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        out = pieces["short"]
    return out + "-dirty" if pieces["dirty"] else out
|
||||
|
||||
|
||||
def render(pieces, style):
    """Turn version pieces into a version dict using the given style.

    If pieces["error"] is set, a dict with version "unknown" and that
    error is returned.  An empty style or "default" means "pep440".
    Raises ValueError for an unrecognised style.
    """
    if pieces["error"]:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"],
                "date": None}

    chosen = style
    if not chosen or chosen == "default":
        chosen = "pep440"  # the default

    renderers = (
        ("pep440", render_pep440),
        ("pep440-pre", render_pep440_pre),
        ("pep440-post", render_pep440_post),
        ("pep440-old", render_pep440_old),
        ("git-describe", render_git_describe),
        ("git-describe-long", render_git_describe_long),
    )
    for name, renderer in renderers:
        if chosen == name:
            rendered = renderer(pieces)
            break
    else:
        raise ValueError("unknown style '%s'" % style)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}
|
||||
|
||||
|
||||
def get_versions():
    """Return version information, or a "0+unknown" default on failure.

    Three methods are tried in order: expanded git-archive keywords,
    'git describe' in the source tree, and the parent directory name.
    """
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.

    def unknown(error):
        """Build the fallback result carrying the given error text."""
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": error,
                "date": None}

    cfg = get_config()
    verbose = cfg.verbose

    # Method 1: keywords expanded by git-archive
    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__.
        for _ in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        return unknown("unable to find root of source tree")

    # Method 2: ask git directly
    try:
        return render(git_pieces_from_vcs(cfg.tag_prefix, root, verbose),
                      cfg.style)
    except NotThisMethod:
        pass

    # Method 3: name of the parent directory
    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return unknown("unable to compute version")
|
4
nilmdb/client/__init__.py
Normal file
4
nilmdb/client/__init__.py
Normal file
|
@ -0,0 +1,4 @@
|
|||
"""nilmdb.client"""
|
||||
|
||||
from nilmdb.client.client import Client
|
||||
from nilmdb.client.errors import ClientError, ServerError, Error
|
477
nilmdb/client/client.py
Normal file
477
nilmdb/client/client.py
Normal file
|
@ -0,0 +1,477 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""Class for performing HTTP client requests via libcurl"""
|
||||
|
||||
import json
|
||||
import contextlib
|
||||
|
||||
import nilmdb.utils
|
||||
import nilmdb.client.httpclient
|
||||
from nilmdb.client.errors import ClientError
|
||||
from nilmdb.utils.time import timestamp_to_string, string_to_timestamp
|
||||
|
||||
|
||||
def extract_timestamp(line):
    """Return the timestamp parsed from the first whitespace-separated
    field of a line of data text"""
    first_field = line.split()[0]
    return string_to_timestamp(first_field)
|
||||
|
||||
|
||||
class Client():
    """Main client interface to the Nilm database.

    Every method is a thin wrapper that builds a parameter dictionary
    and hands it to the HTTPClient stored in self.http."""

    def __init__(self, url, post_json=False):
        """Initialize client with given URL.  If post_json is true,
        POST requests are sent with Content-Type 'application/json'
        instead of the default 'x-www-form-urlencoded'."""
        self.http = nilmdb.client.httpclient.HTTPClient(url, post_json)
        self.post_json = post_json

    # __enter__/__exit__ allow this class to be a context manager
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def _json_post_param(self, data):
        """Return compact json-encoded version of parameter"""
        if self.post_json:
            # If we're posting as JSON, we don't need to encode it further here
            return data
        return json.dumps(data, separators=(',', ':'))

    def close(self):
        """Close the connection; safe to call multiple times"""
        self.http.close()

    def geturl(self):
        """Return the URL we're using"""
        return self.http.baseurl

    def version(self):
        """Return server version"""
        return self.http.get("version")

    def dbinfo(self):
        """Return server database info (path, size, free space)
        as a dictionary."""
        return self.http.get("dbinfo")

    def stream_list(self, path=None, layout=None, extended=False):
        """Return a sorted list of [path, layout] lists.  If 'path' or
        'layout' are specified, only return streams that match those
        exact values.  If 'extended' is True, the returned lists have
        extended info, e.g.: [path, layout, extent_min, extent_max,
        total_rows, total_seconds]."""
        params = {}
        if path is not None:
            params["path"] = path
        if layout is not None:
            params["layout"] = layout
        if extended:
            params["extended"] = 1
        streams = self.http.get("stream/list", params)
        # Sort on the path, which is the first element of each entry
        return nilmdb.utils.sort.sort_human(streams, key=lambda s: s[0])

    def stream_get_metadata(self, path, keys=None):
        """Get stream metadata.  If 'keys' is not None it is passed
        to the server as the 'key' parameter."""
        params = {"path": path}
        if keys is not None:
            params["key"] = keys
        return self.http.get("stream/get_metadata", params)

    def stream_set_metadata(self, path, data):
        """Set stream metadata from a dictionary, replacing all existing
        metadata."""
        params = {
            "path": path,
            "data": self._json_post_param(data)
        }
        return self.http.post("stream/set_metadata", params)

    def stream_update_metadata(self, path, data):
        """Update stream metadata from a dictionary"""
        params = {
            "path": path,
            "data": self._json_post_param(data)
        }
        return self.http.post("stream/update_metadata", params)

    def stream_create(self, path, layout):
        """Create a new stream"""
        params = {
            "path": path,
            "layout": layout
        }
        return self.http.post("stream/create", params)

    def stream_destroy(self, path):
        """Delete stream.  Fails if any data is still present."""
        params = {
            "path": path
        }
        return self.http.post("stream/destroy", params)

    def stream_rename(self, oldpath, newpath):
        """Rename a stream."""
        params = {
            "oldpath": oldpath,
            "newpath": newpath
        }
        return self.http.post("stream/rename", params)

    def stream_remove(self, path, start=None, end=None):
        """Remove data from the specified time range.  Returns the sum
        of the counts reported by the server."""
        params = {
            "path": path
        }
        if start is not None:
            params["start"] = timestamp_to_string(start)
        if end is not None:
            params["end"] = timestamp_to_string(end)
        total = 0
        # The server may report the removal in several pieces
        for count in self.http.post_gen("stream/remove", params):
            total += int(count)
        return total

    @contextlib.contextmanager
    def stream_insert_context(self, path, start=None, end=None):
        """Return a context manager that allows data to be efficiently
        inserted into a stream in a piecewise manner.  Data is
        provided as ASCII lines, and is aggregated and sent to the
        server in larger or smaller chunks as necessary.  Data lines
        must match the database layout for the given path, and end
        with a newline.

        Example:
          with client.stream_insert_context('/path', start, end) as ctx:
            ctx.insert('1234567890000000 1 2 3 4\\n')
            ctx.insert('1234567891000000 1 2 3 4\\n')

        For more details, see help for nilmdb.client.client.StreamInserter

        This may make multiple requests to the server, if the data is
        large enough or enough time has passed between insertions.

        The context object is always invalidated on exit.  It is only
        finalized (remaining data sent) when the body completes
        without raising.
        """
        ctx = StreamInserter(self, path, start, end)
        try:
            yield ctx
            # Only reached if the 'with' body did not raise
            ctx.finalize()
        finally:
            # Never leave a half-used inserter usable, even on error
            ctx.destroy()

    def stream_insert(self, path, data, start=None, end=None):
        """Insert rows of data into a stream.  data should be a bytes
        object (or bytearray), or an iterable of such chunks, that
        provides ASCII data that matches the database layout for path.
        Data is passed through stream_insert_context, so it will be
        broken into reasonably-sized chunks and start/end will be
        deduced if missing.

        Returns the last response recorded by the inserter."""
        with self.stream_insert_context(path, start, end) as ctx:
            if isinstance(data, (bytes, bytearray)):
                # A single buffer: iterating it would yield integers
                ctx.insert(data)
            else:
                for chunk in data:
                    ctx.insert(chunk)
        return ctx.last_response

    def stream_insert_block(self, path, data, start, end, binary=False):
        """Insert a single fixed block of data into the stream.  It is
        sent directly to the server in one block with no further
        processing.

        If 'binary' is True, provide raw binary data in little-endian
        format matching the path layout, including an int64 timestamp.
        Otherwise, provide ASCII data matching the layout."""
        params = {
            "path": path,
            "start": timestamp_to_string(start),
            "end": timestamp_to_string(end),
        }
        if binary:
            params["binary"] = 1
        return self.http.put("stream/insert", data, params)

    def stream_intervals(self, path, start=None, end=None, diffpath=None):
        """
        Return a generator that yields each stream interval.

        If 'diffpath' is not None, yields only interval ranges that are
        present in 'path' but not in 'diffpath'.
        """
        params = {
            "path": path
        }
        if diffpath is not None:
            params["diffpath"] = diffpath
        if start is not None:
            params["start"] = timestamp_to_string(start)
        if end is not None:
            params["end"] = timestamp_to_string(end)
        return self.http.get_gen("stream/intervals", params)

    def stream_extract(self, path, start=None, end=None,
                       count=False, markup=False, binary=False):
        """
        Extract data from a stream.  Returns a generator that yields
        lines of ASCII-formatted data that matches the database
        layout for the given path.

        If 'count' is True, return a count of matching data points
        rather than the actual data.  The output format is unchanged.

        If 'markup' is True, include comments in the returned data
        that indicate interval starts and ends.

        If 'binary' is True, return chunks of raw binary data, rather
        than lines of ASCII-formatted data.  Raw binary data is
        little-endian and matches the database types (including an
        int64 timestamp).
        """
        params = {
            "path": path,
        }
        if start is not None:
            params["start"] = timestamp_to_string(start)
        if end is not None:
            params["end"] = timestamp_to_string(end)
        if count:
            params["count"] = 1
        if markup:
            params["markup"] = 1
        if binary:
            params["binary"] = 1
        return self.http.get_gen("stream/extract", params, binary=binary)

    def stream_count(self, path, start=None, end=None):
        """
        Return the number of rows of data in the stream that satisfy
        the given timestamps.
        """
        # With count=True the first yielded item is the count
        counts = list(self.stream_extract(path, start, end, count=True))
        return int(counts[0])
|
||||
|
||||
|
||||
class StreamInserter():
    """Object returned by stream_insert_context() that manages
    the insertion of rows of data into a particular path.

    The basic data flow is that we are filling a contiguous interval
    on the server, with no gaps, that extends from timestamp 'start'
    to timestamp 'end'.  Data timestamps satisfy 'start <= t < end'.

    Data is provided to .insert() as ASCII formatted data separated by
    newlines.  The chunks of data passed to .insert() do not need to
    match up with the newlines; less or more than one line can be passed.

    1. The first inserted line begins a new interval that starts at
    'start'.  If 'start' is not given, it is deduced from the first
    line's timestamp.

    2. Subsequent lines go into the same contiguous interval.  As lines
    are inserted, this routine may make multiple insertion requests to
    the server, but will structure the timestamps to leave no gaps.

    3. The current contiguous interval can be completed by manually
    calling .finalize(), which the context manager will also do
    automatically.  This will send any remaining data to the server,
    using the 'end' timestamp to end the interval.  If no 'end'
    was provided, it is deduced from the last timestamp seen,
    plus a small delta.

    After a .finalize(), inserting new data goes back to step 1.

    .update_start() can be called before step 1 to change the start
    time for the interval.  .update_end() can be called before step 3
    to change the end time for the interval.
    """

    # See design.md for a discussion of how much data to send.  This
    # is a soft limit -- we might send up to twice as much or so
    _max_data = 2 * 1024 * 1024
    _max_data_after_send = 64 * 1024

    def __init__(self, client, path, start, end):
        """'client' is the client object.  'path' is the database
        path to insert to.  'start' and 'end' are used for the first
        contiguous interval and may be None."""
        self.last_response = None

        self._client = client
        self._path = path

        # Start and end for the overall contiguous interval we're
        # filling
        self._interval_start = start
        self._interval_end = end

        # Current data we're building up to send.  Each string
        # goes into the array, and gets joined all at once.
        self._block_data = []
        self._block_len = 0

        # Becomes True once destroy() has been called
        self.destroyed = False

    def destroy(self):
        """Ensure this object can't be used again without raising
        an error, and record that fact in self.destroyed."""
        def error(*args, **kwargs):
            raise Exception("don't reuse this context object")
        # Shadow the methods with instance attributes that always raise
        self._send_block = self.insert = self.finalize = self.send = error
        self.destroyed = True

    def insert(self, data):
        """Insert a chunk of ASCII formatted data in string form.  The
        overall data must consist of lines terminated by '\\n'.

        Raises ValueError if, after an intermediate send, at least
        _max_data_after_send bytes are still accounted as buffered."""
        length = len(data)
        maxdata = self._max_data

        if length > maxdata:
            # This could make our buffer more than twice what we
            # wanted to send, so split it up.  This is a bit
            # inefficient, but the user really shouldn't be providing
            # this much data at once.
            for cut in range(0, length, maxdata):
                self.insert(data[cut:(cut + maxdata)])
            return

        # Append this string to our list
        self._block_data.append(data)
        self._block_len += length

        # Send the block once we have enough data
        if self._block_len >= maxdata:
            self._send_block(final=False)
            if self._block_len >= self._max_data_after_send:
                raise ValueError("too much data left over after trying"
                                 " to send intermediate block; is it"
                                 " missing newlines or malformed?")

    def update_start(self, start):
        """Update the start time for the next contiguous interval.
        Call this before starting to insert data for a new interval,
        for example, after .finalize()"""
        self._interval_start = start

    def update_end(self, end):
        """Update the end time for the current contiguous interval.
        Call this before .finalize()"""
        self._interval_end = end

    def finalize(self):
        """Stop filling the current contiguous interval.
        All outstanding data will be sent, and the interval end
        time of the interval will be taken from the 'end' argument
        used when initializing this class, or the most recent
        value passed to update_end(), or the last timestamp plus
        a small epsilon value if no other endpoint was provided.

        If more data is inserted after a finalize(), it will become
        part of a new interval and there may be a gap left in-between."""
        self._send_block(final=True)

    def send(self):
        """Send any data that we might have buffered up.  Does not affect
        any other treatment of timestamps or endpoints."""
        self._send_block(final=False)

    def _get_first_noncomment(self, block):
        """Return the (start, end) indices of the first full line in
        block that isn't a comment, or raise IndexError if
        there isn't one.  The returned 'end' is one past the line's
        terminating newline."""
        start = 0
        while True:
            end = block.find(b'\n', start)
            if end < 0:
                # No further complete line
                raise IndexError
            if block[start] != b'#'[0]:
                return (start, (end + 1))
            start = end + 1

    def _get_last_noncomment(self, block):
        """Return the (start, end) indices of the last full line in
        block that isn't a comment, or raise IndexError if
        there isn't one.  Unlike _get_first_noncomment, the returned
        'end' is the index of the terminating newline itself."""
        end = block.rfind(b'\n')
        if end <= 0:
            raise IndexError
        while True:
            # 'start' is the newline before this line, or -1 if this
            # is the first line of the block
            start = block.rfind(b'\n', 0, end)
            if block[start + 1] != b'#'[0]:
                return ((start + 1), end)
            if start == -1:
                raise IndexError
            end = start

    def _send_block(self, final=False):
        """Send data currently in the block.  The data sent will
        consist of full lines only, so some might be left over.

        Raises ClientError if there is a non-comment line to send but
        the start/end timestamps are missing or equal."""
        # Build the full string to send
        block = b"".join(self._block_data)

        start_ts = self._interval_start
        if start_ts is None:
            # Pull start from the first line
            try:
                (spos, epos) = self._get_first_noncomment(block)
                start_ts = extract_timestamp(block[spos:epos])
            except (ValueError, IndexError):
                pass  # no timestamp is OK, if we have no data

        if final:
            # For a final block, it must end in a newline, and the
            # ending timestamp is either the user-provided end,
            # or the timestamp of the last line plus epsilon.
            end_ts = self._interval_end
            try:
                # NOTE(review): this ValueError is caught by the
                # handler just below, so an unterminated final block
                # is not reported here -- confirm whether intended.
                if block[-1] != b'\n'[0]:
                    raise ValueError("final block didn't end with a newline")
                if end_ts is None:
                    (spos, epos) = self._get_last_noncomment(block)
                    end_ts = extract_timestamp(block[spos:epos])
                    end_ts += nilmdb.utils.time.epsilon
            except (ValueError, IndexError):
                pass  # no timestamp is OK, if we have no data
            self._block_data = []
            self._block_len = 0

            # Next block is completely fresh
            self._interval_start = None
            self._interval_end = None
        else:
            # An intermediate block, e.g. "line1\nline2\nline3\nline4"
            # We need to save "line3\nline4" for the next block, and
            # use the timestamp from "line3" as the ending timestamp
            # for this one.
            try:
                (spos, epos) = self._get_last_noncomment(block)
                end_ts = extract_timestamp(block[spos:epos])
            except (ValueError, IndexError):
                # If we found no timestamp, give up; we could send this
                # block later when we have more data.
                return
            if spos == 0:
                # Not enough data to send an intermediate block
                return
            if self._interval_end is not None and end_ts > self._interval_end:
                # User gave us bad endpoints; send it anyway, and let
                # the server complain so that the error is the same
                # as if we hadn't done this chunking.
                end_ts = self._interval_end
            self._block_data = [block[spos:]]
            # NOTE(review): this counts only the saved line without
            # its newline, not everything kept in block[spos:] --
            # confirm whether the undercount is intended.
            self._block_len = (epos - spos)
            block = block[:spos]

            # Next block continues where this one ended
            self._interval_start = end_ts

        # Double check endpoints
        if (start_ts is None or end_ts is None) or (start_ts == end_ts):
            # If the block has no non-comment lines, it's OK
            try:
                self._get_first_noncomment(block)
            except IndexError:
                return
            raise ClientError("have data to send, but no start/end times")

        # Send it
        self.last_response = self._client.stream_insert_block(
            self._path, block, start_ts, end_ts, binary=False)

        return
|
41
nilmdb/client/errors.py
Normal file
41
nilmdb/client/errors.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
"""HTTP client errors"""
|
||||
|
||||
from nilmdb.utils.printf import sprintf
|
||||
|
||||
|
||||
class Error(Exception):
    """Common parent for the ClientError and ServerError exceptions.

    Stores the status line, the message text, the requested URL and an
    optional server-side traceback, and renders them as text."""

    def __init__(self,
                 status="Unspecified error",
                 message=None,
                 url=None,
                 traceback=None):
        super().__init__(status)
        # e.g. "400 Bad Request"
        self.status = status
        # textual message from the server
        self.message = message
        # URL we were requesting
        self.url = url
        # server traceback, if available
        self.traceback = traceback

    def _format_error(self, show_url):
        """Build the text form of the error: "[status]", then the
        message, then the URL (only if show_url is true), then the
        server traceback.  Empty or missing parts are left out."""
        pieces = [sprintf("[%s]", self.status)]
        if self.message:
            pieces.append(sprintf(" %s", self.message))
        if show_url and self.url:
            pieces.append(sprintf(" (%s)", self.url))
        if self.traceback:
            pieces.append(sprintf("\nServer traceback:\n%s", self.traceback))
        return "".join(pieces)

    def __str__(self):
        # The str() form leaves the URL out
        return self._format_error(show_url=False)

    def __repr__(self):
        # The repr() form includes the URL
        return self._format_error(show_url=True)
|
||||
|
||||
|
||||
class ClientError(Error):
    """Error subclass; the HTTP client in this package raises it for
    4xx response codes."""
|
||||
|
||||
|
||||
class ServerError(Error):
    """Error subclass; the HTTP client in this package raises it for
    5xx response codes and for failed requests."""
|
189
nilmdb/client/httpclient.py
Normal file
189
nilmdb/client/httpclient.py
Normal file
|
@ -0,0 +1,189 @@
|
|||
"""HTTP client library"""
|
||||
|
||||
import json
|
||||
import urllib.parse
|
||||
import requests
|
||||
|
||||
from nilmdb.client.errors import ClientError, ServerError, Error
|
||||
|
||||
|
||||
class HTTPClient():
    """Class to manage and perform HTTP requests from the client"""

    def __init__(self, baseurl="", post_json=False, verify_ssl=True):
        """If baseurl is supplied, all other functions that take
        a URL can be given a relative URL instead.

        'post_json' selects application/json POST bodies instead of
        form encoding.  'verify_ssl' is passed to the requests
        library as its 'verify' option."""
        # Verify / clean up URL
        reparsed = urllib.parse.urlparse(baseurl).geturl()
        if '://' not in reparsed:
            # No scheme given; assume plain HTTP
            reparsed = urllib.parse.urlparse("http://" + baseurl).geturl()
        # Exactly one trailing slash, so relative URLs join cleanly
        self.baseurl = reparsed.rstrip('/') + '/'

        # Note whether we want SSL verification
        self.verify_ssl = verify_ssl

        # Saved response, so that tests can verify a few things.
        self._last_response = {}

        # Whether to send application/json POST bodies (versus
        # x-www-form-urlencoded)
        self.post_json = post_json

    def _handle_error(self, url, code, body):
        """Raise the exception matching a non-200 response: ClientError
        for 4xx codes, ServerError for 5xx codes, Error otherwise.
        This function never returns normally."""
        # Default variables for exception.  We use the entire body as
        # the default message, in case we can't extract it from a JSON
        # response.
        args = {
            "url": url,
            "status": str(code),
            "message": body,
            "traceback": None
        }
        try:
            # Fill with server-provided data if we can
            jsonerror = json.loads(body)
            args["status"] = jsonerror["status"]
            args["message"] = jsonerror["message"]
            args["traceback"] = jsonerror["traceback"]
        except Exception:
            # Deliberately best-effort: keep whatever defaults remain
            pass
        if 400 <= code <= 499:
            raise ClientError(**args)
        if 500 <= code <= 599:
            # An empty body (b"") counts as "no message" just like
            # None, so test for falsiness rather than identity.
            if not args["message"]:
                args["message"] = ("(no message; try disabling "
                                   "response.stream option in "
                                   "nilmdb.server for better debugging)")
            raise ServerError(**args)
        raise Error(**args)

    def close(self):
        """Nothing to do: each request uses its own short-lived session."""
        pass

    def _do_req(self, method, url, query_data, body_data, stream, headers):
        """Perform one HTTP request and return (response, isjson),
        where isjson says whether the Content-Type is one of the two
        JSON types.

        Raises ServerError if the request itself fails, and whatever
        _handle_error raises for a non-200 status code."""
        url = urllib.parse.urljoin(self.baseurl, url)
        # Work on a private copy so the caller's dictionary is never
        # modified when we add our own header.
        headers = dict(headers) if headers else {}
        headers["Connection"] = "close"
        try:
            # Create a new session, ensure we send "Connection: close",
            # and explicitly close connection after the transfer.
            # This is to avoid HTTP/1.1 persistent connections
            # (keepalive), because they have fundamental race
            # conditions when there are delays between requests:
            # a new request may be sent at the same instant that the
            # server decides to timeout the connection.
            #
            # The 'with' block closes the session whether or not the
            # request raises.  If it's a generator (stream = True),
            # the requests library shouldn't actually close the HTTP
            # connection until all data has been read from the
            # response.
            with requests.Session() as session:
                response = session.request(method, url,
                                           params=query_data,
                                           data=body_data,
                                           stream=stream,
                                           headers=headers,
                                           verify=self.verify_ssl)
        except requests.RequestException as e:
            raise ServerError(status="502 Error", url=url,
                              message=str(e))
        if response.status_code != 200:
            self._handle_error(url, response.status_code, response.content)
        self._last_response = response
        # A response without a Content-Type header is treated as non-JSON
        content_type = response.headers.get("content-type")
        if content_type in ("application/json",
                            "application/x-json-stream"):
            return (response, True)
        return (response, False)

    # Normal versions that return data directly
    def _req(self, method, url, query=None, body=None, headers=None):
        """
        Make a request and return the body data as a string or parsed
        JSON object, or raise an error if it contained an error.
        """
        (response, isjson) = self._do_req(method, url, query, body,
                                          stream=False, headers=headers)
        if isjson:
            return json.loads(response.content)
        return response.text

    def get(self, url, params=None):
        """Simple GET (parameters in URL)"""
        return self._req("GET", url, params, None)

    def post(self, url, params=None):
        """Simple POST (parameters in body)"""
        if self.post_json:
            return self._req("POST", url, None,
                             json.dumps(params),
                             {'Content-type': 'application/json'})
        return self._req("POST", url, None, params)

    def put(self, url, data, params=None,
            content_type="application/octet-stream"):
        """Simple PUT (parameters in URL, data in body)"""
        h = {'Content-type': content_type}
        return self._req("PUT", url, query=params, body=data, headers=h)

    # Generator versions that return data one line at a time.
    def _req_gen(self, method, url, query=None, body=None,
                 headers=None, binary=False):
        """
        Make a request and return a generator that gives back strings
        or JSON decoded lines of the body data, or raise an error if
        it contained an error.

        Because this is a generator function, the request is not made
        until the first item is requested.
        """
        (response, isjson) = self._do_req(method, url, query, body,
                                          stream=True, headers=headers)

        # Like the iter_lines function in Requests, but only splits on
        # the specified line ending.
        def lines(source, ending):
            pending = None
            for chunk in source:
                if pending is not None:
                    chunk = pending + chunk
                tmp = chunk.split(ending)
                complete = tmp[:-1]
                if chunk.endswith(ending):
                    pending = None
                else:
                    # Incomplete last line; keep it for the next chunk
                    pending = tmp[-1]
                for line in complete:
                    yield line
            if pending is not None:
                yield pending

        # Yield the chunks or lines as requested
        if binary:
            for chunk in response.iter_content(chunk_size=65536):
                yield chunk
        elif isjson:
            for line in lines(response.iter_content(chunk_size=1),
                              ending=b'\r\n'):
                yield json.loads(line)
        else:
            for line in lines(response.iter_content(chunk_size=65536),
                              ending=b'\n'):
                yield line

    def get_gen(self, url, params=None, binary=False):
        """Simple GET (parameters in URL) returning a generator"""
        return self._req_gen("GET", url, params, binary=binary)

    def post_gen(self, url, params=None):
        """Simple POST (parameters in body) returning a generator"""
        if self.post_json:
            return self._req_gen("POST", url, None,
                                 json.dumps(params),
                                 {'Content-type': 'application/json'})
        return self._req_gen("POST", url, None, params)

    # Not much use for a POST or PUT generator, since they don't
    # return much data.
|
263
nilmdb/client/numpyclient.py
Normal file
263
nilmdb/client/numpyclient.py
Normal file
|
@ -0,0 +1,263 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""Provide a NumpyClient class that is based on normal Client, but has
|
||||
additional methods for extracting and inserting data via Numpy arrays."""
|
||||
|
||||
import contextlib
|
||||
|
||||
import numpy
|
||||
|
||||
import nilmdb.utils
|
||||
import nilmdb.client.client
|
||||
import nilmdb.client.httpclient
|
||||
from nilmdb.client.errors import ClientError
|
||||
|
||||
|
||||
def layout_to_dtype(layout):
    """Convert a layout string of the form '<type><bits>_<count>'
    (for example 'float32_8' or 'uint16_1') into a Numpy structured
    dtype.

    The dtype has an int64 'timestamp' field followed by a 'data'
    field of the given little-endian type.  'data' is a scalar when
    the count is 1, and a sub-array of 'count' elements otherwise.

    Raises ValueError if the layout has no '_' separator, if the type
    name is not int/uint/float, or if the bit width or count is not
    an integer."""
    fields = layout.split('_')
    if len(fields) < 2:
        # Report a malformed layout the same way as an unknown type,
        # rather than leaking an IndexError from the split.
        raise ValueError("bad layout")
    ltype = fields[0]
    lcount = int(fields[1])

    # Prefix of the type name -> Numpy type-kind character.  No prefix
    # is a prefix of another, so the order does not matter.
    for (prefix, kind) in (('int', 'i'), ('uint', 'u'), ('float', 'f')):
        if ltype.startswith(prefix):
            # The layout gives the width in bits; Numpy wants bytes
            atype = '<' + kind + str(int(ltype[len(prefix):]) // 8)
            break
    else:
        raise ValueError("bad layout")

    if lcount == 1:
        dtype = [('timestamp', '<i8'), ('data', atype)]
    else:
        dtype = [('timestamp', '<i8'), ('data', atype, lcount)]
    return numpy.dtype(dtype)
|
||||
|
||||
|
||||
class NumpyClient(nilmdb.client.client.Client):
    """Subclass of nilmdb.client.Client that adds additional methods for
    extracting and inserting data via Numpy arrays."""

    def _get_dtype(self, path, layout):
        """Return the Numpy dtype for 'layout'.  If 'layout' is None,
        it is looked up with stream_list(path); ClientError is raised
        unless exactly one stream matches."""
        if layout is None:
            streams = self.stream_list(path)
            if len(streams) != 1:
                raise ClientError("can't get layout for path: " + path)
            # Each entry is [path, layout, ...]
            layout = streams[0][1]
        return layout_to_dtype(layout)

    def stream_extract_numpy(self, path, start=None, end=None,
                             layout=None, maxrows=100000,
                             structured=False):
        """
        Extract data from a stream.  Returns a generator that yields
        Numpy arrays of up to 'maxrows' of data each.

        If 'layout' is None, it is read using stream_list.

        If 'structured' is False, the timestamp and data columns are
        joined with numpy.c_ and returned in a flat 2D array.
        Otherwise, data is returned as a structured dtype in a 1D array.

        Raises ValueError if 'maxrows' is less than 1.
        """
        if maxrows < 1:
            # A block size of zero or less would make the loop below
            # spin forever without consuming any data.
            raise ValueError("maxrows must be at least 1")

        dtype = self._get_dtype(path, layout)

        def to_numpy(data):
            a = numpy.frombuffer(data, dtype)
            if structured:
                return a
            return numpy.c_[a['timestamp'], a['data']]

        chunks = []
        total_len = 0
        maxsize = dtype.itemsize * maxrows
        for data in self.stream_extract(path, start, end, binary=True):
            # Add this block of binary data
            chunks.append(data)
            total_len += len(data)

            # See if we have enough to make the requested Numpy array
            while total_len >= maxsize:
                assembled = b"".join(chunks)
                total_len -= maxsize
                # Keep whatever lies beyond one full block
                chunks = [assembled[maxsize:]]
                block = assembled[:maxsize]
                yield to_numpy(block)

        if total_len:
            yield to_numpy(b"".join(chunks))

    @contextlib.contextmanager
    def stream_insert_numpy_context(self, path, start=None, end=None,
                                    layout=None):
        """Return a context manager that allows data to be efficiently
        inserted into a stream in a piecewise manner.  Data is
        provided as Numpy arrays, and is aggregated and sent to the
        server in larger or smaller chunks as necessary.  Data format
        must match the database layout for the given path.

        For more details, see help for
        nilmdb.client.numpyclient.StreamInserterNumpy

        If 'layout' is not None, use it as the layout rather than
        querying the database.

        The context object is always invalidated on exit.  It is only
        finalized (remaining data sent) when the body completes
        without raising.
        """
        dtype = self._get_dtype(path, layout)
        ctx = StreamInserterNumpy(self, path, start, end, dtype)
        try:
            yield ctx
            # Only reached if the 'with' body did not raise
            ctx.finalize()
        finally:
            # Never leave a half-used inserter usable, even on error
            ctx.destroy()

    def stream_insert_numpy(self, path, data, start=None, end=None,
                            layout=None):
        """Insert data into a stream.  data should be a Numpy array
        (or an iterable of arrays), which will be passed through
        stream_insert_numpy_context to break it into chunks etc.
        See the help for that function for details.

        Returns the last response recorded by the inserter."""
        with self.stream_insert_numpy_context(path, start, end, layout) as ctx:
            if isinstance(data, numpy.ndarray):
                ctx.insert(data)
            else:
                for chunk in data:
                    ctx.insert(chunk)
        return ctx.last_response
|
||||
|
||||
|
||||
class StreamInserterNumpy(nilmdb.client.client.StreamInserter):
    """Object returned by stream_insert_numpy_context() that manages
    the insertion of rows of data into a particular path.

    See help for nilmdb.client.client.StreamInserter for details.
    The only difference is that, instead of ASCII formatted data,
    this context manager can take Numpy arrays, which are either
    structured (1D with complex dtype) or flat (2D with simple dtype).
    """

    # Soft limit of how many bytes to send per HTTP request.
    _max_data = 2 * 1024 * 1024

    def __init__(self, client, path, start, end, dtype):
        """
        'client' is the client object.  'path' is the database path
        to insert to.  'start' and 'end' are used for the first
        contiguous interval and may be None.  'dtype' is the Numpy
        dtype for this stream.
        """
        super(StreamInserterNumpy, self).__init__(client, path, start, end)
        self._dtype = dtype

        # Max rows to send at once
        self._max_rows = self._max_data // self._dtype.itemsize

        # List of the current arrays we're building up to send
        self._block_arrays = []
        self._block_rows = 0

    def insert(self, array):
        """Insert Numpy data, which must match the layout type.

        'array' may be a 1D structured array whose dtype equals the
        stream dtype, or a 2D array whose first column is the
        timestamp and whose remaining columns are the data.  Anything
        that is not already an ndarray is passed through numpy.array()
        first.

        Raises ValueError if the dtype, number of fields, or number
        of dimensions does not match.
        """
        if not isinstance(array, numpy.ndarray):
            array = numpy.array(array)
        if array.ndim == 1:
            # Already a structured array; just verify the type
            if array.dtype != self._dtype:
                raise ValueError("wrong dtype for 1D (structured) array")
        elif array.ndim == 2:
            # Convert to structured array
            sarray = numpy.zeros(array.shape[0], dtype=self._dtype)
            try:
                sarray['timestamp'] = array[:, 0]
                # Need the squeeze in case sarray['data'] is 1 dimensional
                sarray['data'] = numpy.squeeze(array[:, 1:])
            except (IndexError, ValueError):
                raise ValueError("wrong number of fields for this data type")
            array = sarray
        else:
            raise ValueError("wrong number of dimensions in array")

        length = len(array)
        maxrows = self._max_rows

        if length == 0:
            return
        if length > maxrows:
            # This is more than we want to send in a single request,
            # so split it up and feed the pieces back in one at a
            # time.  This is a bit inefficient, but the user really
            # shouldn't be providing this much data at once.
            for cut in range(0, length, maxrows):
                self.insert(array[cut:(cut + maxrows)])
            return

        # Add this array to our list
        self._block_arrays.append(array)
        self._block_rows += length

        # Send if it's too long
        if self._block_rows >= maxrows:
            self._send_block(final=False)

    def _send_block(self, final=False):
        """Send the data currently stored up.  One row might be left
        over if we need its timestamp saved.

        With final=True everything buffered is sent and the interval
        state is reset; otherwise the last row is held back and its
        timestamp becomes the end of this block and the start of the
        next.

        Raises ClientError if there is data to send but the start/end
        times are missing or equal.
        """

        # Build the full array to send
        if self._block_rows == 0:
            array = numpy.zeros(0, dtype=self._dtype)
        else:
            array = numpy.hstack(self._block_arrays)

        # Get starting timestamp
        start_ts = self._interval_start
        if start_ts is None:
            # Pull start from the first row
            try:
                start_ts = array['timestamp'][0]
            except IndexError:
                pass  # no timestamp is OK, if we have no data

        # Get ending timestamp
        if final:
            # For a final block, the timestamp is either the
            # user-provided end, or the timestamp of the last line
            # plus epsilon.
            end_ts = self._interval_end
            if end_ts is None:
                try:
                    end_ts = array['timestamp'][-1]
                    end_ts += nilmdb.utils.time.epsilon
                except IndexError:
                    pass  # no timestamp is OK, if we have no data
            self._block_arrays = []
            self._block_rows = 0

            # Next block is completely fresh
            self._interval_start = None
            self._interval_end = None
        else:
            # An intermediate block.  We need to save the last row
            # for the next block, and use its timestamp as the ending
            # timestamp for this one.
            if len(array) < 2:
                # Not enough data to send an intermediate block
                return
            end_ts = array['timestamp'][-1]
            if self._interval_end is not None and end_ts > self._interval_end:
                # User gave us bad endpoints; send it anyway, and let
                # the server complain so that the error is the same
                # as if we hadn't done this chunking.
                end_ts = self._interval_end
            self._block_arrays = [array[-1:]]
            self._block_rows = 1
            array = array[:-1]

            # Next block continues where this one ended
            self._interval_start = end_ts

        # If we have no endpoints, or equal endpoints, it's OK as long
        # as there's no data to send
        if (start_ts is None or end_ts is None) or (start_ts == end_ts):
            # Test the length explicitly: the truth value of a Numpy
            # array with more than one element is ambiguous and raises
            # ValueError, which would hide the real error below.
            if len(array) == 0:
                return
            raise ClientError("have data to send, but invalid start/end times")

        # Send it.  tobytes() is the supported spelling of the
        # deprecated tostring() and produces the same raw bytes.
        data = array.tobytes()
        self.last_response = self._client.stream_insert_block(
            self._path, data, start_ts, end_ts, binary=True)

        return
|
3
nilmdb/cmdline/__init__.py
Normal file
3
nilmdb/cmdline/__init__.py
Normal file
|
@ -0,0 +1,3 @@
|
|||
"""nilmdb.cmdline"""
|
||||
|
||||
from nilmdb.cmdline.cmdline import Cmdline
|
173
nilmdb/cmdline/cmdline.py
Normal file
173
nilmdb/cmdline/cmdline.py
Normal file
|
@ -0,0 +1,173 @@
|
|||
"""Command line client functionality"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import signal
|
||||
import argparse
|
||||
from argparse import ArgumentDefaultsHelpFormatter as def_form
|
||||
|
||||
import nilmdb.client
|
||||
from nilmdb.utils.printf import fprintf, sprintf
|
||||
import nilmdb.utils.time
|
||||
|
||||
import argcomplete
|
||||
import datetime_tz
|
||||
|
||||
# Names of every supported subcommand.  Each one lives in its own
# module under nilmdb.cmdline purely to keep files small; the
# functions in those modules are still invoked with the Cmdline
# instance as 'self'.
subcommands = [
    "help",
    "info",
    "create",
    "rename",
    "list",
    "intervals",
    "metadata",
    "insert",
    "extract",
    "remove",
    "destroy",
]

# Map each subcommand name to its imported module.  Passing a
# non-empty 'fromlist' makes __import__ return the submodule itself
# rather than the top-level package.
subcmd_mods = {}
for cmd in subcommands:
    subcmd_mods[cmd] = __import__("nilmdb.cmdline.%s" % cmd, fromlist=[cmd])
|
||||
|
||||
|
||||
class JimArgumentParser(argparse.ArgumentParser):
    """ArgumentParser with special --version handling and a custom
    error message format."""

    def parse_args(self, args=None, namespace=None):
        """Parse arguments, collapsing the whole command line down to
        just "--version" whenever that flag appears anywhere in it.

        This makes "nilmtool cmd --version" work, which is needed by
        help2man.
        """
        candidates = args if args else sys.argv[1:]
        if "--version" in candidates:
            args = ["--version"]
        return super().parse_args(args, namespace)

    def error(self, message):
        """Print usage to stderr, then exit with status 2 and an
        "error: ..." message."""
        self.print_usage(sys.stderr)
        text = sprintf("error: %s\n", message)
        self.exit(2, text)
|
||||
|
||||
|
||||
class Complete():
    """Completion helpers, for using argcomplete (see
    extras/nilmtool-bash-completion.sh).

    Each completer takes the text typed so far ('prefix') and the
    arguments parsed so far ('parsed_args'), and returns an iterable
    of candidate strings.
    """

    def escape(self, s):
        """Return 's' with backslash, double quote, single quote and
        space each preceded by a backslash."""
        # The backslash must be handled first so that the backslashes
        # added for the other characters are not escaped again.
        for special in ("\\", "\"", "'", " "):
            s = s.replace(special, "\\" + special)
        return s

    def none(self, prefix, parsed_args, **kwargs):
        """Completer that never offers any candidates."""
        return []

    # Rates, times and URLs have no useful completions.
    rate = none
    time = none
    url = none

    def path(self, prefix, parsed_args, **kwargs):
        """Offer the escaped paths of streams on the server at
        parsed_args.url that start with 'prefix'."""
        client = nilmdb.client.Client(parsed_args.url)
        streams = client.stream_list()
        return (self.escape(entry[0])
                for entry in streams
                if entry[0].startswith(prefix))

    def layout(self, prefix, parsed_args, **kwargs):
        """Offer layout names of the form type_count, for counts 1
        through 9, that start with 'prefix'."""
        types = ["int8", "int16", "int32", "int64",
                 "uint8", "uint16", "uint32", "uint64",
                 "float32", "float64"]
        candidates = [name + "_" + str(count)
                      for count in range(1, 10)
                      for name in types]
        return (cand for cand in candidates if cand.startswith(prefix))

    def meta_key(self, prefix, parsed_args, **kwargs):
        """Offer just the key part of each candidate produced by
        meta_keyval()."""
        pairs = self.meta_keyval(prefix, parsed_args, **kwargs)
        return (pair.split('=')[0] for pair in pairs)

    def meta_keyval(self, prefix, parsed_args, **kwargs):
        """Offer escaped key=value strings, taken from the metadata
        of the stream at parsed_args.path, that start with 'prefix'.
        Returns an empty list when no path has been given."""
        client = nilmdb.client.Client(parsed_args.url)
        path = parsed_args.path
        if not path:
            return []
        metadata = client.stream_get_metadata(path)
        escaped = [self.escape(key + '=' + value)
                   for (key, value) in metadata.items()]
        return [item for item in escaped if item.startswith(prefix)]
|
||||
|
||||
|
||||
class Cmdline():
    """Command line driver: builds the argument parser, connects to
    the server, and dispatches to the selected subcommand handler."""

    def __init__(self, argv=None):
        """'argv' is the argument list to parse; when it is empty or
        None, sys.argv[1:] is used instead."""
        self.argv = argv or sys.argv[1:]
        self.client = None
        self.def_url = os.environ.get("NILMDB_URL",
                                      "http://localhost/nilmdb/")
        self.subcmd = {}
        self.complete = Complete()
        self.complete_output_stream = None  # overridden by test suite

    def arg_time(self, toparse):
        """Parse a time string argument.

        Intended as an argparse 'type' callable: a ValueError from the
        time parser is converted to argparse.ArgumentTypeError, with
        the offending text appended in quotes.
        """
        try:
            return nilmdb.utils.time.parse_time(toparse)
        except ValueError as err:
            message = sprintf("%s \"%s\"", str(err), toparse)
            raise argparse.ArgumentTypeError(message)

    def parser_setup(self):
        """Create self.parser with the general and server options, and
        register a subparser for every entry in 'subcommands'."""
        self.parser = JimArgumentParser(add_help=False,
                                        formatter_class=def_form)

        general = self.parser.add_argument_group("General options")
        general.add_argument("-h", "--help", action='help',
                             help='show this help message and exit')
        general.add_argument("-v", "--version", action="version",
                             version=nilmdb.__version__)

        server = self.parser.add_argument_group("Server")
        url_arg = server.add_argument(
            "-u", "--url", action="store",
            default=self.def_url,
            help="NilmDB server URL (default: %(default)s)")
        url_arg.completer = self.complete.url

        sub = self.parser.add_subparsers(
            title="Commands", dest="command", required=True,
            description="Use 'help command' or 'command --help' for more "
            "details on a particular command.")

        # Set up subcommands (defined in separate files)
        for name in subcommands:
            self.subcmd[name] = subcmd_mods[name].setup(self, sub)

    def die(self, formatstr, *args):
        """Print a formatted message and a newline to stderr, close
        the client if there is one, and exit with status -1."""
        fprintf(sys.stderr, formatstr + "\n", *args)
        if self.client:
            self.client.close()
        sys.exit(-1)

    def run(self):
        """Parse the command line, run the selected subcommand, and
        exit with its return value (0 if it returns nothing)."""
        # Set SIGPIPE to its default handler -- we don't need Python
        # to catch it for us.
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)

        # Clear cached timezone, so that we can pick up timezone changes
        # while running this from the test suite.
        datetime_tz._localtz = None

        # Build the parser, give argcomplete a chance to take over,
        # then parse for real.
        self.parser_setup()
        argcomplete.autocomplete(self.parser, exit_method=sys.exit,
                                 output_stream=self.complete_output_stream)
        self.args = self.parser.parse_args(self.argv)

        # Run arg verify handler if there is one
        if "verify" in self.args:
            self.args.verify(self)

        self.client = nilmdb.client.Client(self.args.url)

        # Make a test connection to make sure things work,
        # unless the particular command requests that we don't.
        skip_probe = "no_test_connect" in self.args
        if not skip_probe:
            try:
                self.client.version()
            except nilmdb.client.Error as err:
                self.die("error connecting to server: %s", str(err))

        # Now dispatch client request to appropriate function.  Parser
        # should have ensured that we don't have any unknown commands
        # here.
        status = self.args.handler(self)
        if not status:
            status = 0

        self.client.close()
        sys.exit(status)
|
38
nilmdb/cmdline/create.py
Normal file
38
nilmdb/cmdline/create.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
from argparse import RawDescriptionHelpFormatter as raw_form
|
||||
|
||||
import nilmdb.client
|
||||
|
||||
|
||||
def setup(self, sub):
    """Register the 'create' subcommand on the subparsers object
    'sub' and return the new parser."""
    # Shown verbatim in the help output (raw formatter).
    description = """
Create a new empty stream at the specified path and with the specified
layout type.

Layout types are of the format: type_count

'type' is a data type like 'float32', 'float64', 'uint16', 'int32', etc.

'count' is the number of columns of this type.

For example, 'float32_8' means the data for this stream has 8 columns of
32-bit floating point values.
"""
    cmd = sub.add_parser("create", help="Create a new stream",
                         formatter_class=raw_form,
                         description=description)
    cmd.set_defaults(handler=cmd_create)

    required = cmd.add_argument_group("Required arguments")
    path_arg = required.add_argument(
        "path",
        help="Path (in database) of new stream, e.g. /foo/bar")
    path_arg.completer = self.complete.path
    layout_arg = required.add_argument(
        "layout",
        help="Layout type for new stream, e.g. float32_8")
    layout_arg.completer = self.complete.layout
    return cmd
|
||||
|
||||
|
||||
def cmd_create(self):
    """Create new stream.

    Asks the server to create a stream at the path and with the
    layout given on the command line; a client error is reported
    through self.die().
    """
    opts = self.args
    try:
        self.client.stream_create(opts.path, opts.layout)
    except nilmdb.client.ClientError as err:
        self.die("error creating stream: %s", str(err))
|
52
nilmdb/cmdline/destroy.py
Normal file
52
nilmdb/cmdline/destroy.py
Normal file
|
@ -0,0 +1,52 @@
|
|||
import fnmatch
|
||||
|
||||
from argparse import ArgumentDefaultsHelpFormatter as def_form
|
||||
|
||||
from nilmdb.utils.printf import printf
|
||||
import nilmdb.client
|
||||
|
||||
|
||||
def setup(self, sub):
    """Register the 'destroy' subcommand on the subparsers object
    'sub' and return the new parser."""
    description = """
Destroy the stream at the specified path.
The stream must be empty. All metadata
related to the stream is permanently deleted.

Wildcards and multiple paths are supported.
"""
    cmd = sub.add_parser("destroy", help="Delete a stream and all data",
                         formatter_class=def_form,
                         description=description)
    cmd.set_defaults(handler=cmd_destroy)

    options = cmd.add_argument_group("Options")
    options.add_argument("-R", "--remove", action="store_true",
                         help="Remove all data before destroying stream")
    options.add_argument("-q", "--quiet", action="store_true",
                         help="Don't display names when destroying "
                         "multiple paths")

    required = cmd.add_argument_group("Required arguments")
    path_arg = required.add_argument(
        "path", nargs='+',
        help="Path of the stream to delete, e.g. /foo/bar/*")
    path_arg.completer = self.complete.path
    return cmd
|
||||
|
||||
|
||||
def cmd_destroy(self):
    """Destroy stream.

    Every path argument is treated as an fnmatch pattern against the
    server's stream list; a pattern that matches nothing is reported
    through self.die().  Each matching stream is destroyed once, even
    if several patterns match it.  With --remove, the stream's data
    is removed first.  Client errors are reported through self.die().
    """
    streams = [s[0] for s in self.client.stream_list()]

    # Collect matches in order, skipping streams already selected by
    # an earlier pattern: overlapping patterns (e.g. "/a/*" and "/a/b")
    # would otherwise try to destroy the same stream twice, and the
    # second attempt would fail.
    paths = []
    seen = set()
    for pattern in self.args.path:
        matched = fnmatch.filter(streams, pattern)
        if not matched:
            self.die("error: no stream matched path: %s", pattern)
        for name in matched:
            if name not in seen:
                seen.add(name)
                paths.append(name)

    for path in paths:
        if not self.args.quiet and len(paths) > 1:
            printf("Destroying %s\n", path)

        try:
            if self.args.remove:
                self.client.stream_remove(path)
            self.client.stream_destroy(path)
        except nilmdb.client.ClientError as e:
            self.die("error destroying stream: %s", str(e))
|
96
nilmdb/cmdline/extract.py
Normal file
96
nilmdb/cmdline/extract.py
Normal file
|
@ -0,0 +1,96 @@
|
|||
import sys
|
||||
|
||||
from nilmdb.utils.printf import printf
|
||||
import nilmdb.client
|
||||
|
||||
|
||||
def setup(self, sub):
    """Register the 'extract' subcommand on the subparsers object
    'sub' and return the new parser."""
    cmd = sub.add_parser("extract", help="Extract data",
                         description="""
Extract data from a stream.
""")
    cmd.set_defaults(verify=cmd_extract_verify,
                     handler=cmd_extract)

    selection = cmd.add_argument_group("Data selection")
    path_arg = selection.add_argument(
        "path",
        help="Path of stream, e.g. /foo/bar")
    path_arg.completer = self.complete.path
    start_arg = selection.add_argument(
        "-s", "--start", required=True,
        metavar="TIME", type=self.arg_time,
        help="Starting timestamp (free-form, inclusive)")
    start_arg.completer = self.complete.time
    end_arg = selection.add_argument(
        "-e", "--end", required=True,
        metavar="TIME", type=self.arg_time,
        help="Ending timestamp (free-form, noninclusive)")
    end_arg.completer = self.complete.time

    # All output-format options are simple boolean switches.
    output = cmd.add_argument_group("Output format")
    switches = [
        ("-B", "--binary", "Raw binary output"),
        ("-b", "--bare", "Exclude timestamps from output lines"),
        ("-a", "--annotate", "Include comments with some information "
         "about the stream"),
        ("-m", "--markup", "Include comments with interval starts and ends"),
        ("-T", "--timestamp-raw",
         "Show raw timestamps in annotated information"),
        ("-c", "--count", "Just output a count of matched data points"),
    ]
    for (short, long_name, text) in switches:
        output.add_argument(short, long_name, action="store_true", help=text)
    return cmd
|
||||
|
||||
|
||||
def cmd_extract_verify(self):
    """Check the parsed 'extract' arguments, reporting problems
    through self.parser.error(): start must not be after end, and
    --binary excludes every other output-format option."""
    opts = self.args
    if opts.start > opts.end:
        self.parser.error("start is after end")

    if not opts.binary:
        return
    others = (opts.bare, opts.annotate, opts.markup,
              opts.timestamp_raw, opts.count)
    if any(others):
        self.parser.error("--binary cannot be combined with other options")
|
||||
|
||||
|
||||
def cmd_extract(self):
    """Extract data from one stream and write it to standard output.

    Returns 0 if at least one item was written, 2 otherwise.
    """
    opts = self.args

    streams = self.client.stream_list(opts.path)
    if len(streams) != 1:
        self.die("error getting stream info for path %s", opts.path)
    layout = streams[0][1]

    time_string = (nilmdb.utils.time.timestamp_to_string
                   if opts.timestamp_raw
                   else nilmdb.utils.time.timestamp_to_human)

    if opts.annotate:
        printf("# path: %s\n", opts.path)
        printf("# layout: %s\n", layout)
        printf("# start: %s\n", time_string(opts.start))
        printf("# end: %s\n", time_string(opts.end))

    # Binary output goes straight to the byte stream underneath
    # stdout; everything else is decoded and printed one line at a time.
    if opts.binary:
        emit = sys.stdout.buffer.write
    else:
        def emit(line):
            print(line.decode('utf-8'))

    # Strip timestamp (first element).  Doesn't make sense
    # if we are only returning a count.
    strip_timestamp = opts.bare and not opts.count

    emitted = False
    rows = self.client.stream_extract(opts.path,
                                      opts.start,
                                      opts.end,
                                      opts.count,
                                      opts.markup,
                                      opts.binary)
    for row in rows:
        if strip_timestamp:
            # Keep everything after the first space.
            row = row.partition(b' ')[2]
        emit(row)
        emitted = True

    if emitted:
        return 0
    if opts.annotate:
        printf("# no data\n")
    return 2
|
25
nilmdb/cmdline/help.py
Normal file
25
nilmdb/cmdline/help.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
import argparse
|
||||
|
||||
|
||||
def setup(self, sub):
    """Register the 'help' subcommand on the subparsers object 'sub'
    and return the new parser."""
    description = """
Show help for a command. 'help command' is
the same as 'command --help'.
"""
    cmd = sub.add_parser("help", help="Show detailed help for a command",
                         description=description)
    # The presence of 'no_test_connect' tells Cmdline.run() to skip
    # its test connection to the server.
    cmd.set_defaults(handler=cmd_help, no_test_connect=True)
    cmd.add_argument("command", nargs="?",
                     help="Command to get help about")
    # Swallow anything after the command name without listing it in
    # the help output.
    cmd.add_argument("rest", nargs=argparse.REMAINDER,
                     help=argparse.SUPPRESS)
    return cmd
|
||||
|
||||
|
||||
def cmd_help(self):
    """Print help for the named subcommand, or the top-level help
    when the name is missing or not a known subcommand."""
    name = self.args.command
    target = self.subcmd[name] if name in self.subcmd else self.parser
    target.print_help()
    return
|
30
nilmdb/cmdline/info.py
Normal file
30
nilmdb/cmdline/info.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
from argparse import ArgumentDefaultsHelpFormatter as def_form
|
||||
|
||||
import nilmdb.client
|
||||
from nilmdb.utils.printf import printf
|
||||
from nilmdb.utils import human_size
|
||||
|
||||
|
||||
def setup(self, sub):
    """Register the 'info' subcommand on the subparsers object 'sub'
    and return the new parser."""
    description = """
List information about the server, like
version.
"""
    cmd = sub.add_parser("info", help="Server information",
                         formatter_class=def_form,
                         description=description)
    cmd.set_defaults(handler=cmd_info)
    return cmd
|
||||
|
||||
|
||||
def cmd_info(self):
    """Print info about the server: client and server versions, the
    server URL, the database path, and disk space figures."""
    # Each value is fetched only when its line is about to be printed.
    headline = [
        ("Client version: %s\n", lambda: nilmdb.__version__),
        ("Server version: %s\n", self.client.version),
        ("Server URL: %s\n", self.client.geturl),
    ]
    for (fmt, fetch) in headline:
        printf(fmt, fetch())

    dbinfo = self.client.dbinfo()
    printf("Server database path: %s\n", dbinfo["path"])

    # (description, dbinfo key) for every disk space line, in order.
    space_fields = (
        ("used by NilmDB", "size"),
        ("used by other", "other"),
        ("reserved", "reserved"),
        ("free", "free"),
    )
    for (desc, field) in space_fields:
        printf("Server disk space %s: %s\n", desc, human_size(dbinfo[field]))
|
135
nilmdb/cmdline/insert.py
Normal file
135
nilmdb/cmdline/insert.py
Normal file
|
@ -0,0 +1,135 @@
|
|||
import sys
|
||||
|
||||
from nilmdb.utils.printf import printf
|
||||
import nilmdb.client
|
||||
import nilmdb.utils.timestamper as timestamper
|
||||
import nilmdb.utils.time
|
||||
|
||||
|
||||
def setup(self, sub):
    """Register the 'insert' subcommand on the subparsers object
    'sub' and return the new parser."""
    cmd = sub.add_parser("insert", help="Insert data",
                         description="""
Insert data into a stream.
""")
    cmd.set_defaults(verify=cmd_insert_verify,
                     handler=cmd_insert)
    cmd.add_argument("-q", "--quiet", action='store_true',
                     help='suppress unnecessary messages')

    # --- Timestamping ---
    stamping = cmd.add_argument_group("Timestamping",
                                      description="""
To add timestamps, specify the
arguments --timestamp and --rate,
and provide a starting time.
""")
    stamping.add_argument("-t", "--timestamp", action="store_true",
                          help="Add timestamps to each line")
    rate_arg = stamping.add_argument("-r", "--rate", type=float,
                                     help="Data rate, in Hz")
    rate_arg.completer = self.complete.rate

    # --- Start time: --start and --filename exclude each other ---
    start_group = cmd.add_argument_group("Start time",
                                         description="""
Start time may be manually
specified with --start, or guessed
from the filenames using
--filename. Set the TZ environment
variable to change the default
timezone.""")
    exclusive = start_group.add_mutually_exclusive_group()
    start_arg = exclusive.add_argument(
        "-s", "--start",
        metavar="TIME", type=self.arg_time,
        help="Starting timestamp (free-form)")
    start_arg.completer = self.complete.time
    exclusive.add_argument("-f", "--filename", action="store_true",
                           help="Use filename to determine start time")

    # --- End time ---
    end_group = cmd.add_argument_group("End time",
                                       description="""
End time for the overall stream.
(required when not using --timestamp).
Set the TZ environment
variable to change the default
timezone.""")
    end_arg = end_group.add_argument(
        "-e", "--end",
        metavar="TIME", type=self.arg_time,
        help="Ending timestamp (free-form)")
    end_arg.completer = self.complete.time

    # --- Positional parameters ---
    required = cmd.add_argument_group("Required parameters")
    path_arg = required.add_argument(
        "path",
        help="Path of stream, e.g. /foo/bar")
    path_arg.completer = self.complete.path
    required.add_argument("file", nargs='?', default='-',
                          help="File to insert (default: - (stdin))")
    return cmd
|
||||
|
||||
|
||||
def cmd_insert_verify(self):
    """Check the parsed 'insert' arguments, reporting problems
    through self.die().

    When adding timestamps, a rate and either --start or --filename
    are needed; otherwise both --start and --end are required.
    """
    opts = self.args

    if not opts.timestamp:
        if opts.start is None or opts.end is None:
            self.die("error: when not adding timestamps, --start and "
                     "--end are required")
        return

    if not opts.rate:
        self.die("error: --rate is needed, but was not specified")
    if opts.start is None and not opts.filename:
        self.die("error: need --start or --filename "
                 "when adding timestamps")
|
||||
|
||||
|
||||
def cmd_insert(self):
    """Insert the contents of a file (or standard input, for '-')
    into one stream.

    Without --start, the start time is parsed from the filename.
    With --timestamp the input is wrapped in a TimestamperRate;
    otherwise it is read in 1 MiB chunks.  A file opened here is
    always closed again before returning, including on error.
    Problems are reported through self.die().
    """
    # Find requested stream
    streams = self.client.stream_list(self.args.path)
    if len(streams) != 1:
        self.die("error getting stream info for path %s", self.args.path)

    arg = self.args

    infile = None
    # True only when we opened 'infile' ourselves; stdin must be
    # left open.
    opened_here = False

    try:
        filename = arg.file
        if filename == '-':
            infile = sys.stdin.buffer
        else:
            try:
                infile = open(filename, "rb")
                opened_here = True
            except IOError:
                self.die("error opening input file %s", filename)

        if arg.start is None:
            try:
                arg.start = nilmdb.utils.time.parse_time(filename)
            except ValueError:
                self.die("error extracting start time from filename '%s'",
                         filename)

        if arg.timestamp:
            data = timestamper.TimestamperRate(infile, arg.start, arg.rate)
        else:
            # Yield chunks until read() returns an empty bytes object.
            data = iter(lambda: infile.read(1048576), b'')

        # Print info
        if not arg.quiet:
            printf(" Input file: %s\n", filename)
            printf(" Start time: %s\n",
                   nilmdb.utils.time.timestamp_to_human(arg.start))
            if arg.end:
                printf(" End time: %s\n",
                       nilmdb.utils.time.timestamp_to_human(arg.end))
            if arg.timestamp:
                printf("Timestamper: %s\n", str(data))

        # Insert the data
        self.client.stream_insert(arg.path, data, arg.start, arg.end)

    except nilmdb.client.Error as e:
        # TODO: It would be nice to be able to offer better errors
        # here, particularly in the case of overlap, which just shows
        # ugly bracketed ranges of 16-digit numbers and a mangled URL.
        # Need to consider adding something like e.prettyprint()
        # that is smarter about the contents of the error.
        self.die("error inserting data: %s", str(e))

    finally:
        # Release the file handle on every exit path, including the
        # SystemExit raised by self.die().
        if opened_here:
            infile.close()

    return
|
76
nilmdb/cmdline/intervals.py
Normal file
76
nilmdb/cmdline/intervals.py
Normal file
|
@ -0,0 +1,76 @@
|
|||
from argparse import ArgumentDefaultsHelpFormatter as def_form
|
||||
|
||||
from nilmdb.utils.printf import printf
|
||||
import nilmdb.utils.time
|
||||
from nilmdb.utils.interval import Interval
|
||||
|
||||
|
||||
def setup(self, sub):
    """Register the 'intervals' subcommand on the subparsers object
    'sub' and return the new parser."""
    description = """
List intervals in a stream, similar to
'list --detail path'.

If '--diff diffpath' is provided, only
interval ranges that are present in 'path'
and not present in 'diffpath' are printed.
"""
    cmd = sub.add_parser("intervals", help="List intervals",
                         formatter_class=def_form,
                         description=description)
    cmd.set_defaults(verify=cmd_intervals_verify,
                     handler=cmd_intervals)

    selection = cmd.add_argument_group("Stream selection")
    path_arg = selection.add_argument(
        "path", metavar="PATH",
        help="List intervals for this path")
    path_arg.completer = self.complete.path
    diff_arg = selection.add_argument(
        "-d", "--diff", metavar="PATH",
        help="Subtract intervals from this path")
    diff_arg.completer = self.complete.path

    details = cmd.add_argument_group("Interval details")
    start_arg = details.add_argument(
        "-s", "--start",
        metavar="TIME", type=self.arg_time,
        help="Starting timestamp for intervals "
        "(free-form, inclusive)")
    start_arg.completer = self.complete.time
    end_arg = details.add_argument(
        "-e", "--end",
        metavar="TIME", type=self.arg_time,
        help="Ending timestamp for intervals "
        "(free-form, noninclusive)")
    end_arg.completer = self.complete.time

    misc = cmd.add_argument_group("Misc options")
    misc.add_argument("-T", "--timestamp-raw", action="store_true",
                      help="Show raw timestamps when printing times")
    misc.add_argument("-o", "--optimize", action="store_true",
                      help="Optimize (merge adjacent) intervals")

    return cmd
|
||||
|
||||
|
||||
def cmd_intervals_verify(self):
    """Reject, through self.parser.error(), a start time that is not
    strictly before the end time.  Nothing is checked unless both
    were given."""
    begin, finish = self.args.start, self.args.end
    if begin is None or finish is None:
        return
    if begin >= finish:
        self.parser.error("start must precede end")
|
||||
|
||||
|
||||
def cmd_intervals(self):
    """List intervals in a stream, one "[ start -> end ]" line per
    interval.  With --optimize the intervals are first passed through
    nilmdb.utils.interval.optimize.  Client errors are reported
    through self.die()."""
    opts = self.args
    time_string = (nilmdb.utils.time.timestamp_to_string
                   if opts.timestamp_raw
                   else nilmdb.utils.time.timestamp_to_human)

    try:
        pairs = self.client.stream_intervals(opts.path,
                                             opts.start,
                                             opts.end,
                                             opts.diff)
        spans = (Interval(begin, finish) for (begin, finish) in pairs)
        if opts.optimize:
            spans = nilmdb.utils.interval.optimize(spans)
        for span in spans:
            printf("[ %s -> %s ]\n",
                   time_string(span.start), time_string(span.end))

    except nilmdb.client.ClientError as err:
        self.die("error listing intervals: %s", str(err))
|
105
nilmdb/cmdline/list.py
Normal file
105
nilmdb/cmdline/list.py
Normal file
|
@ -0,0 +1,105 @@
|
|||
import fnmatch
|
||||
from argparse import ArgumentDefaultsHelpFormatter as def_form
|
||||
|
||||
from nilmdb.utils.printf import printf
|
||||
import nilmdb.utils.time
|
||||
|
||||
|
||||
def setup(self, sub):
    """Register the 'list' subcommand on the subparsers object 'sub'
    and return the new parser."""
    description = """
List streams available in the database,
optionally filtering by path. Wildcards
are accepted; non-matching paths or wildcards
are ignored.
"""
    cmd = sub.add_parser("list", help="List streams",
                         formatter_class=def_form,
                         description=description)
    cmd.set_defaults(verify=cmd_list_verify,
                     handler=cmd_list)

    filtering = cmd.add_argument_group("Stream filtering")
    path_arg = filtering.add_argument("path", metavar="PATH",
                                      default=["*"], nargs='*')
    path_arg.completer = self.complete.path

    info = cmd.add_argument_group("Interval info")
    info.add_argument("-E", "--ext", action="store_true",
                      help="Show extended stream info, like interval "
                      "extents and row count")

    details = cmd.add_argument_group("Interval details")
    details.add_argument("-d", "--detail", action="store_true",
                         help="Show available data time intervals")
    start_arg = details.add_argument(
        "-s", "--start",
        metavar="TIME", type=self.arg_time,
        help="Starting timestamp for intervals "
        "(free-form, inclusive)")
    start_arg.completer = self.complete.time
    end_arg = details.add_argument(
        "-e", "--end",
        metavar="TIME", type=self.arg_time,
        help="Ending timestamp for intervals "
        "(free-form, noninclusive)")
    end_arg.completer = self.complete.time

    misc = cmd.add_argument_group("Misc options")
    misc.add_argument("-T", "--timestamp-raw", action="store_true",
                      help="Show raw timestamps when printing times")
    misc.add_argument("-l", "--layout", action="store_true",
                      help="Show layout type next to path name")
    misc.add_argument("-n", "--no-decim", action="store_true",
                      help="Skip paths containing \"~decim-\"")

    return cmd
|
||||
|
||||
|
||||
def cmd_list_verify(self):
    """Check the parsed 'list' arguments, reporting problems through
    self.parser.error(): start must be strictly before end when both
    are given, and neither may be used without --detail."""
    opts = self.args
    have_start = opts.start is not None
    have_end = opts.end is not None

    if have_start and have_end and opts.start >= opts.end:
        self.parser.error("start must precede end")

    if (have_start or have_end) and not opts.detail:
        self.parser.error("--start and --end only make sense "
                          "with --detail")
|
||||
|
||||
|
||||
def cmd_list(self):
    """List available streams.

    For every path pattern on the command line, prints each stream
    whose path matches it; --ext adds extents and totals, and
    --detail adds the individual intervals.
    """
    opts = self.args
    streams = self.client.stream_list(extended=True)

    time_string = (nilmdb.utils.time.timestamp_to_string
                   if opts.timestamp_raw
                   else nilmdb.utils.time.timestamp_to_human)

    for pattern in opts.path:
        for entry in streams:
            (path, layout, first, last, rows, duration) = entry[:6]

            # Skip streams that don't match, and decimated streams
            # when asked to.
            if not fnmatch.fnmatch(path, pattern):
                continue
            if opts.no_decim and "~decim-" in path:
                continue

            if opts.layout:
                printf("%s %s\n", path, layout)
            else:
                printf("%s\n", path)

            if opts.ext:
                if first is None or last is None:
                    printf(" interval extents: (no data)\n")
                else:
                    printf(" interval extents: %s -> %s\n",
                           time_string(first), time_string(last))
                # Missing counts are shown as zero.
                printf(" total data: %d rows, %.6f seconds\n",
                       rows or 0,
                       nilmdb.utils.time.timestamp_to_seconds(duration or 0))

            if opts.detail:
                shown = 0
                for (start, end) in self.client.stream_intervals(
                        path, opts.start, opts.end):
                    printf(" [ %s -> %s ]\n",
                           time_string(start), time_string(end))
                    shown += 1
                if shown == 0:
                    printf(" (no intervals)\n")
|
90
nilmdb/cmdline/metadata.py
Normal file
90
nilmdb/cmdline/metadata.py
Normal file
|
@ -0,0 +1,90 @@
|
|||
from nilmdb.utils.printf import printf
|
||||
import nilmdb
|
||||
import nilmdb.client
|
||||
|
||||
|
||||
def setup(self, sub):
    """Register the 'metadata' subcommand on the subparsers object
    'sub' and return the new parser."""
    description = """
Get or set key=value metadata associated with
a stream.
"""
    usage = ("%(prog)s path [-g [key ...] | "
             "-s key=value [...] | -u key=value [...]] | "
             "-d [key ...]")
    cmd = sub.add_parser("metadata", help="Get or set stream metadata",
                         description=description,
                         usage=usage)
    cmd.set_defaults(handler=cmd_metadata)

    required = cmd.add_argument_group("Required arguments")
    path_arg = required.add_argument(
        "path",
        help="Path of stream, e.g. /foo/bar")
    path_arg.completer = self.complete.path

    # At most one of the four actions may be given.
    actions = cmd.add_argument_group("Actions")
    exclusive = actions.add_mutually_exclusive_group()

    get_arg = exclusive.add_argument(
        "-g", "--get", nargs="*", metavar="key",
        help="Get metadata for specified keys (default all)")
    get_arg.completer = self.complete.meta_key

    set_arg = exclusive.add_argument(
        "-s", "--set", nargs="+", metavar="key=value",
        help="Replace all metadata with provided "
        "key=value pairs")
    set_arg.completer = self.complete.meta_keyval

    update_arg = exclusive.add_argument(
        "-u", "--update", nargs="+", metavar="key=value",
        help="Update metadata using provided "
        "key=value pairs")
    update_arg.completer = self.complete.meta_keyval

    delete_arg = exclusive.add_argument(
        "-d", "--delete", nargs="*", metavar="key",
        help="Delete metadata for specified keys (default all)")
    delete_arg.completer = self.complete.meta_key
    return cmd
|
||||
|
||||
|
||||
def cmd_metadata(self):
    """Get, set, update, or delete metadata for the stream `self.args.path`.

    Exactly one action is taken, chosen from the parsed arguments:

    * --set / --update: parse each "key=value" argument and pass the
      resulting dict to the client's set or update call.  Only the first
      "=" separates key from value, so values may themselves contain "=".
    * --delete: fetch the selected keys (all keys if none were named) and
      update each of them to the empty string.
    * otherwise (--get or nothing): print "key=value" lines, sorted by key.

    Client errors and malformed key=value arguments are reported through
    `self.die`.
    """
    if self.args.set is not None or self.args.update is not None:
        # Either set, or update
        if self.args.set is not None:
            keyvals = self.args.set
            handler = self.client.stream_set_metadata
        else:
            keyvals = self.args.update
            handler = self.client.stream_update_metadata

        # Extract key=value pairs.  Split on the first "=" only so that
        # values such as URLs or base64 text survive intact.
        data = {}
        for keyval in keyvals:
            (key, sep, value) = keyval.partition('=')
            if not sep or key == "":
                self.die("error parsing key=value argument '%s'", keyval)
            data[key] = value

        # Make the call
        try:
            handler(self.args.path, data)
        except nilmdb.client.ClientError as e:
            self.die("error setting/updating metadata: %s", str(e))
    elif self.args.delete is not None:
        # Delete (by setting values to empty strings)
        keys = None
        if self.args.delete:
            keys = list(self.args.delete)
        try:
            data = self.client.stream_get_metadata(self.args.path, keys)
            for key in data:
                data[key] = ""
            self.client.stream_update_metadata(self.args.path, data)
        except nilmdb.client.ClientError as e:
            self.die("error deleting metadata: %s", str(e))
    else:
        # Get (or unspecified)
        keys = None
        if self.args.get:
            keys = list(self.args.get)
        try:
            data = self.client.stream_get_metadata(self.args.path, keys)
        except nilmdb.client.ClientError as e:
            self.die("error getting metadata: %s", str(e))
        for key, value in sorted(data.items()):
            # Print nonexistent keys as having empty value
            if value is None:
                value = ""
            printf("%s=%s\n", key, value)
|
59
nilmdb/cmdline/remove.py
Normal file
59
nilmdb/cmdline/remove.py
Normal file
|
@ -0,0 +1,59 @@
|
|||
import fnmatch
|
||||
|
||||
from nilmdb.utils.printf import printf
|
||||
import nilmdb.client
|
||||
|
||||
|
||||
def setup(self, sub):
    """Register the "remove" subcommand on `sub` and return its parser.

    `self` supplies the timestamp parser (`self.arg_time`) and the
    tab-completion helpers under `self.complete`.
    """
    parser = sub.add_parser("remove", help="Remove data",
                            description="""
                            Remove all data from a specified time range within a
                            stream.  If multiple streams or wildcards are
                            provided, the same time range is removed from all
                            streams.
                            """)
    parser.set_defaults(handler=cmd_remove)

    # Which streams, and which time range within them
    selection = parser.add_argument_group("Data selection")
    path_arg = selection.add_argument(
        "path", nargs='+', help="Path of stream, e.g. /foo/bar/*")
    path_arg.completer = self.complete.path

    # Both range endpoints are mandatory and share the same parsing rules.
    endpoints = (
        ("-s", "--start", "Starting timestamp (free-form, inclusive)"),
        ("-e", "--end", "Ending timestamp (free-form, noninclusive)"),
    )
    for (short, long_, help_text) in endpoints:
        time_arg = selection.add_argument(short, long_, required=True,
                                          metavar="TIME", type=self.arg_time,
                                          help=help_text)
        time_arg.completer = self.complete.time

    # Output controls
    output = parser.add_argument_group("Output format")
    output.add_argument("-q", "--quiet", action="store_true",
                        help="Don't display names when removing "
                        "from multiple paths")
    output.add_argument("-c", "--count", action="store_true",
                        help="Output number of data points removed")
    return parser
|
||||
|
||||
|
||||
def cmd_remove(self):
    """Remove the requested time range from every stream matching the paths.

    Each entry of `self.args.path` is expanded as an fnmatch pattern
    against the server's stream list; a pattern that matches nothing is
    reported through `self.die`.  Returns 0.
    """
    available = [entry[0] for entry in self.client.stream_list()]

    # Expand all patterns up front so nothing is removed if one is bad.
    targets = []
    for pattern in self.args.path:
        matched = fnmatch.filter(available, pattern)
        if not matched:
            self.die("error: no stream matched path: %s", pattern)
        targets.extend(matched)

    # Stream names are only announced when several streams are affected.
    announce = not self.args.quiet and len(targets) > 1
    try:
        for target in targets:
            if announce:
                printf("Removing from %s\n", target)
            removed = self.client.stream_remove(
                target, self.args.start, self.args.end)
            if self.args.count:
                printf("%d\n", removed)
    except nilmdb.client.ClientError as e:
        self.die("error removing data: %s", str(e))

    return 0
|
32
nilmdb/cmdline/rename.py
Normal file
32
nilmdb/cmdline/rename.py
Normal file
|
@ -0,0 +1,32 @@
|
|||
from argparse import ArgumentDefaultsHelpFormatter as def_form
|
||||
|
||||
import nilmdb.client
|
||||
|
||||
|
||||
def setup(self, sub):
    """Register the "rename" subcommand on `sub` and return its parser."""
    parser = sub.add_parser("rename", help="Rename a stream",
                            formatter_class=def_form,
                            description="""
                            Rename a stream.

                            Only the stream's path is renamed; no
                            metadata is changed.
                            """)
    parser.set_defaults(handler=cmd_rename)

    # Two positional arguments, both completed as stream paths.
    required = parser.add_argument_group("Required arguments")
    positionals = (
        ("oldpath", "Old path, e.g. /foo/old"),
        ("newpath", "New path, e.g. /foo/bar/new"),
    )
    for (dest, help_text) in positionals:
        arg = required.add_argument(dest, help=help_text)
        arg.completer = self.complete.path

    return parser
|
||||
|
||||
|
||||
def cmd_rename(self):
    """Rename the stream at `self.args.oldpath` to `self.args.newpath`.

    A client error is reported through `self.die`.
    """
    (source, destination) = (self.args.oldpath, self.args.newpath)
    try:
        self.client.stream_rename(source, destination)
    except nilmdb.client.ClientError as e:
        self.die("error renaming stream: %s", str(e))
|
|
@ -1,37 +0,0 @@
|
|||
"""FileInterval
|
||||
|
||||
An Interval that is backed with file data storage"""
|
||||
|
||||
from nilmdb.interval import Interval, IntervalSet, IntervalError
|
||||
from datetime import datetime
|
||||
import bisect
|
||||
|
||||
class FileInterval(Interval):
    """Represents an interval of time and its corresponding data

    The data lives in `filename`, between byte offsets `start_offset`
    and `end_offset`.
    """

    def __init__(self, start, end,
                 filename,
                 start_offset=None, end_offset=None):
        """Create an interval backed by part of a file.

        start, end   -- interval endpoints, validated by Interval
        filename     -- path of the file holding the data
        start_offset -- starting byte offset; defaults to 0
        end_offset   -- ending byte offset; defaults to the current size
                        of the file
        """
        # Imported locally: this module's import block does not provide os.
        import os

        if start_offset is None:
            start_offset = 0
        if end_offset is None:
            # Seek to the end to find the file size, and make sure the
            # handle is closed again afterwards.
            with open(filename, 'rb') as f:
                f.seek(0, os.SEEK_END)
                end_offset = f.tell()

        # __setattr__ below ignores every assignment, so plain
        # "self.x = ..." would silently store nothing.  Route start/end
        # through the base class so its validation still runs, and write
        # the file-related fields straight into the instance dict.
        Interval.__setattr__(self, "start", start)
        Interval.__setattr__(self, "end", end)
        self.__dict__["filename"] = filename
        self.__dict__["start_offset"] = start_offset
        self.__dict__["end_offset"] = end_offset

    def __setattr__(self, name, value):
        """Ignore attribute assignment after construction."""
        pass

    def subset(self, start, end):
        """Return a new Interval that is a subset of this one"""
        # TODO: Any magic regarding file/offset/length mapping for subsets
        # (for now the subset keeps this interval's file and offsets
        # unchanged rather than narrowing them to the new time range)
        if (start < self.start or end > self.end):
            raise IntervalError("not a subset")
        return FileInterval(start, end, self.filename,
                            self.start_offset, self.end_offset)
|
||||
|
||||
|
3
nilmdb/fsck/__init__.py
Normal file
3
nilmdb/fsck/__init__.py
Normal file
|
@ -0,0 +1,3 @@
|
|||
"""nilmdb.fsck"""
|
||||
|
||||
from nilmdb.fsck.fsck import Fsck
|
610
nilmdb/fsck/fsck.py
Normal file
610
nilmdb/fsck/fsck.py
Normal file
|
@ -0,0 +1,610 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""Check database consistency, with some ability to fix problems.
|
||||
This should be able to fix cases where a database gets corrupted due
|
||||
to unexpected system shutdown, and detect other cases that may cause
|
||||
NilmDB to return errors when trying to manipulate the database."""
|
||||
|
||||
import nilmdb.utils
|
||||
import nilmdb.server
|
||||
import nilmdb.client.numpyclient
|
||||
from nilmdb.utils.interval import IntervalError
|
||||
from nilmdb.server.interval import Interval, IntervalSet
|
||||
from nilmdb.utils.printf import printf, fprintf, sprintf
|
||||
|
||||
from collections import defaultdict
|
||||
import sqlite3
|
||||
import os
|
||||
import sys
|
||||
import progressbar
|
||||
import re
|
||||
import shutil
|
||||
import pickle
|
||||
import numpy
|
||||
|
||||
|
||||
class FsckError(Exception):
    """Base error for fsck problems.

    Any extra positional arguments are applied to `msg` as printf-style
    format arguments; with no extra arguments `msg` is used verbatim.
    """

    def __init__(self, msg="", *args):
        text = sprintf(msg, *args) if args else msg
        Exception.__init__(self, text)
|
||||
|
||||
|
||||
class FixableFsckError(FsckError):
    """An FsckError whose message also points the user at "--fix"."""

    def __init__(self, msg=""):
        hint = 'This may be fixable with "--fix".'
        FsckError.__init__(self, f'{msg}\n{hint}')
|
||||
|
||||
|
||||
class RetryFsck(FsckError):
    """Raised after a repair has been made, so that the check is re-run."""
|
||||
|
||||
|
||||
class FsckFormatError(FsckError):
    """Raised when a stream's bulkdata _format file is unreadable or bad."""
|
||||
|
||||
|
||||
def log(format, *args):
    """Write a printf-style progress message using printf."""
    printf(format, *args)
|
||||
|
||||
|
||||
def err(format, *args):
    """Write a printf-style message to standard error."""
    fprintf(sys.stderr, format, *args)
|
||||
|
||||
|
||||
# Decorator that retries a function if it raises a specific exception
def retry_if_raised(exc, message=None, max_retries=1000):
    """Return a decorator that re-runs the function while it raises `exc`.

    exc         -- exception class (or tuple of classes) that triggers a
                   retry; any other exception propagates immediately
    message     -- if set, logged together with the attempt number each
                   time a retry is triggered
    max_retries -- maximum number of attempts

    The decorated function returns whatever the first successful attempt
    returns.  If every attempt raises `exc`, a plain Exception is raised.
    """
    import functools

    def f1(func):
        # Keep the wrapped function's name and docstring on the wrapper.
        @functools.wraps(func)
        def f2(*args, **kwargs):
            for n in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except exc:
                    if message:
                        log(f"{message} ({n+1})\n\n")
            raise Exception("Max number of retries (%d) exceeded; giving up" %
                            max_retries)
        return f2
    return f1
|
||||
|
||||
|
||||
class Progress(object):
    """Context manager wrapping a progressbar.ProgressBar.

    The bar is started on entry.  On a clean exit it is finished; if the
    block raised, a newline is printed instead.
    """

    def __init__(self, maxval):
        # A maximum of zero is replaced by one before building the bar.
        effective_max = 1 if maxval == 0 else maxval
        layout = [progressbar.Percentage(), ' ',
                  progressbar.Bar(), ' ',
                  progressbar.ETA()]
        self.bar = progressbar.ProgressBar(maxval=effective_max,
                                           widgets=layout)
        # Fall back to 75 columns when no terminal width is reported.
        if not self.bar.term_width:
            self.bar.term_width = 75

    def __enter__(self):
        self.bar.start()
        self.last_update = 0
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is not None:
            printf("\n")
            return
        self.bar.finish()

    def update(self, val):
        """Advance the bar to `val`."""
        self.bar.update(val)
|
||||
|
||||
|
||||
class Fsck(object):
    """Consistency checker for the database directory at `path`.

    With fix=False, problems that have an automatic repair are reported
    as FixableFsckError.  With fix=True the repair is applied and
    RetryFsck is raised, which restarts the check from the beginning.
    """

    def __init__(self, path, fix=False):
        self.basepath = path
        self.sqlpath = os.path.join(path, "data.sql")
        self.bulkpath = os.path.join(path, "data")
        self.bulklock = os.path.join(path, "data.lock")
        self.fix = fix

    ### Main checks

    @retry_if_raised(RetryFsck, "Something was fixed: restarting fsck")
    def check(self, skip_data=False):
        """Run every check in order; skip the row-data pass if `skip_data`.

        The bulk data store and the SQL connection are always closed on
        the way out, whether or not a check failed.
        """
        self.bulk = None
        self.sql = None
        try:
            self.check_paths()
            self.check_sql()
            self.check_streams()
            self.check_intervals()
            if skip_data:
                log("skipped data check\n")
            else:
                self.check_data()
        finally:
            if self.bulk:
                self.bulk.close()
            if self.sql:  # pragma: no cover
                # (coverage doesn't handle finally clauses correctly;
                # both branches here are tested)
                self.sql.commit()
                self.sql.close()
        log("ok\n")

    ### Check basic path structure

    def check_paths(self):
        """Verify the SQL file and bulk directory exist, then open bulkdata.

        Raises FsckError if either is missing or if the database lock is
        held by another process.
        """
        log("checking paths\n")
        if self.bulk:
            self.bulk.close()
        if not os.path.isfile(self.sqlpath):
            raise FsckError("SQL database missing (%s)", self.sqlpath)
        if not os.path.isdir(self.bulkpath):
            raise FsckError("Bulk data directory missing (%s)", self.bulkpath)
        with open(self.bulklock, "w") as lockfile:
            if not nilmdb.utils.lock.exclusive_lock(lockfile):
                raise FsckError('Database already locked by another process\n'
                                'Make sure all other processes that might be '
                                'using the database are stopped.\n'
                                'Restarting apache will cause it to unlock '
                                'the db until a request is received.')
            # unlocked immediately
        self.bulk = nilmdb.server.bulkdata.BulkData(self.basepath)

    ### Check SQL database health

    def check_sql(self):
        """Open the SQLite database and load its tables into memory.

        Populates self.stream_path and self.stream_layout (keyed by
        stream ID), self.stream_interval (stream ID -> list of
        (start_time, end_time, start_pos, end_pos) ordered by start time)
        and self.stream_meta (stream ID -> {key: value}).

        Raises FsckError on a schema version mismatch, duplicated stream
        IDs or metadata keys, or rows that refer to an unknown stream.
        """
        log("checking sqlite database\n")

        self.sql = sqlite3.connect(self.sqlpath)
        with self.sql:
            cur = self.sql.cursor()
            ver = cur.execute("PRAGMA user_version").fetchone()[0]
            good = max(nilmdb.server.nilmdb._sql_schema_updates.keys())
            if ver != good:
                raise FsckError("database version %d too old, should be %d",
                                ver, good)
            self.stream_path = {}
            self.stream_layout = {}
            log(" loading paths\n")
            result = cur.execute("SELECT id, path, layout FROM streams")
            for r in result:
                if r[0] in self.stream_path:
                    raise FsckError("duplicated ID %d in stream IDs", r[0])
                self.stream_path[r[0]] = r[1]
                self.stream_layout[r[0]] = r[2]

            log(" loading intervals\n")
            self.stream_interval = defaultdict(list)
            result = cur.execute("SELECT stream_id, start_time, end_time, "
                                 "start_pos, end_pos FROM ranges "
                                 "ORDER BY start_time")
            for r in result:
                if r[0] not in self.stream_path:
                    raise FsckError("interval ID %d not in streams", r[0])
                self.stream_interval[r[0]].append((r[1], r[2], r[3], r[4]))

            log(" loading metadata\n")
            self.stream_meta = defaultdict(dict)
            result = cur.execute("SELECT stream_id, key, value FROM metadata")
            for r in result:
                if r[0] not in self.stream_path:
                    raise FsckError("metadata ID %d not in streams", r[0])
                if r[1] in self.stream_meta[r[0]]:
                    raise FsckError(
                        "duplicate metadata key '%s' for stream %d",
                        r[1], r[0])
                self.stream_meta[r[0]][r[1]] = r[2]

    ### Check streams and basic interval overlap

    def check_streams(self):
        """Check each stream's path, layout, bulkdata and interval overlap.

        Raises FsckError for unfixable problems.  Overlapping file
        positions and empty streams with a corrupted _format file are
        handed to the fix_* helpers.
        """
        ids = list(self.stream_path.keys())
        log("checking %s streams\n", "{:,d}".format(len(ids)))

        # Count how often each path occurs once, up front, instead of
        # rescanning every path for every stream.
        path_count = defaultdict(int)
        for known_path in self.stream_path.values():
            path_count[known_path] += 1

        with Progress(len(ids)) as pbar:
            for i, sid in enumerate(ids):
                pbar.update(i)
                path = self.stream_path[sid]

                # unique path, valid layout
                if path_count[path] != 1:
                    raise FsckError("duplicated path %s", path)
                layout_parts = self.stream_layout[sid].split('_')
                layout = layout_parts[0]
                if layout not in ('int8', 'int16', 'int32', 'int64',
                                  'uint8', 'uint16', 'uint32', 'uint64',
                                  'float32', 'float64'):
                    raise FsckError("bad layout %s for %s", layout, path)
                try:
                    count = int(layout_parts[1])
                except (IndexError, ValueError):
                    # Count missing or not a number: report it as a bad
                    # layout rather than letting a raw error escape.
                    raise FsckError("bad layout %s for %s",
                                    self.stream_layout[sid], path)
                if count < 1 or count > 1024:
                    raise FsckError("bad count %d for %s", count, path)

                # must exist in bulkdata
                bulk = self.bulkpath + path
                bulk = bulk.encode('utf-8')
                if not os.path.isdir(bulk):
                    raise FsckError("%s: missing bulkdata dir", path)
                if not nilmdb.server.bulkdata.Table.exists(bulk):
                    raise FsckError("%s: bad bulkdata table", path)

                # intervals don't overlap.  Abuse IntervalSet to check
                # for intervals in file positions, too.
                timeiset = IntervalSet()
                posiset = IntervalSet()
                for (stime, etime, spos, epos) in self.stream_interval[sid]:
                    new = Interval(stime, etime)
                    try:
                        timeiset += new
                    except IntervalError:
                        raise FsckError("%s: overlap in intervals:\n"
                                        "set: %s\nnew: %s",
                                        path, str(timeiset), str(new))
                    if spos != epos:
                        new = Interval(spos, epos)
                        try:
                            posiset += new
                        except IntervalError:
                            self.fix_row_overlap(sid, path, posiset, new)

                try:
                    # Check bulkdata
                    self.check_bulkdata(sid, path, bulk)

                    # Check that we can open bulkdata
                    tab = nilmdb.server.bulkdata.Table(bulk)
                except FsckFormatError:
                    # If there are no files except _format, try deleting
                    # the entire stream; this may remove metadata, but
                    # it's probably unimportant.
                    files = list(os.listdir(bulk))
                    if len(files) > 1:
                        raise FsckFormatError(f"{path}: can't load _format, "
                                              f"but data is also present")

                    # Since the stream was empty, just remove it
                    # (this call always raises, so `tab` below is only
                    # reached when the table was opened successfully)
                    self.fix_remove_stream(sid, path, bulk,
                                           "empty, with corrupted format file")
                except FsckError:
                    raise
                except Exception as e:  # pragma: no cover
                    # No coverage because this is an unknown/unexpected error
                    raise FsckError("%s: can't open bulkdata: %s",
                                    path, str(e))
                tab.close()

    def fix_row_overlap(self, sid, path, existing, new):
        """Try to repair overlapping file positions for stream `sid`.

        Always raises: FixableFsckError when not fixing, FsckError when
        the overlap cannot be repaired, RetryFsck after a repair.
        """
        # If the file rows (spos, epos) overlap in the interval table,
        # and the overlapping ranges look like this:
        #    A --------- C
        #         B -------- D
        # Then we can try changing the first interval to go from
        # A to B instead.
        msg = (f"{path}: overlap in file offsets:\n"
               f"existing ranges: {existing}\n"
               f"overlapping interval: {new}")
        if not self.fix:
            raise FixableFsckError(msg)
        err(f"\n{msg}\nSeeing if we can truncate one of them...\n")

        # See if there's exactly one interval that overlaps the
        # conflicting one in the right way
        match = None
        for intv in self.stream_interval[sid]:
            (stime, etime, spos, epos) = intv
            if spos < new.start and epos > new.start:
                if match:
                    err(f"no, more than one interval matched:\n"
                        f"{intv}\n{match}\n")
                    raise FsckError(f"{path}: unfixable overlap")
                match = intv
        if match is None:
            err("no intervals overlapped in the right way\n")
            raise FsckError(f"{path}: unfixable overlap")

        # Truncate the file position
        err(f"truncating {match}\n")
        with self.sql:
            cur = self.sql.cursor()
            cur.execute("UPDATE ranges SET end_pos=? "
                        "WHERE stream_id=? AND start_time=? AND "
                        "end_time=? AND start_pos=? AND end_pos=?",
                        (new.start, sid, *match))
            if cur.rowcount != 1:  # pragma: no cover (shouldn't fail)
                raise FsckError("failed to fix SQL database")
        raise RetryFsck

    ### Check that bulkdata is good enough to be opened

    @retry_if_raised(RetryFsck)
    def check_bulkdata(self, sid, path, bulk):
        """Validate the _format file and data file sizes under `bulk`.

        Raises FsckFormatError for an unreadable or inconsistent _format
        file.  Empty subdirectories and files whose size is not a
        multiple of the row size are handed to the fix_* helpers.
        """
        try:
            # NOTE(review): _format is unpickled; this is only safe while
            # the database directory itself is trusted.
            with open(os.path.join(bulk, b"_format"), "rb") as f:
                fmt = pickle.load(f)
        except Exception as e:
            raise FsckFormatError(f"{path}: can't load _format file ({e})")

        if fmt["version"] != 3:
            raise FsckFormatError("%s: bad or unsupported bulkdata version %d",
                                  path, fmt["version"])
        rows_per_file = int(fmt["rows_per_file"])
        if rows_per_file < 1:
            raise FsckFormatError(f"{path}: bad rows_per_file {rows_per_file}")
        files_per_dir = int(fmt["files_per_dir"])
        if files_per_dir < 1:
            raise FsckFormatError(f"{path}: bad files_per_dir {files_per_dir}")
        layout = fmt["layout"]
        if layout != self.stream_layout[sid]:
            raise FsckFormatError("%s: layout mismatch %s != %s", path,
                                  layout, self.stream_layout[sid])

        # Every file should have a size that's the multiple of the row size
        rkt = nilmdb.server.rocket.Rocket(layout, None)
        row_size = rkt.binary_size
        rkt.close()

        # Find all directories
        regex = re.compile(b"^[0-9a-f]{4,}$")
        subdirs = sorted(filter(regex.search, os.listdir(bulk)),
                         key=lambda x: int(x, 16), reverse=True)
        for subdir in subdirs:
            # Find all files in that dir
            subpath = os.path.join(bulk, subdir)
            files = list(filter(regex.search, os.listdir(subpath)))
            if not files:
                self.fix_empty_subdir(subpath)

            # Verify that their size is a multiple of the row size
            for filename in files:
                filepath = os.path.join(subpath, filename)
                offset = os.path.getsize(filepath)
                if offset % row_size:
                    self.fix_bad_filesize(path, filepath, offset, row_size)

    def fix_empty_subdir(self, subpath):
        """Remove a bulkdata subdirectory that contains no data files.

        Always raises: FixableFsckError when not fixing, FsckError when
        something other than ".removed" files is present, RetryFsck after
        the directory has been removed.
        """
        msg = sprintf("bulkdata path %s is missing data files", subpath)
        if not self.fix:
            raise FixableFsckError(msg)
        # Try to fix it by just deleting whatever is present,
        # as long as it's only ".removed" files.
        err("\n%s\n", msg)
        for fn in os.listdir(subpath):
            if not fn.endswith(b".removed"):
                raise FsckError("can't fix automatically: please manually "
                                "remove the file '%s' and try again",
                                os.path.join(subpath, fn).decode(
                                    'utf-8', errors='backslashreplace'))
        # Remove the whole thing
        err("Removing empty subpath\n")
        shutil.rmtree(subpath)
        raise RetryFsck

    def fix_bad_filesize(self, path, filepath, offset, row_size):
        """Truncate `filepath` down to a whole number of rows.

        Always raises: FixableFsckError when not fixing, RetryFsck after
        truncating.
        """
        extra = offset % row_size
        msg = sprintf("%s: size of file %s (%d) is not a multiple" +
                      " of row size (%d): %d extra bytes present",
                      path, filepath, offset, row_size, extra)
        if not self.fix:
            raise FixableFsckError(msg)
        # Try to fix it by just truncating the file
        err("\n%s\n", msg)
        newsize = offset - extra
        err("Truncating file to %d bytes and retrying\n", newsize)
        with open(filepath, "r+b") as f:
            f.truncate(newsize)
        raise RetryFsck

    def fix_remove_stream(self, sid, path, bulk, reason):
        """Delete stream `sid` from disk and from all three SQL tables.

        Always raises: FixableFsckError when not fixing, RetryFsck after
        the stream has been removed.
        """
        msg = f"stream {path} is corrupted: {reason}"
        if not self.fix:
            raise FixableFsckError(msg)
        # Remove the stream from disk and the database
        err(f"\n{msg}\n")
        err(f"Removing stream {path} from disk and database\n")
        shutil.rmtree(bulk)
        with self.sql:
            cur = self.sql.cursor()
            cur.execute("DELETE FROM streams WHERE id=?",
                        (sid,))
            if cur.rowcount != 1:  # pragma: no cover (shouldn't fail)
                raise FsckError("failed to remove stream")
            cur.execute("DELETE FROM ranges WHERE stream_id=?", (sid,))
            cur.execute("DELETE FROM metadata WHERE stream_id=?", (sid,))
        raise RetryFsck

    ### Check interval endpoints

    def check_intervals(self):
        """Check that every interval's endpoints can be read from its table."""
        total_ints = sum(len(x) for x in list(self.stream_interval.values()))
        log("checking %s intervals\n", "{:,d}".format(total_ints))
        done = 0
        with Progress(total_ints) as pbar:
            for sid in self.stream_interval:
                bulk = self.bulkpath + self.stream_path[sid]
                bulk = bulk.encode('utf-8')
                # Open the table before entering the try block: if the
                # open itself fails there is nothing to close, and the
                # original error must not be masked by the cleanup.
                tab = nilmdb.server.bulkdata.Table(bulk)
                try:
                    def update(x):
                        pbar.update(done + x)

                    ints = self.stream_interval[sid]
                    done += self.check_table_intervals(sid, ints, tab, update)
                finally:
                    tab.close()

    def check_table_intervals(self, sid, ints, tab, update):
        """Probe the first and last row of each interval in `ints`.

        `update` is called with the index of the interval being checked.
        Returns the number of intervals.
        """
        # look in the table to make sure we can pick out the interval's
        # endpoints
        tab.file_open.cache_remove_all()
        for (i, intv) in enumerate(ints):
            update(i)
            (stime, etime, spos, epos) = intv
            # Empty intervals inside the table have no rows to probe.
            if spos == epos and spos >= 0 and spos <= tab.nrows:
                continue
            try:
                # Only the access matters; the values are not used.
                tab[spos]
                tab[epos-1]
            except Exception as e:
                self.fix_bad_interval(sid, intv, tab, str(e))

        return len(ints)

    def fix_bad_interval(self, sid, intv, tab, msg):
        """Shorten or delete an interval whose rows cannot be accessed.

        Always raises: FixableFsckError when not fixing, RetryFsck after
        the SQL row has been updated or deleted.
        """
        path = self.stream_path[sid]
        msg = sprintf("%s: interval %s error accessing rows: %s",
                      path, str(intv), str(msg))
        if not self.fix:
            raise FixableFsckError(msg)
        err("\n%s\n", msg)

        (stime, etime, spos, epos) = intv
        # If it's just that the end pos is more than the number of rows
        # in the table, lower end pos and truncate interval time too.
        if spos < tab.nrows and epos >= tab.nrows:
            err("end position is past endrows, but it can be truncated\n")
            err("old end: time %d, pos %d\n", etime, epos)
            new_epos = tab.nrows
            new_etime = tab[new_epos-1] + 1
            err("new end: time %d, pos %d\n", new_etime, new_epos)
            if stime < new_etime:
                # Change it in SQL
                with self.sql:
                    cur = self.sql.cursor()
                    cur.execute("UPDATE ranges SET end_time=?, end_pos=? "
                                "WHERE stream_id=? AND start_time=? AND "
                                "end_time=? AND start_pos=? AND end_pos=?",
                                (new_etime, new_epos, sid, stime, etime,
                                 spos, epos))
                    if cur.rowcount != 1:  # pragma: no cover (shouldn't fail)
                        raise FsckError("failed to fix SQL database")
                raise RetryFsck
            err("actually it can't be truncated; times are bad too\n")

        # Otherwise, the only hope is to delete the interval entirely.
        err("*** Deleting the entire interval from SQL.\n")
        err("This may leave stale data on disk. To fix that, copy all "
            "data from this stream to a new stream using nilm-copy, then\n")
        err("remove all data from and destroy %s.\n", path)
        with self.sql:
            cur = self.sql.cursor()
            cur.execute("DELETE FROM ranges WHERE "
                        "stream_id=? AND start_time=? AND "
                        "end_time=? AND start_pos=? AND end_pos=?",
                        (sid, stime, etime, spos, epos))
            if cur.rowcount != 1:  # pragma: no cover (shouldn't fail)
                raise FsckError("failed to remove interval")
        raise RetryFsck

    ### Check data in each interval

    def check_data(self):
        """Read every row of every interval and verify its timestamps."""
        total_rows = sum(sum((y[3] - y[2]) for y in x)
                         for x in list(self.stream_interval.values()))
        log("checking %s rows of data\n", "{:,d}".format(total_rows))
        done = 0
        with Progress(total_rows) as pbar:
            for sid in self.stream_interval:
                bulk = self.bulkpath + self.stream_path[sid]
                bulk = bulk.encode('utf-8')
                # Open the table before entering the try block: if the
                # open itself fails there is nothing to close, and the
                # original error must not be masked by the cleanup.
                tab = nilmdb.server.bulkdata.Table(bulk)
                try:
                    def update(x):
                        pbar.update(done + x)

                    ints = self.stream_interval[sid]
                    done += self.check_table_data(sid, ints, tab, update)
                finally:
                    tab.close()

    def check_table_data(self, sid, ints, tab, update):
        """Verify timestamps of all rows covered by `ints` in `tab`.

        Rows are read in chunks of at most `maxrows` rows (100000 unless
        self.maxrows_override is set).  `update` is called with the
        running row count.  Returns the number of rows checked.

        Raises FsckError for out-of-range or non-monotonic timestamps,
        except where the offending timestamp is zero, in which case the
        table is truncated at that row via fix_table_by_truncating.
        """
        # Pull out all of the interval's data and verify that it's
        # monotonic.
        maxrows = getattr(self, 'maxrows_override', 100000)
        path = self.stream_path[sid]
        layout = self.stream_layout[sid]
        dtype = nilmdb.client.numpyclient.layout_to_dtype(layout)
        tab.file_open.cache_remove_all()
        done = 0
        for intv in ints:
            last_ts = None
            (stime, etime, spos, epos) = intv

            # Break interval into maxrows-sized chunks
            next_start = spos
            while next_start < epos:
                start = next_start
                stop = min(start + maxrows, epos)
                count = stop - start
                next_start = stop

                # Get raw data, convert to NumPy array
                try:
                    raw = tab.get_data(start, stop, binary=True)
                    data = numpy.frombuffer(raw, dtype)
                except Exception as e:  # pragma: no cover
                    # No coverage because it's hard to trigger this -- earlier
                    # checks check the ranges, so this would probably be a real
                    # disk error, malloc failure, etc.
                    raise FsckError(
                        "%s: failed to grab rows %d through %d: %s",
                        path, start, stop, repr(e))

                ts = data['timestamp']

                # Verify that all timestamps are in range.
                match = (ts < stime) | (ts >= etime)
                if match.any():
                    # argmax on a boolean array gives the first True entry
                    row = numpy.argmax(match)
                    if ts[row] != 0:
                        raise FsckError("%s: data timestamp %d at row %d "
                                        "outside interval range [%d,%d)",
                                        path, ts[row], row + start,
                                        stime, etime)

                    # Timestamp is zero and out of the expected range;
                    # assume file ends with zeroed data and just truncate it.
                    self.fix_table_by_truncating(
                        path, tab, row + start,
                        "data timestamp is out of range, and zero")

                # Verify that timestamps are monotonic
                match = numpy.diff(ts) <= 0
                if match.any():
                    row = numpy.argmax(match)
                    if ts[row+1] != 0:
                        raise FsckError(
                            "%s: non-monotonic timestamp (%d -> %d) "
                            "at row %d", path, ts[row], ts[row+1],
                            row + start)

                    # Timestamp is zero and non-monotonic;
                    # assume file ends with zeroed data and just truncate it.
                    self.fix_table_by_truncating(
                        path, tab, row + start + 1,
                        "data timestamp is non-monotonic, and zero")

                # Chunks of one interval must also be ordered relative
                # to each other.
                first_ts = ts[0]
                if last_ts is not None and first_ts <= last_ts:
                    raise FsckError("%s: first interval timestamp %d is not "
                                    "greater than the previous last interval "
                                    "timestamp %d, at row %d",
                                    path, first_ts, last_ts, start)
                last_ts = ts[-1]

                # The previous errors are fixable, by removing the
                # offending intervals, or changing the data
                # timestamps.  But these are probably unlikely errors,
                # so it's not worth implementing that yet.

                # Done
                done += count
                update(done)
        return done

    def fix_table_by_truncating(self, path, tab, row, reason):
        """Truncate `tab` at `row`.

        Always raises: FixableFsckError when not fixing, RetryFsck after
        truncating.
        """
        # Simple fix for bad data: truncate the table at the given row.
        # On retry, fix_bad_interval will correct the database and timestamps
        # to account for this truncation.
        msg = f"{path}: bad data in table, starting at row {row}: {reason}"
        if not self.fix:
            raise FixableFsckError(msg)
        err(f"\n{msg}\nWill try truncating table\n")
        (subdir, fname, offs, count) = tab._offset_from_row(row)
        tab._remove_or_truncate_file(subdir, fname, offs)
        raise RetryFsck
|
|
@ -1,205 +0,0 @@
|
|||
"""Interval and IntervalSet
|
||||
|
||||
Represents an interval of time, and a sorted set of such intervals"""
|
||||
|
||||
from datetime import datetime
|
||||
import bisect
|
||||
|
||||
class IntervalError(Exception):
    """Raised for interval problems such as overlap or bad endpoints."""
|
||||
|
||||
class Interval(object):
    """Represents an interval of time

    `start` and `end` must be datetime objects with start <= end.
    Intersection tests treat the interval as half-open: two intervals
    that merely share an endpoint do not intersect.
    """

    # Class-level defaults, visible until the instance values are set.
    start = None
    end = None

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def __repr__(self):
        return "Interval(" + repr(self.start) + ", " + repr(self.end) + ")"

    def __str__(self):
        return "[" + str(self.start) + " -> " + str(self.end) + "]"

    def __setattr__(self, name, value):
        """Set attribute

        Raises IntervalError if `value` is not a datetime, or if the
        assignment would leave start later than end.  A rejected
        assignment leaves the object unchanged.
        """
        # TODO: If we need to manipulate file names, offsets, lengths, etc,
        # based on start and end time changing, maybe this is the right spot?
        # Or we could just disallow changing it here.
        if not isinstance(value, datetime):
            raise IntervalError("Must set datetime values")
        # Work out what the endpoints would be after the assignment and
        # validate them before storing anything.
        start = value if name == "start" else self.start
        end = value if name == "end" else self.end
        if (type(start) is type(end)):
            if (start > end):
                raise IntervalError("Interval start must precede interval end")
        self.__dict__[name] = value

    def __cmp__(self, other):
        """Compare two intervals.  If non-equal, order by start then end"""
        if not isinstance(other, Interval):
            raise TypeError("Can't compare to non-interval")
        if (self.start == other.start):
            if (self.end < other.end):
                return -1
            if (self.end > other.end):
                return 1
            return 0
        if (self.start < other.start):
            return -1
        return 1

    def intersects(self, other):
        """Return True if two Interval objects intersect"""
        if (not isinstance(other, Interval)):
            raise TypeError("need Interval for intersection test")
        return not (self.end <= other.start or
                    self.start >= other.end)

    def is_adjacent(self, other):
        """Return True if two Intervals are adjacent (same end or start)"""
        if (not isinstance(other, Interval)):
            raise TypeError("need Interval for adjacency test")
        return (self.end == other.start or
                self.start == other.end)

    def subset(self, start, end):
        """Return a new Interval that is a subset of this one"""
        # TODO: Any magic regarding file/offset/length mapping for subsets
        if (start < self.start or end > self.end):
            raise IntervalError("not a subset")
        return Interval(start, end)
|
||||
|
||||
class IntervalSet(object):
    """A set of Intervals in which no two members intersect.

    Members are stored in the list 'self.data', which is kept sorted."""

    def __init__(self, iterable=None):
        """Start empty, then add either a single Interval or every
        Interval produced by 'iterable' (if given)."""
        self.data = []
        if iterable is None:
            return
        if isinstance(iterable, Interval):
            # Allow a lone Interval in place of an iterable
            iterable = [iterable]
        self._add_intervals(iterable)

    def __iter__(self):
        return iter(self.data)

    def __repr__(self):
        return "IntervalSet(%s)" % (repr(list(self.data)),)

    def __cmp__(self, other):
        # IntervalSets have no meaningful ordering, so cmp() is refused
        raise TypeError("can't compare IntervalSets with cmp()")

    def __eq__(self, other):
        """Return True if both IntervalSets cover the same spans of time.

        Adjacent Intervals are treated as one long interval, so the
        two sets need not be split into the same pieces.  Anything
        that is not an IntervalSet compares unequal."""
        if not isinstance(other, IntervalSet):
            return False
        a = 0              # index into self
        b = 0              # index into other
        in_gap = True      # True while positioned between covered spans
        try:
            while True:
                if in_gap:
                    # Both sets exhausted at the same time: they match
                    if a >= len(self) and b >= len(other):
                        return True
                    # Otherwise the next covered span must begin at
                    # the same point in both sets
                    if self[a].start != other[b].start:
                        return False
                    in_gap = False
                elif self[a].end == other[b].end:
                    # Both current intervals end together; step past them
                    a += 1
                    b += 1
                    in_gap = True
                elif self[a].end < other[b].end:
                    # Ours ends first, so our next one must continue
                    # directly from it
                    if not self[a].is_adjacent(self[a + 1]):
                        return False
                    a += 1
                else:
                    # Theirs ends first; same requirement on their side
                    if not other[b].is_adjacent(other[b + 1]):
                        return False
                    b += 1
        except IndexError:
            # Ran off the end of one set while the other still had data
            return False

    def __ne__(self, other):
        equal = self.__eq__(other)
        return not equal

    def __len__(self):
        return len(self.data)

    def __getitem__(self, key):
        return self.data[key]

    def __iadd__(self, other):
        """In-place add of an Interval or an iterable of Intervals.

        Modifies and returns self.  An exception is raised if any
        added region intersects one already present."""
        if isinstance(other, Interval):
            other = [other]
        self._add_intervals(other)
        return self

    def __add__(self, other):
        """Return a new IntervalSet holding the intervals of both operands.

        An exception is raised if the regions being added intersect."""
        combined = IntervalSet(self)
        combined += IntervalSet(other)
        return combined

    def __and__(self, other):
        """Return a new IntervalSet covering the intersection of self
        with 'other' (an Interval or an IntervalSet).

        Output intervals are built as subsets of the intervals in the
        first argument (self).  Raises TypeError for any other type."""
        if isinstance(other, IntervalSet):
            # Intersect with each member of the other set in turn
            result = IntervalSet()
            for member in other.data:
                result += self & member
            return result

        if not isinstance(other, Interval):
            raise TypeError("can't intersect with that type")

        result = IntervalSet()
        for mine in self.data:
            # Keep only the overlapping part, if there is one
            if mine.end > other.start and mine.start < other.end:
                lo = max(mine.start, other.start)
                hi = min(mine.end, other.end)
                result += mine.subset(lo, hi)
        return result

    def _add_intervals(self, iterable):
        """Add each Interval from an iterable to this set"""
        for element in iterable:
            self._add_single_interval(element)

    def _add_single_interval(self, interval):
        """Insert one Interval, keeping 'self.data' sorted.

        Raises TypeError for non-Interval arguments and IntervalError
        if the new interval intersects an existing member."""
        if not isinstance(interval, Interval):
            raise TypeError("can only add Intervals")
        if any(existing.intersects(interval) for existing in self.data):
            raise IntervalError("Tried to add overlapping interval "
                                "to this set")
        bisect.insort(self.data, interval)
|
1
nilmdb/scripts/__init__.py
Normal file
1
nilmdb/scripts/__init__.py
Normal file
|
@ -0,0 +1 @@
|
|||
# Command line scripts
|
27
nilmdb/scripts/nilmdb_fsck.py
Executable file
27
nilmdb/scripts/nilmdb_fsck.py
Executable file
|
@ -0,0 +1,27 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import nilmdb.fsck
|
||||
import argparse
|
||||
|
||||
|
||||
def main():
    """Main entry point for the 'nilmdb-fsck' command line script"""

    parser = argparse.ArgumentParser(
        description='Check database consistency',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Informational option
    parser.add_argument("-v", "--version", action="version",
                        version=nilmdb.__version__)

    # Behavior switches, both off unless given
    fix_help = 'Fix errors when possible (which may involve removing data)'
    parser.add_argument("-f", "--fix", action="store_true",
                        default=False, help=fix_help)
    nodata_help = 'Skip the slow full-data check'
    parser.add_argument("-n", "--no-data", action="store_true",
                        default=False, help=nodata_help)

    # The database to check
    parser.add_argument('database', help='Database directory')

    options = parser.parse_args()

    checker = nilmdb.fsck.Fsck(options.database, options.fix)
    checker.check(skip_data=options.no_data)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
99
nilmdb/scripts/nilmdb_server.py
Executable file
99
nilmdb/scripts/nilmdb_server.py
Executable file
|
@ -0,0 +1,99 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import socket
|
||||
import argparse
|
||||
|
||||
import cherrypy
|
||||
|
||||
import nilmdb.server
|
||||
|
||||
|
||||
def main():
    """Main entry point for the 'nilmdb-server' command line script

    Parses the command line, opens the database, builds the server
    and runs it (blocking) until it stops.  The database is closed on
    the way out, whether the server exited normally or not.  With
    --yappi, the server runs under the yappi profiler and the
    collected statistics are printed afterwards."""

    parser = argparse.ArgumentParser(
        description='Run the NilmDB server',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("-v", "--version", action="version",
                        version=nilmdb.__version__)

    group = parser.add_argument_group("Standard options")
    group.add_argument('-a', '--address',
                       help='Only listen on the given address',
                       default='0.0.0.0')
    group.add_argument('-p', '--port', help='Listen on the given port',
                       type=int, default=12380)
    group.add_argument('-d', '--database', help='Database directory',
                       default="./db")
    group.add_argument('-q', '--quiet', help='Silence output',
                       action='store_true')
    group.add_argument('-t', '--traceback',
                       help='Provide tracebacks in client errors',
                       action='store_true', default=False)

    group = parser.add_argument_group("Debug options")
    group.add_argument('-y', '--yappi', help='Run under yappi profiler and '
                       'invoke interactive shell afterwards',
                       action='store_true')

    args = parser.parse_args()

    # Create database object.  Needs to be serialized before passing
    # to the Server.
    db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(args.database)

    # Configure the server
    if not args.quiet:
        cherrypy._cpconfig.environments['embedded']['log.screen'] = True

    server = nilmdb.server.Server(db,
                                  host=args.address,
                                  port=args.port,
                                  force_traceback=args.traceback)

    # Print info
    if not args.quiet:
        print("Version: %s" % nilmdb.__version__)
        print("Database: %s" % (os.path.realpath(args.database)))
        if args.address == '0.0.0.0' or args.address == '::':
            # Listening on a wildcard address; show a name that a
            # client could actually connect to
            host = socket.getfqdn()
        else:
            host = args.address
        print("Server URL: http://%s:%d/" % (host, args.port))
        print("----")

    # Run it
    try:
        if args.yappi:
            print("Running in yappi")
            # Import before entering the try/finally below.  If yappi
            # is missing, the ImportError must propagate as-is; with
            # the import inside the try, the finally clause would
            # touch the unbound name 'yappi' and mask the real error.
            import yappi
            try:
                yappi.start()
                server.start(blocking=True)
            finally:
                yappi.stop()
                stats = yappi.get_func_stats()
                stats.sort("ttot")
                stats.print_all()
                try:
                    from IPython import embed
                    embed(header="Use the `yappi` or `stats` object to "
                          "explore further, `quit` to exit")
                except ModuleNotFoundError:
                    print("\nInstall ipython to explore further")
        else:
            server.start(blocking=True)
    except nilmdb.server.serverutil.CherryPyExit:
        print("Exiting due to CherryPy error", file=sys.stderr)
        raise
    finally:
        if not args.quiet:
            print("Closing database")
        db.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
12
nilmdb/scripts/nilmtool.py
Executable file
12
nilmdb/scripts/nilmtool.py
Executable file
|
@ -0,0 +1,12 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import nilmdb.cmdline
|
||||
|
||||
|
||||
def main():
    """Main entry point for the 'nilmtool' command line script"""
    # Build the command line handler, then hand control to it
    tool = nilmdb.cmdline.Cmdline()
    tool.run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
9
nilmdb/server/__init__.py
Normal file
9
nilmdb/server/__init__.py
Normal file
|
@ -0,0 +1,9 @@
|
|||
"""nilmdb.server"""
|
||||
|
||||
# Set up pyximport to automatically rebuild Cython modules if needed.
|
||||
import pyximport
|
||||
pyximport.install(inplace=True, build_in_temp=False)
|
||||
|
||||
from nilmdb.server.nilmdb import NilmDB
|
||||
from nilmdb.server.server import Server, wsgi_application
|
||||
from nilmdb.server.errors import NilmDBError, StreamError, OverlapError
|
635
nilmdb/server/bulkdata.py
Normal file
635
nilmdb/server/bulkdata.py
Normal file
|
@ -0,0 +1,635 @@
|
|||
# Fixed record size bulk data storage
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import pickle
|
||||
import tempfile
|
||||
|
||||
from nilmdb.utils.printf import sprintf
|
||||
from nilmdb.utils.time import timestamp_to_string
|
||||
import nilmdb.utils
|
||||
|
||||
import nilmdb.utils.lock
|
||||
from . import rocket
|
||||
|
||||
# Up to 256 open file descriptors at any given time.
|
||||
# These variables are global so they can be used in the decorator arguments.
|
||||
table_cache_size = 32
|
||||
fd_cache_size = 8
|
||||
|
||||
|
||||
@nilmdb.utils.must_close(wrap_verify=False)
class BulkData():
    """Manages the directory tree of tables stored under
    '<basepath>/data'.

    NilmDB stream paths (always Unix style, e.g. '/newton/prep') are
    mapped onto OS directories below the data root.  An exclusive
    lock file next to the data root is held from construction until
    close()."""

    def __init__(self, basepath, **kwargs):
        """Open (creating if necessary) the data root and lock it.

        basepath: database directory, as str or bytes.

        Optional keyword tuneables; a missing key or an explicit None
        selects the default:
          file_size: approximate size of each data file, in bytes
          files_per_dir: number of data files per subdirectory
          initial_nrows: starting row number for empty tables

        Raises IOError if the lock cannot be acquired."""
        if isinstance(basepath, str):
            self.basepath = self._encode_filename(basepath)
        else:
            self.basepath = basepath
        self.root = os.path.join(self.basepath, b"data")
        self.lock = self.root + b".lock"
        self.lockfile = None

        def tunable(name, default):
            # Missing and None are treated alike
            value = kwargs.get(name)
            return default if value is None else value

        # Tuneables
        # Default to approximately 128 MiB per file
        self.file_size = tunable("file_size", 128 * 1024 * 1024)
        # 32768 files per dir should work even on FAT32
        self.files_per_dir = tunable("files_per_dir", 32768)
        # First row is 0
        self.initial_nrows = tunable("initial_nrows", 0)

        # Make root path
        if not os.path.isdir(self.root):
            os.mkdir(self.root)

        # Create the lock
        self.lockfile = open(self.lock, "w")
        if not nilmdb.utils.lock.exclusive_lock(self.lockfile):
            # We don't own the lock, so just drop our handle to the
            # file rather than leaking it; don't unlink the file.
            self.lockfile.close()
            self.lockfile = None
            raise IOError('database at "' +
                          self._decode_filename(self.basepath) +
                          '" is already locked by another process')

    def close(self):
        """Evict all cached tables, then release and remove the lock
        file if we hold it."""
        self.getnode.cache_remove_all()
        if self.lockfile:
            nilmdb.utils.lock.exclusive_unlock(self.lockfile)
            self.lockfile.close()
            try:
                os.unlink(self.lock)
            except OSError:
                # Best effort; a leftover lock file is harmless
                pass
            self.lockfile = None

    def _encode_filename(self, path):
        # Translate unicode strings to raw bytes, if needed.  We
        # always manipulate paths internally as bytes.
        return path.encode('utf-8')

    def _decode_filename(self, path):
        # Translate raw bytes to unicode strings, escaping if needed
        return path.decode('utf-8', errors='backslashreplace')

    def _create_check_ospath(self, ospath):
        """Raise ValueError unless a new table may be placed at the
        OS path 'ospath'."""
        if ospath[-1:] == b'/':
            raise ValueError("invalid path; should not end with a /")
        if Table.exists(ospath):
            raise ValueError("stream already exists at this path")
        if os.path.isdir(ospath):
            # Look for any files in subdirectories.  Fully empty
            # subdirectories are OK; they might be there during a rename
            for (_, _, files) in os.walk(ospath):
                if files:
                    raise ValueError(
                        "non-empty subdirs of this path already exist")

    def _create_parents(self, unicodepath):
        """Verify the path name, and create parent directories if they
        don't exist.

        Returns the list of path components of 'unicodepath' (as
        bytes, without the leading slash).  Raises ValueError if the
        path is malformed or lies below an existing table; in that
        case, directories created by this call are removed again."""
        path = self._encode_filename(unicodepath)

        if path[0:1] != b'/':
            raise ValueError("paths must start with / ")
        [group, node] = path.rsplit(b"/", 1)
        if group == b'':
            raise ValueError("invalid path; path must contain at least one "
                             "folder")
        if node == b'':
            raise ValueError("invalid path; should not end with a /")
        if not Table.valid_path(path):
            raise ValueError("path name is invalid or contains reserved words")

        # Create the table's base dir.  Note that we make a
        # distinction here between NilmDB paths (always Unix style,
        # split apart manually) and OS paths (built up with
        # os.path.join)

        # Make directories leading up to this one
        elements = path.lstrip(b'/').split(b'/')
        made_dirs = []
        try:
            # Make parent elements (every prefix except the full path)
            for i in range(len(elements)):
                ospath = os.path.join(self.root, *elements[0:i])
                if Table.exists(ospath):
                    raise ValueError("path is subdir of existing node")
                if not os.path.isdir(ospath):
                    os.mkdir(ospath)
                    made_dirs.append(ospath)
        except Exception:
            # Remove paths that we created, deepest first
            for ospath in reversed(made_dirs):
                os.rmdir(ospath)
            raise

        return elements

    def create(self, unicodepath, layout_name):
        """
        unicodepath: path to the data (e.g. u'/newton/prep').
        Paths must contain at least two elements, e.g.:
          /newton/prep
          /newton/raw
          /newton/upstairs/prep
          /newton/upstairs/raw

        layout_name: layout string handed to Table.create,
        e.g. 'float32_8'
        """
        elements = self._create_parents(unicodepath)

        # Make the final dir
        ospath = os.path.join(self.root, *elements)
        self._create_check_ospath(ospath)
        os.mkdir(ospath)

        try:
            # Write format string to file
            Table.create(ospath, layout_name, self.file_size,
                         self.files_per_dir)

            # Open and cache it
            self.getnode(unicodepath)
        except Exception:
            # Best-effort removal of the directory we just made,
            # then let the original error continue upward.
            try:
                os.rmdir(ospath)
            except OSError:
                pass
            raise

        # Success
        return

    def _remove_leaves(self, unicodepath):
        """Remove empty directories starting at the leaves of unicodepath"""
        path = self._encode_filename(unicodepath)
        elements = path.lstrip(b'/').split(b'/')
        # Deepest directory first, then each parent in turn
        for depth in range(len(elements), 0, -1):
            ospath = os.path.join(self.root, *elements[0:depth])
            try:
                os.rmdir(ospath)
            except OSError:
                # Not empty (or already gone); leave it alone
                pass

    def rename(self, oldunicodepath, newunicodepath):
        """Move entire tree from 'oldunicodepath' to
        'newunicodepath'

        Raises ValueError if both paths are the same or the
        destination is not acceptable; on failure the table is moved
        back to its original location."""
        oldpath = self._encode_filename(oldunicodepath)
        newpath = self._encode_filename(newunicodepath)

        # Get OS paths
        oldelements = oldpath.lstrip(b'/').split(b'/')
        oldospath = os.path.join(self.root, *oldelements)
        newelements = newpath.lstrip(b'/').split(b'/')
        newospath = os.path.join(self.root, *newelements)

        # Basic checks
        if oldospath == newospath:
            raise ValueError("old and new paths are the same")

        # Remove Table object at old path from cache
        self.getnode.cache_remove(self, oldunicodepath)

        # Move the table to a temporary location
        tmpdir = tempfile.mkdtemp(prefix=b"rename-", dir=self.root)
        tmppath = os.path.join(tmpdir, b"table")
        try:
            os.rename(oldospath, tmppath)
        except Exception:
            # Nothing was moved; don't leave the temp directory behind
            os.rmdir(tmpdir)
            raise

        try:
            # Check destination path
            self._create_check_ospath(newospath)

            # Create parent dirs for new location
            self._create_parents(newunicodepath)

            # Move table into new location
            os.rename(tmppath, newospath)
        except Exception:
            # On failure, move the table back to original path
            os.rename(tmppath, oldospath)
            os.rmdir(tmpdir)
            raise

        # Prune old dirs
        self._remove_leaves(oldunicodepath)
        os.rmdir(tmpdir)

    def destroy(self, unicodepath):
        """Fully remove all data at a particular path.  No way to undo
        it!  The group/path structure is removed, too.

        Raises ValueError if no table exists at that path."""
        path = self._encode_filename(unicodepath)

        # Get OS path
        elements = path.lstrip(b'/').split(b'/')
        ospath = os.path.join(self.root, *elements)

        # Remove Table object from cache
        self.getnode.cache_remove(self, unicodepath)

        # Remove the contents of the target directory
        if not Table.exists(ospath):
            raise ValueError("nothing at that path")
        # Bottom-up walk, so directories are empty by the time we
        # reach them
        for (root, dirs, files) in os.walk(ospath, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))

        # Remove leftover empty directories
        self._remove_leaves(unicodepath)

    # Cache open tables
    @nilmdb.utils.lru_cache(size=table_cache_size,
                            onremove=lambda x: x.close())
    def getnode(self, unicodepath):
        """Return a Table object corresponding to the given database
        path, which must exist."""
        path = self._encode_filename(unicodepath)
        elements = path.lstrip(b'/').split(b'/')
        ospath = os.path.join(self.root, *elements)
        return Table(ospath, self.initial_nrows)
|
||||
|
||||
|
||||
@nilmdb.utils.must_close(wrap_verify=False)
class Table():
    """Tools to help access a single table (data at a specific OS path)."""
    # See design.md for design details

    # Class methods, to help keep format details in this class.
    @classmethod
    def valid_path(cls, root):
        """Return True if a root path is a valid name, i.e. none of
        its '/'-separated components is the reserved name '_format'"""
        components = root.split(b"/")
        return b"_format" not in components

    @classmethod
    def exists(cls, root):
        """Return True if a table appears to exist at this OS path"""
        format_file = os.path.join(root, b"_format")
        return os.path.isfile(format_file)

    @classmethod
    def create(cls, root, layout, file_size, files_per_dir):
        """Initialize a table at the given OS path with the
        given layout string, by writing its pickled '_format' file"""

        # Pick a row count per file that makes each file approximately
        # file_size bytes, but never fewer than one row.
        probe = rocket.Rocket(layout, None)
        rows_per_file = max(file_size // probe.binary_size, 1)
        probe.close()

        fmt = {
            "rows_per_file": rows_per_file,
            "files_per_dir": files_per_dir,
            "layout": layout,
            "version": 3
        }
        format_file = os.path.join(root, b"_format")
        nilmdb.utils.atomic.replace_file(format_file, pickle.dumps(fmt, 2))

    # Normal methods
    def __init__(self, root, initial_nrows=0):
        """'root' is the full OS path to the directory of this table

        Raises NotImplementedError if the stored format version is
        not 3."""
        self.root = root
        self.initial_nrows = initial_nrows

        # Load the format
        with open(os.path.join(self.root, b"_format"), "rb") as f:
            fmt = pickle.load(f)

        version = fmt["version"]
        if version != 3:
            # Old versions used floating point timestamps, which aren't
            # valid anymore.
            raise NotImplementedError("old version " + str(version) +
                                      " bulk data store is not supported")

        self.rows_per_file = fmt["rows_per_file"]
        self.files_per_dir = fmt["files_per_dir"]
        self.layout = fmt["layout"]

        # Use rocket to get row size and file size
        probe = rocket.Rocket(self.layout, None)
        self.row_size = probe.binary_size
        self.file_size = probe.binary_size * self.rows_per_file
        probe.close()

        # Find nrows
        self.nrows = self._get_nrows()

    def close(self):
        """Evict (and thereby close) every cached open file."""
        self.file_open.cache_remove_all()

    # Internal helpers
    def _get_nrows(self):
        """Find nrows by locating the numerically last data file
        and using its size"""
        # Note that this just finds a 'nrows' that is guaranteed to be
        # greater than the row number of any piece of data that
        # currently exists, not necessarily all data that _ever_
        # existed.
        hexname = re.compile(b"^[0-9a-f]{4,}$")

        # Find the last directory.  We sort and loop through all of them,
        # starting with the numerically greatest, because the dirs could be
        # empty if something was deleted but the directory was unexpectedly
        # not deleted.
        subdirs = [d for d in os.listdir(self.root) if hexname.search(d)]
        subdirs.sort(key=lambda name: int(name, 16), reverse=True)

        for subdir in subdirs:
            # Now find the last file in that dir
            dirpath = os.path.join(self.root, subdir)
            files = [f for f in os.listdir(dirpath) if hexname.search(f)]
            if not files:
                # Empty dir: try the next one
                continue

            # Find the numerical max
            filename = max(files, key=lambda name: int(name, 16))
            size = os.path.getsize(os.path.join(dirpath, filename))

            # Convert to row number
            return self._row_from_offset(subdir, filename, size)

        # No files, so no data.  We typically start at row 0 in this
        # case, although initial_nrows is specified during some tests
        # to exercise other parts of the code better.  Since we have
        # no files yet, round initial_nrows up so it points to a row
        # that would begin a new file.
        whole_files = ((self.initial_nrows + (self.rows_per_file - 1)) //
                       self.rows_per_file)
        return whole_files * self.rows_per_file

    def _offset_from_row(self, row):
        """Return a (subdir, filename, offset, count) tuple:

          subdir: subdirectory for the file
          filename: the filename that contains the specified row
          offset: byte offset of the specified row within the file
          count: number of rows (starting at offset) that fit in the file
        """
        filenum = row // self.rows_per_file
        row_in_file = row % self.rows_per_file
        # It's OK if these format specifiers are too short; the filenames
        # will just get longer but will still sort correctly.
        dirname = sprintf(b"%04x", filenum // self.files_per_dir)
        filename = sprintf(b"%04x", filenum % self.files_per_dir)
        offset = row_in_file * self.row_size
        count = self.rows_per_file - row_in_file
        return (dirname, filename, offset, count)

    def _row_from_offset(self, subdir, filename, offset):
        """Return the row number that corresponds to the given
        'subdir/filename' and byte-offset within that file.

        Raises ValueError if the offset is not a whole number of rows."""
        if (offset % self.row_size) != 0:
            # this shouldn't occur, unless there is some corruption somewhere
            raise ValueError("file offset is not a multiple of data size")
        dirnum = int(subdir, 16)
        filenum = dirnum * self.files_per_dir + int(filename, 16)
        rows_before = filenum * self.rows_per_file
        return rows_before + (offset // self.row_size)

    def _remove_or_truncate_file(self, subdir, filename, offset=0):
        """Remove the given file, and remove the subdirectory too
        if it's empty.  If offset is nonzero, truncate the file
        to that size instead."""
        # Close potentially open file in file_open LRU cache
        self.file_open.cache_remove(self, subdir, filename)
        target = os.path.join(self.root, subdir, filename)
        if offset:
            # Truncate it
            with open(target, "r+b") as f:
                f.truncate(offset)
            return
        # Remove file
        os.remove(target)
        # Try deleting subdir, too
        try:
            os.rmdir(os.path.join(self.root, subdir))
        except Exception:
            pass

    # Cache open files
    @nilmdb.utils.lru_cache(size=fd_cache_size,
                            onremove=lambda f: f.close())
    def file_open(self, subdir, filename):
        """Open and map a given 'subdir/filename' (relative to self.root).
        Will be automatically closed when evicted from the cache."""
        # Create path if it doesn't exist
        try:
            os.mkdir(os.path.join(self.root, subdir))
        except OSError:
            pass
        # Return a rocket.Rocket object, which contains the open file
        datafile = os.path.join(self.root, subdir, filename)
        return rocket.Rocket(self.layout, datafile)

    def append_data(self, data, start, end, binary=False):
        """Parse the formatted string in 'data', according to the
        current layout, and append it to the table.  If any timestamps
        are non-monotonic, or don't fall between 'start' and 'end',
        a ValueError is raised.

        Note that data is always of 'bytes' type.

        If 'binary' is True, the data should be in raw binary format
        instead: little-endian, matching the current table's layout,
        including the int64 timestamp.

        If this function succeeds, it returns normally.  Otherwise,
        the table is reverted back to its original state by truncating
        or deleting files as necessary."""
        data_offset = 0
        last_timestamp = nilmdb.utils.time.min_timestamp
        tot_rows = self.nrows
        count = 0
        linenum = 0
        try:
            while data_offset < len(data):
                # See how many rows we can fit into the current file,
                # and open it
                (subdir, fname, offs, count) = self._offset_from_row(tot_rows)
                rkt = self.file_open(subdir, fname)

                # Ask the rocket object to parse and append up to "count"
                # rows of data, verifying things along the way.
                try:
                    appender = (rkt.append_binary if binary
                                else rkt.append_string)
                    (added_rows, data_offset, last_timestamp, linenum
                     ) = appender(count, data, data_offset, linenum,
                                  start, end, last_timestamp)
                except rocket.ParseError as e:
                    (linenum, colnum, errtype, obj) = e.args
                    if binary:
                        where = "byte %d: " % (linenum)
                    else:
                        where = "line %d, column %d: " % (linenum, colnum)

                    # Extract out the error line, add column marker.
                    # There is nothing to show for binary input, or
                    # when the reported line isn't present in the data.
                    bad = b""
                    if not binary:
                        lines = data.splitlines()
                        try:
                            bad = (lines[linenum-1] +
                                   b'\n' + b' ' * (colnum - 1) + b'^')
                        except IndexError:
                            pass

                    # Describe the problem
                    if errtype == rocket.ERR_NON_MONOTONIC:
                        err = "timestamp is not monotonically increasing"
                    elif errtype == rocket.ERR_OUT_OF_INTERVAL:
                        if obj < start:
                            err = sprintf("Data timestamp %s < start time %s",
                                          timestamp_to_string(obj),
                                          timestamp_to_string(start))
                        else:
                            err = sprintf("Data timestamp %s >= end time %s",
                                          timestamp_to_string(obj),
                                          timestamp_to_string(end))
                    else:
                        err = str(obj)
                    bad_str = bad.decode('utf-8', errors='backslashreplace')
                    raise ValueError("error parsing input data: " +
                                     where + err + "\n" + bad_str)
                tot_rows += added_rows
        except Exception:
            # Some failure, so try to roll things back by truncating or
            # deleting files that we may have appended data to.
            cleanpos = self.nrows
            while cleanpos <= tot_rows:
                (subdir, fname, offs, count) = self._offset_from_row(cleanpos)
                self._remove_or_truncate_file(subdir, fname, offs)
                cleanpos += count
            # Re-raise original exception
            raise
        else:
            # Success, so update self.nrows accordingly
            self.nrows = tot_rows

    def get_data(self, start, stop, binary=False):
        """Extract data corresponding to Python range [n:m],
        and returns a formatted string

        Raises IndexError if the range is missing, reversed, or
        outside [0, nrows]."""
        if (start is None or stop is None or
                start > stop or start < 0 or stop > self.nrows):
            raise IndexError("Index out of range")

        pieces = []
        row = start
        remaining = stop - start
        while remaining > 0:
            # Take as much as this file holds, capped at what's left
            (subdir, filename, offset, count) = self._offset_from_row(row)
            count = min(count, remaining)
            rkt = self.file_open(subdir, filename)
            extract = rkt.extract_binary if binary else rkt.extract_string
            pieces.append(extract(offset, count))
            remaining -= count
            row += count
        return b"".join(pieces)

    def __getitem__(self, row):
        """Extract timestamps from a row, with table[n] notation."""
        if row < 0 or row >= self.nrows:
            raise IndexError("Index out of range")
        (subdir, filename, offset, _) = self._offset_from_row(row)
        rkt = self.file_open(subdir, filename)
        return rkt.extract_timestamp(offset)

    def _remove_rows(self, subdir, filename, start, stop):
        """Helper to mark specific rows as being removed from a
        file, and potentially remove or truncate the file itself.

        'start' and 'stop' are row numbers within the file."""
        # Close potentially open file in file_open LRU cache
        self.file_open.cache_remove(self, subdir, filename)

        # We keep a file like 0000.removed that contains a list of
        # which rows have been "removed".  Note that we never have to
        # remove entries from this list, because we never decrease
        # self.nrows, and so we will never overwrite those locations in the
        # file.  Only when the list covers the entire extent of the
        # file will that file be removed.
        datafile = os.path.join(self.root, subdir, filename)
        cachefile = datafile + b".removed"
        try:
            with open(cachefile, "rb") as f:
                ranges = pickle.load(f)
            cachefile_present = True
        except Exception:
            ranges = []
            cachefile_present = False

        # Append our new range and sort
        ranges.append((start, stop))
        ranges.sort()

        # Merge adjacent ranges into "merged": a range that begins
        # exactly where the previous one ended extends it; anything
        # else starts a new entry.  There is always at least one entry
        # (the range we just added).
        merged = []
        for rng in ranges:
            if merged and merged[-1][1] == rng[0]:
                merged[-1] = (merged[-1][0], rng[1])
            else:
                merged.append(rng)

        # If the range covered the whole file, we can delete it now.
        # Note that the last file in a table may be only partially
        # full (smaller than self.rows_per_file).  We purposely leave
        # those files around rather than deleting them, because the
        # remainder will be filled on a subsequent append(), and things
        # are generally easier if we don't have to special-case that.
        whole_file = (len(merged) == 1 and
                      merged[0][0] == 0 and
                      merged[0][1] == self.rows_per_file)
        if whole_file:
            # Delete files
            if cachefile_present:
                os.remove(cachefile)
            self._remove_or_truncate_file(subdir, filename, 0)
        else:
            # File needs to stick around.  This means we can get
            # degenerate cases where we have large files containing as
            # little as one row.  Try to punch a hole in the file,
            # so that this region doesn't take up filesystem space.
            hole_offset = start * self.row_size
            hole_length = (stop - start) * self.row_size
            nilmdb.utils.fallocate.punch_hole(datafile,
                                              hole_offset, hole_length)

            # Update cache.  Try to do it atomically.
            nilmdb.utils.atomic.replace_file(cachefile,
                                             pickle.dumps(merged, 2))

    def remove(self, start, stop):
        """Remove specified rows [start, stop) from this table.

        If a file is left empty, it is fully removed.  Otherwise, a
        parallel data file is used to remember which rows have been
        removed, and the file is otherwise untouched."""
        if start < 0 or start > stop or stop > self.nrows:
            raise IndexError("Index out of range")

        row = start
        remaining = stop - start
        while remaining:
            # Loop through each file that we need to touch
            (subdir, filename, offset, count) = self._offset_from_row(row)
            count = min(count, remaining)
            first = offset // self.row_size
            # Mark the rows as being removed
            self._remove_rows(subdir, filename, first, first + count)
            remaining -= count
            row += count
|
15
nilmdb/server/errors.py
Normal file
15
nilmdb/server/errors.py
Normal file
|
@ -0,0 +1,15 @@
|
|||
"""Exceptions"""
|
||||
|
||||
|
||||
class NilmDBError(Exception):
|
||||
"""Base exception for NilmDB errors"""
|
||||
def __init__(self, msg="Unspecified error"):
|
||||
super().__init__(msg)
|
||||
|
||||
|
||||
class StreamError(NilmDBError):
|
||||
pass
|
||||
|
||||
|
||||
class OverlapError(NilmDBError):
|
||||
pass
|
329
nilmdb/server/interval.pyx
Normal file
329
nilmdb/server/interval.pyx
Normal file
|
@ -0,0 +1,329 @@
|
|||
# cython: language_level=2
|
||||
|
||||
"""Interval, IntervalSet
|
||||
|
||||
The Interval implemented here is just like
|
||||
nilmdb.utils.interval.Interval, except implemented in Cython for
|
||||
speed.
|
||||
|
||||
Represents an interval of time, and a set of such intervals.
|
||||
|
||||
Intervals are half-open, ie. they include data points with timestamps
|
||||
[start, end)
|
||||
"""
|
||||
|
||||
# First implementation kept a sorted list of intervals and used
|
||||
# bisect() to optimize some operations, but this was too slow.
|
||||
|
||||
# Second version was based on the quicksect implementation from
|
||||
# python-bx, modified slightly to handle floating point intervals.
|
||||
# This didn't support deletion.
|
||||
|
||||
# Third version is more similar to the first version, using a rb-tree
|
||||
# instead of a simple sorted list to maintain O(log n) operations.
|
||||
|
||||
# Fourth version is an optimized rb-tree that stores interval starts
|
||||
# and ends directly in the tree, like bxinterval did.
|
||||
|
||||
from ..utils.time import min_timestamp as nilmdb_min_timestamp
|
||||
from ..utils.time import max_timestamp as nilmdb_max_timestamp
|
||||
from ..utils.time import timestamp_to_string
|
||||
from ..utils.iterator import imerge
|
||||
from ..utils.interval import IntervalError
|
||||
import itertools
|
||||
|
||||
cimport rbtree
|
||||
from libc.stdint cimport uint64_t, int64_t
|
||||
|
||||
ctypedef int64_t timestamp_t
|
||||
|
||||
cdef class Interval:
|
||||
"""Represents an interval of time."""
|
||||
|
||||
cdef public timestamp_t start, end
|
||||
|
||||
def __init__(self, timestamp_t start, timestamp_t end):
|
||||
"""
|
||||
'start' and 'end' are arbitrary numbers that represent time
|
||||
"""
|
||||
if start >= end:
|
||||
# Explicitly disallow zero-width intervals (since they're half-open)
|
||||
raise IntervalError("start %s must precede end %s" % (start, end))
|
||||
self.start = start
|
||||
self.end = end
|
||||
|
||||
def __repr__(self):
|
||||
s = repr(self.start) + ", " + repr(self.end)
|
||||
return self.__class__.__name__ + "(" + s + ")"
|
||||
|
||||
def __str__(self):
|
||||
return ("[" + timestamp_to_string(self.start) +
|
||||
" -> " + timestamp_to_string(self.end) + ")")
|
||||
|
||||
# Compare two intervals. If non-equal, order by start then end
|
||||
def __lt__(self, Interval other):
|
||||
return (self.start, self.end) < (other.start, other.end)
|
||||
def __gt__(self, Interval other):
|
||||
return (self.start, self.end) > (other.start, other.end)
|
||||
def __le__(self, Interval other):
|
||||
return (self.start, self.end) <= (other.start, other.end)
|
||||
def __ge__(self, Interval other):
|
||||
return (self.start, self.end) >= (other.start, other.end)
|
||||
def __eq__(self, Interval other):
|
||||
return (self.start, self.end) == (other.start, other.end)
|
||||
def __ne__(self, Interval other):
|
||||
return (self.start, self.end) != (other.start, other.end)
|
||||
|
||||
cpdef intersects(self, Interval other):
|
||||
"""Return True if two Interval objects intersect"""
|
||||
if (self.end <= other.start or self.start >= other.end):
|
||||
return False
|
||||
return True
|
||||
|
||||
cpdef subset(self, timestamp_t start, timestamp_t end):
|
||||
"""Return a new Interval that is a subset of this one"""
|
||||
# A subclass that tracks additional data might override this.
|
||||
if start < self.start or end > self.end:
|
||||
raise IntervalError("not a subset")
|
||||
return Interval(start, end)
|
||||
|
||||
cdef class DBInterval(Interval):
|
||||
"""
|
||||
Like Interval, but also tracks corresponding start/end times and
|
||||
positions within the database. These are not currently modified
|
||||
when subsets are taken, but can be used later to help zero in on
|
||||
database positions.
|
||||
|
||||
The actual 'start' and 'end' will always fall within the database
|
||||
start and end, e.g.:
|
||||
db_start = 100, db_startpos = 10000
|
||||
start = 123
|
||||
end = 150
|
||||
db_end = 200, db_endpos = 20000
|
||||
"""
|
||||
|
||||
cpdef public timestamp_t db_start, db_end
|
||||
cpdef public uint64_t db_startpos, db_endpos
|
||||
|
||||
def __init__(self, start, end,
|
||||
db_start, db_end,
|
||||
db_startpos, db_endpos):
|
||||
"""
|
||||
'db_start' and 'db_end' are arbitrary numbers that represent
|
||||
time. They must span (contain or equal) the time interval
|
||||
covered by 'start' and 'end'. The 'db_startpos' and
|
||||
'db_endpos' are arbitrary database position indicators that
|
||||
correspond to those points.
|
||||
"""
|
||||
Interval.__init__(self, start, end)
|
||||
self.db_start = db_start
|
||||
self.db_end = db_end
|
||||
self.db_startpos = db_startpos
|
||||
self.db_endpos = db_endpos
|
||||
if db_start > start or db_end < end:
|
||||
raise IntervalError("database times must span the interval times")
|
||||
|
||||
def __repr__(self):
|
||||
s = repr(self.start) + ", " + repr(self.end)
|
||||
s += ", " + repr(self.db_start) + ", " + repr(self.db_end)
|
||||
s += ", " + repr(self.db_startpos) + ", " + repr(self.db_endpos)
|
||||
return self.__class__.__name__ + "(" + s + ")"
|
||||
|
||||
cpdef subset(self, timestamp_t start, timestamp_t end):
|
||||
"""
|
||||
Return a new DBInterval that is a subset of this one
|
||||
"""
|
||||
if start < self.start or end > self.end:
|
||||
raise IntervalError("not a subset")
|
||||
return DBInterval(start, end,
|
||||
self.db_start, self.db_end,
|
||||
self.db_startpos, self.db_endpos)
|
||||
|
||||
cdef class IntervalSet:
|
||||
"""
|
||||
A non-intersecting set of intervals.
|
||||
"""
|
||||
|
||||
cdef public rbtree.RBTree tree
|
||||
|
||||
def __init__(self, source=None):
|
||||
"""
|
||||
'source' is an Interval or IntervalSet to add.
|
||||
"""
|
||||
self.tree = rbtree.RBTree()
|
||||
if source is not None:
|
||||
self += source
|
||||
|
||||
def __iter__(self):
|
||||
for node in self.tree:
|
||||
if node.obj:
|
||||
yield node.obj
|
||||
|
||||
def __len__(self):
|
||||
return sum(1 for x in self)
|
||||
|
||||
def __repr__(self):
|
||||
descs = [ repr(x) for x in self ]
|
||||
return self.__class__.__name__ + "([" + ", ".join(descs) + "])"
|
||||
|
||||
def __str__(self):
|
||||
descs = [ str(x) for x in self ]
|
||||
return "[" + ", ".join(descs) + "]"
|
||||
|
||||
def __match__(self, other):
|
||||
# This isn't particularly efficient, but it shouldn't get used in the
|
||||
# general case.
|
||||
"""Test equality of two IntervalSets.
|
||||
|
||||
Treats adjacent Intervals as equivalent to one long interval,
|
||||
so this function really tests whether the IntervalSets cover
|
||||
the same spans of time."""
|
||||
i = 0
|
||||
j = 0
|
||||
outside = True
|
||||
|
||||
def is_adjacent(a, b):
|
||||
"""Return True if two Intervals are adjacent (same end or start)"""
|
||||
if a.end == b.start or b.end == a.start:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
this = list(self)
|
||||
that = list(other)
|
||||
|
||||
try:
|
||||
while True:
|
||||
if (outside):
|
||||
# To match, we need to be finished with both sets
|
||||
if (i >= len(this) and j >= len(that)):
|
||||
return True
|
||||
# Or the starts need to match
|
||||
if (this[i].start != that[j].start):
|
||||
return False
|
||||
outside = False
|
||||
else:
|
||||
# We can move on if the two interval ends match
|
||||
if (this[i].end == that[j].end):
|
||||
i += 1
|
||||
j += 1
|
||||
outside = True
|
||||
else:
|
||||
# Whichever ends first needs to be adjacent to the next
|
||||
if (this[i].end < that[j].end):
|
||||
if (not is_adjacent(this[i],this[i+1])):
|
||||
return False
|
||||
i += 1
|
||||
else:
|
||||
if (not is_adjacent(that[j],that[j+1])):
|
||||
return False
|
||||
j += 1
|
||||
except IndexError:
|
||||
return False
|
||||
|
||||
# Use __richcmp__ instead of __eq__, __ne__ for Cython.
|
||||
def __richcmp__(self, other, int op):
|
||||
if op == 2: # ==
|
||||
return self.__match__(other)
|
||||
elif op == 3: # !=
|
||||
return not self.__match__(other)
|
||||
return False
|
||||
#def __eq__(self, other):
|
||||
# return self.__match__(other)
|
||||
#
|
||||
#def __ne__(self, other):
|
||||
# return not self.__match__(other)
|
||||
|
||||
def __iadd__(self, object other not None):
|
||||
"""Inplace add -- modifies self
|
||||
|
||||
This throws an exception if the regions being added intersect."""
|
||||
if isinstance(other, Interval):
|
||||
if self.intersects(other):
|
||||
raise IntervalError("Tried to add overlapping interval "
|
||||
"to this set")
|
||||
self.tree.insert(rbtree.RBNode(other.start, other.end, other))
|
||||
else:
|
||||
for x in other:
|
||||
self.__iadd__(x)
|
||||
return self
|
||||
|
||||
def iadd_nocheck(self, Interval other not None):
|
||||
"""Inplace add -- modifies self.
|
||||
'Optimized' version that doesn't check for intersection and
|
||||
only inserts the new interval into the tree."""
|
||||
self.tree.insert(rbtree.RBNode(other.start, other.end, other))
|
||||
|
||||
def __isub__(self, Interval other not None):
|
||||
"""Inplace subtract -- modifies self
|
||||
|
||||
Removes an interval from the set. Must exist exactly
|
||||
as provided -- cannot remove a subset of an existing interval."""
|
||||
i = self.tree.find(other.start, other.end)
|
||||
if i is None:
|
||||
raise IntervalError("interval " + str(other) + " not in tree")
|
||||
self.tree.delete(i)
|
||||
return self
|
||||
|
||||
def __add__(self, other not None):
|
||||
"""Add -- returns a new object"""
|
||||
new = IntervalSet(self)
|
||||
new += IntervalSet(other)
|
||||
return new
|
||||
|
||||
def __and__(self, other not None):
|
||||
"""
|
||||
Compute a new IntervalSet from the intersection of this
|
||||
IntervalSet with one other interval.
|
||||
|
||||
Output intervals are built as subsets of the intervals in the
|
||||
first argument (self).
|
||||
"""
|
||||
out = IntervalSet()
|
||||
for i in self.intersection(other):
|
||||
out.tree.insert(rbtree.RBNode(i.start, i.end, i))
|
||||
return out
|
||||
|
||||
def intersection(self, Interval interval not None, orig = False):
|
||||
"""
|
||||
Compute a sequence of intervals that correspond to the
|
||||
intersection between `self` and the provided interval.
|
||||
Returns a generator that yields each of these intervals
|
||||
in turn.
|
||||
|
||||
Output intervals are built as subsets of the intervals in the
|
||||
first argument (self).
|
||||
|
||||
If orig = True, also return the original interval that was
|
||||
(potentially) subsetted to make the one that is being
|
||||
returned.
|
||||
"""
|
||||
if orig:
|
||||
for n in self.tree.intersect(interval.start, interval.end):
|
||||
i = n.obj
|
||||
subset = i.subset(max(i.start, interval.start),
|
||||
min(i.end, interval.end))
|
||||
yield (subset, i)
|
||||
else:
|
||||
for n in self.tree.intersect(interval.start, interval.end):
|
||||
i = n.obj
|
||||
subset = i.subset(max(i.start, interval.start),
|
||||
min(i.end, interval.end))
|
||||
yield subset
|
||||
|
||||
cpdef intersects(self, Interval other):
|
||||
"""Return True if this IntervalSet intersects another interval"""
|
||||
for n in self.tree.intersect(other.start, other.end):
|
||||
if n.obj.intersects(other):
|
||||
return True
|
||||
return False
|
||||
|
||||
def find_end(self, timestamp_t t):
|
||||
"""
|
||||
Return an Interval from this tree that ends at time t, or
|
||||
None if it doesn't exist.
|
||||
"""
|
||||
n = self.tree.find_left_end(t)
|
||||
if n and n.obj.end == t:
|
||||
return n.obj
|
||||
return None
|
1
nilmdb/server/interval.pyxdep
Normal file
1
nilmdb/server/interval.pyxdep
Normal file
|
@ -0,0 +1 @@
|
|||
rbtree.pxd
|
717
nilmdb/server/nilmdb.py
Normal file
717
nilmdb/server/nilmdb.py
Normal file
|
@ -0,0 +1,717 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""NilmDB
|
||||
|
||||
Object that represents a NILM database file.
|
||||
|
||||
Manages both the SQL database and the table storage backend.
|
||||
"""
|
||||
|
||||
import os
|
||||
import errno
|
||||
import sqlite3
|
||||
|
||||
import nilmdb.utils
|
||||
from nilmdb.utils.printf import printf
|
||||
from nilmdb.utils.time import timestamp_to_bytes
|
||||
|
||||
from nilmdb.utils.interval import IntervalError
|
||||
from nilmdb.server.interval import Interval, DBInterval, IntervalSet
|
||||
|
||||
from nilmdb.server import bulkdata
|
||||
from nilmdb.server.errors import NilmDBError, StreamError, OverlapError
|
||||
|
||||
# Note about performance and transactions:
|
||||
#
|
||||
# Committing a transaction in the default sync mode (PRAGMA synchronous=FULL)
|
||||
# takes about 125msec. sqlite3 will commit transactions at 3 times:
|
||||
# 1: explicit con.commit()
|
||||
# 2: between a series of DML commands and non-DML commands, e.g.
|
||||
# after a series of INSERT, SELECT, but before a CREATE TABLE or PRAGMA.
|
||||
# 3: at the end of an explicit transaction, e.g. "with self.con as con:"
|
||||
#
|
||||
# To speed things up, we can set 'PRAGMA synchronous=OFF'. Or, it
|
||||
# seems that 'PRAGMA synchronous=NORMAL' and 'PRAGMA journal_mode=WAL'
|
||||
# give an equivalent speedup more safely. That is what is used here.
|
||||
_sql_schema_updates = {
|
||||
0: {"next": 1, "sql": """
|
||||
-- All streams
|
||||
CREATE TABLE streams(
|
||||
id INTEGER PRIMARY KEY, -- stream ID
|
||||
path TEXT UNIQUE NOT NULL, -- path, e.g. '/newton/prep'
|
||||
layout TEXT NOT NULL -- layout name, e.g. float32_8
|
||||
);
|
||||
|
||||
-- Individual timestamped ranges in those streams.
|
||||
-- For a given start_time and end_time, this tells us that the
|
||||
-- data is stored between start_pos and end_pos.
|
||||
-- Times are stored as μs since Unix epoch
|
||||
-- Positions are opaque: PyTables rows, file offsets, etc.
|
||||
--
|
||||
-- Note: end_pos points to the row _after_ end_time, so end_pos-1
|
||||
-- is the last valid row.
|
||||
CREATE TABLE ranges(
|
||||
stream_id INTEGER NOT NULL,
|
||||
start_time INTEGER NOT NULL,
|
||||
end_time INTEGER NOT NULL,
|
||||
start_pos INTEGER NOT NULL,
|
||||
end_pos INTEGER NOT NULL
|
||||
);
|
||||
CREATE INDEX _ranges_index ON ranges (stream_id, start_time, end_time);
|
||||
"""},
|
||||
|
||||
1: {"next": 3, "sql": """
|
||||
-- Generic dictionary-type metadata that can be associated with a stream
|
||||
CREATE TABLE metadata(
|
||||
stream_id INTEGER NOT NULL,
|
||||
key TEXT NOT NULL,
|
||||
value TEXT
|
||||
);
|
||||
"""},
|
||||
|
||||
2: {"error": "old format with floating-point timestamps requires "
|
||||
"nilmdb 1.3.1 or older"},
|
||||
|
||||
3: {"next": None},
|
||||
}
|
||||
|
||||
|
||||
@nilmdb.utils.must_close()
|
||||
class NilmDB():
|
||||
verbose = 0
|
||||
|
||||
def __init__(self, basepath,
|
||||
max_results=None,
|
||||
max_removals=None,
|
||||
max_int_removals=None,
|
||||
bulkdata_args=None):
|
||||
"""Initialize NilmDB at the given basepath.
|
||||
Other arguments are for debugging / testing:
|
||||
|
||||
'max_results' is the max rows to send in a single
|
||||
stream_intervals or stream_extract response.
|
||||
|
||||
'max_removals' is the max rows to delete at once
|
||||
in stream_remove.
|
||||
|
||||
'max_int_removals' is the max intervals to delete
|
||||
at once in stream_remove.
|
||||
|
||||
'bulkdata_args' is kwargs for the bulkdata module.
|
||||
"""
|
||||
if bulkdata_args is None:
|
||||
bulkdata_args = {}
|
||||
|
||||
# set up path
|
||||
self.basepath = os.path.abspath(basepath)
|
||||
|
||||
# Create the database path if it doesn't exist
|
||||
try:
|
||||
os.makedirs(self.basepath)
|
||||
except OSError as e:
|
||||
if e.errno != errno.EEXIST:
|
||||
raise IOError("can't create tree " + self.basepath)
|
||||
|
||||
# Our data goes inside it
|
||||
self.data = bulkdata.BulkData(self.basepath, **bulkdata_args)
|
||||
|
||||
# SQLite database too
|
||||
sqlfilename = os.path.join(self.basepath, "data.sql")
|
||||
self.con = sqlite3.connect(sqlfilename, check_same_thread=True)
|
||||
try:
|
||||
self._sql_schema_update()
|
||||
except Exception:
|
||||
self.data.close()
|
||||
raise
|
||||
|
||||
# See big comment at top about the performance implications of this
|
||||
self.con.execute("PRAGMA synchronous=NORMAL")
|
||||
self.con.execute("PRAGMA journal_mode=WAL")
|
||||
|
||||
# Approximate largest number of elements that we want to send
|
||||
# in a single reply (for stream_intervals, stream_extract).
|
||||
self.max_results = max_results or 16384
|
||||
|
||||
# Remove up to this many rows per call to stream_remove.
|
||||
self.max_removals = max_removals or 1048576
|
||||
|
||||
# Remove up to this many intervals per call to stream_remove.
|
||||
self.max_int_removals = max_int_removals or 4096
|
||||
|
||||
def get_basepath(self):
|
||||
return self.basepath
|
||||
|
||||
def close(self):
|
||||
if self.con:
|
||||
self.con.commit()
|
||||
self.con.close()
|
||||
self.con = None
|
||||
self.data.close()
|
||||
|
||||
def _sql_schema_update(self):
|
||||
cur = self.con.cursor()
|
||||
version = cur.execute("PRAGMA user_version").fetchone()[0]
|
||||
oldversion = version
|
||||
|
||||
while True:
|
||||
if version not in _sql_schema_updates:
|
||||
raise Exception(self.basepath + ": unknown database version "
|
||||
+ str(version))
|
||||
update = _sql_schema_updates[version]
|
||||
if "error" in update:
|
||||
raise Exception(self.basepath + ": can't use database version "
|
||||
+ str(version) + ": " + update["error"])
|
||||
if update["next"] is None:
|
||||
break
|
||||
cur.executescript(update["sql"])
|
||||
version = update["next"]
|
||||
if self.verbose:
|
||||
printf("Database schema updated to %d\n", version)
|
||||
|
||||
if version != oldversion:
|
||||
with self.con:
|
||||
cur.execute("PRAGMA user_version = {v:d}".format(v=version))
|
||||
|
||||
def _check_user_times(self, start, end):
|
||||
if start is None:
|
||||
start = nilmdb.utils.time.min_timestamp
|
||||
if end is None:
|
||||
end = nilmdb.utils.time.max_timestamp
|
||||
if start >= end:
|
||||
raise NilmDBError("start must precede end")
|
||||
return (start, end)
|
||||
|
||||
@nilmdb.utils.lru_cache(size=64)
|
||||
def _get_intervals(self, stream_id):
|
||||
"""
|
||||
Return a mutable IntervalSet corresponding to the given stream ID.
|
||||
"""
|
||||
iset = IntervalSet()
|
||||
result = self.con.execute("SELECT start_time, end_time, "
|
||||
"start_pos, end_pos "
|
||||
"FROM ranges "
|
||||
"WHERE stream_id=?", (stream_id,))
|
||||
try:
|
||||
for (start_time, end_time, start_pos, end_pos) in result:
|
||||
iset += DBInterval(start_time, end_time,
|
||||
start_time, end_time,
|
||||
start_pos, end_pos)
|
||||
except IntervalError:
|
||||
raise NilmDBError("unexpected overlap in ranges table!")
|
||||
|
||||
return iset
|
||||
|
||||
def _sql_interval_insert(self, id, start, end, start_pos, end_pos):
|
||||
"""Helper that adds interval to the SQL database only"""
|
||||
self.con.execute("INSERT INTO ranges "
|
||||
"(stream_id,start_time,end_time,start_pos,end_pos) "
|
||||
"VALUES (?,?,?,?,?)",
|
||||
(id, start, end, start_pos, end_pos))
|
||||
|
||||
def _sql_interval_delete(self, id, start, end, start_pos, end_pos):
|
||||
"""Helper that removes interval from the SQL database only"""
|
||||
self.con.execute("DELETE FROM ranges WHERE "
|
||||
"stream_id=? AND start_time=? AND "
|
||||
"end_time=? AND start_pos=? AND end_pos=?",
|
||||
(id, start, end, start_pos, end_pos))
|
||||
|
||||
def _add_interval(self, stream_id, interval, start_pos, end_pos):
|
||||
"""
|
||||
Add interval to the internal interval cache, and to the database.
|
||||
Note: arguments must be ints (not numpy.int64, etc)
|
||||
"""
|
||||
# Load this stream's intervals
|
||||
iset = self._get_intervals(stream_id)
|
||||
|
||||
# Check for adjacency. If there's an interval in the database
|
||||
# that ends exactly when this one starts, and the database
|
||||
# rows match up, we can make one interval that covers the
|
||||
# time range [adjacent.start -> interval.end)
|
||||
# and database rows [adjacent.db_startpos -> end_pos).
|
||||
# Only do this if the resulting interval isn't too large.
|
||||
max_merged_rows = 8000 * 60 * 60 * 1.05 # 1.05 hours at 8 KHz
|
||||
adjacent = iset.find_end(interval.start)
|
||||
if (adjacent is not None and
|
||||
start_pos == adjacent.db_endpos and
|
||||
(end_pos - adjacent.db_startpos) < max_merged_rows):
|
||||
# First delete the old one, both from our iset and the
|
||||
# database
|
||||
iset -= adjacent
|
||||
self._sql_interval_delete(stream_id,
|
||||
adjacent.db_start, adjacent.db_end,
|
||||
adjacent.db_startpos, adjacent.db_endpos)
|
||||
|
||||
# Now update our interval so the fallthrough add is
|
||||
# correct.
|
||||
interval.start = adjacent.start
|
||||
start_pos = adjacent.db_startpos
|
||||
|
||||
# Add the new interval to the iset
|
||||
iset.iadd_nocheck(DBInterval(interval.start, interval.end,
|
||||
interval.start, interval.end,
|
||||
start_pos, end_pos))
|
||||
|
||||
# Insert into the database
|
||||
self._sql_interval_insert(stream_id, interval.start, interval.end,
|
||||
int(start_pos), int(end_pos))
|
||||
|
||||
self.con.commit()
|
||||
|
||||
def _remove_interval(self, stream_id, original, remove):
|
||||
"""
|
||||
Remove an interval from the internal cache and the database.
|
||||
|
||||
stream_id: id of stream
|
||||
original: original DBInterval; must be already present in DB
|
||||
remove: DBInterval to remove; must be subset of 'original'
|
||||
"""
|
||||
# Load this stream's intervals
|
||||
iset = self._get_intervals(stream_id)
|
||||
|
||||
# Remove existing interval from the cached set and the database
|
||||
iset -= original
|
||||
self._sql_interval_delete(stream_id,
|
||||
original.db_start, original.db_end,
|
||||
original.db_startpos, original.db_endpos)
|
||||
|
||||
# Add back the intervals that would be left over if the
|
||||
# requested interval is removed. There may be two of them, if
|
||||
# the removed piece was in the middle.
|
||||
def add(iset, start, end, start_pos, end_pos):
|
||||
iset += DBInterval(start, end, start, end, start_pos, end_pos)
|
||||
self._sql_interval_insert(stream_id, start, end,
|
||||
start_pos, end_pos)
|
||||
|
||||
if original.start != remove.start:
|
||||
# Interval before the removed region
|
||||
add(iset, original.start, remove.start,
|
||||
original.db_startpos, remove.db_startpos)
|
||||
|
||||
if original.end != remove.end:
|
||||
# Interval after the removed region
|
||||
add(iset, remove.end, original.end,
|
||||
remove.db_endpos, original.db_endpos)
|
||||
|
||||
# Commit SQL changes
|
||||
self.con.commit()
|
||||
|
||||
return
|
||||
|
||||
def stream_list(self, path=None, layout=None, extended=False):
|
||||
"""Return list of lists of all streams in the database.
|
||||
|
||||
If path is specified, include only streams with a path that
|
||||
matches the given string.
|
||||
|
||||
If layout is specified, include only streams with a layout
|
||||
that matches the given string.
|
||||
|
||||
If extended=False, returns a list of lists containing
|
||||
the path and layout: [ path, layout ]
|
||||
|
||||
If extended=True, returns a list of lists containing
|
||||
more information:
|
||||
path
|
||||
layout
|
||||
interval_min (earliest interval start)
|
||||
interval_max (latest interval end)
|
||||
rows (total number of rows of data)
|
||||
time (total time covered by this stream, in timestamp units)
|
||||
"""
|
||||
params = ()
|
||||
query = "SELECT streams.path, streams.layout"
|
||||
if extended:
|
||||
query += ", min(ranges.start_time), max(ranges.end_time) "
|
||||
query += ", coalesce(sum(ranges.end_pos - ranges.start_pos), 0) "
|
||||
query += ", coalesce(sum(ranges.end_time - ranges.start_time), 0) "
|
||||
query += " FROM streams"
|
||||
if extended:
|
||||
query += " LEFT JOIN ranges ON streams.id = ranges.stream_id"
|
||||
query += " WHERE 1=1"
|
||||
if layout is not None:
|
||||
query += " AND streams.layout=?"
|
||||
params += (layout,)
|
||||
if path is not None:
|
||||
query += " AND streams.path=?"
|
||||
params += (path,)
|
||||
query += " GROUP BY streams.id ORDER BY streams.path"
|
||||
result = self.con.execute(query, params).fetchall()
|
||||
return [list(x) for x in result]
|
||||
|
||||
def stream_intervals(self, path, start=None, end=None, diffpath=None):
|
||||
"""
|
||||
List all intervals in 'path' between 'start' and 'end'. If
|
||||
'diffpath' is given, list instead the set-difference
|
||||
between the intervals in the two streams; i.e. all interval
|
||||
ranges that are present in 'path' but not 'diffpath'.
|
||||
|
||||
Returns (intervals, restart) tuple.
|
||||
|
||||
'intervals' is a list of [start,end] timestamps of all intervals
|
||||
that exist for path, between start and end.
|
||||
|
||||
'restart', if not None, means that there were too many results
|
||||
to return in a single request. The data is complete from the
|
||||
starting timestamp to the point at which it was truncated, and
|
||||
a new request with a start time of 'restart' will fetch the
|
||||
next block of data.
|
||||
"""
|
||||
stream_id = self._stream_id(path)
|
||||
intervals = self._get_intervals(stream_id)
|
||||
if diffpath:
|
||||
diffstream_id = self._stream_id(diffpath)
|
||||
diffintervals = self._get_intervals(diffstream_id)
|
||||
(start, end) = self._check_user_times(start, end)
|
||||
requested = Interval(start, end)
|
||||
result = []
|
||||
if diffpath:
|
||||
getter = nilmdb.utils.interval.set_difference(
|
||||
intervals.intersection(requested),
|
||||
diffintervals.intersection(requested))
|
||||
else:
|
||||
getter = intervals.intersection(requested)
|
||||
for n, i in enumerate(getter):
|
||||
if n >= self.max_results:
|
||||
restart = i.start
|
||||
break
|
||||
result.append([i.start, i.end])
|
||||
else:
|
||||
restart = None
|
||||
return (result, restart)
|
||||
|
||||
def stream_create(self, path, layout_name):
|
||||
"""Create a new table in the database.
|
||||
|
||||
path: path to the data (e.g. '/newton/prep').
|
||||
Paths must contain at least two elements, e.g.:
|
||||
/newton/prep
|
||||
/newton/raw
|
||||
/newton/upstairs/prep
|
||||
/newton/upstairs/raw
|
||||
|
||||
layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8'
|
||||
"""
|
||||
# Create the bulk storage. Raises ValueError on error, which we
|
||||
# pass along.
|
||||
self.data.create(path, layout_name)
|
||||
|
||||
# Insert into SQL database once the bulk storage is happy
|
||||
with self.con as con:
|
||||
con.execute("INSERT INTO streams (path, layout) VALUES (?,?)",
|
||||
(path, layout_name))
|
||||
|
||||
def _stream_id(self, path):
|
||||
"""Return unique stream ID"""
|
||||
result = self.con.execute("SELECT id FROM streams WHERE path=?",
|
||||
(path,)).fetchone()
|
||||
if result is None:
|
||||
raise StreamError("No stream at path " + path)
|
||||
return result[0]
|
||||
|
||||
def stream_set_metadata(self, path, data):
|
||||
"""Set stream metadata from a dictionary, e.g.
|
||||
{ description: 'Downstairs lighting',
|
||||
v_scaling: 123.45 }
|
||||
This replaces all existing metadata.
|
||||
"""
|
||||
stream_id = self._stream_id(path)
|
||||
with self.con as con:
|
||||
con.execute("DELETE FROM metadata WHERE stream_id=?", (stream_id,))
|
||||
for key in data:
|
||||
if data[key] != '':
|
||||
con.execute("INSERT INTO metadata VALUES (?, ?, ?)",
|
||||
(stream_id, key, data[key]))
|
||||
|
||||
def stream_get_metadata(self, path):
|
||||
"""Return stream metadata as a dictionary."""
|
||||
stream_id = self._stream_id(path)
|
||||
result = self.con.execute("SELECT metadata.key, metadata.value "
|
||||
"FROM metadata "
|
||||
"WHERE metadata.stream_id=?", (stream_id,))
|
||||
data = {}
|
||||
for (key, value) in result:
|
||||
data[key] = value
|
||||
return data
|
||||
|
||||
def stream_update_metadata(self, path, newdata):
|
||||
"""Update stream metadata from a dictionary"""
|
||||
data = self.stream_get_metadata(path)
|
||||
data.update(newdata)
|
||||
self.stream_set_metadata(path, data)
|
||||
|
||||
def stream_rename(self, oldpath, newpath):
|
||||
"""Rename a stream."""
|
||||
stream_id = self._stream_id(oldpath)
|
||||
|
||||
# Rename the data
|
||||
self.data.rename(oldpath, newpath)
|
||||
|
||||
# Rename the stream in the database
|
||||
with self.con as con:
|
||||
con.execute("UPDATE streams SET path=? WHERE id=?",
|
||||
(newpath, stream_id))
|
||||
|
||||
def stream_destroy(self, path):
|
||||
"""Fully remove a table from the database. Fails if there are
|
||||
any intervals present; remove them first. Metadata is
|
||||
also removed."""
|
||||
stream_id = self._stream_id(path)
|
||||
|
||||
# Verify that no intervals are present, and clear the cache
|
||||
iset = self._get_intervals(stream_id)
|
||||
if iset:
|
||||
raise NilmDBError("all intervals must be removed before "
|
||||
"destroying a stream")
|
||||
self._get_intervals.cache_remove(self, stream_id)
|
||||
|
||||
# Delete the bulkdata storage
|
||||
self.data.destroy(path)
|
||||
|
||||
# Delete metadata, stream, intervals (should be none)
|
||||
with self.con as con:
|
||||
con.execute("DELETE FROM metadata WHERE stream_id=?", (stream_id,))
|
||||
con.execute("DELETE FROM ranges WHERE stream_id=?", (stream_id,))
|
||||
con.execute("DELETE FROM streams WHERE id=?", (stream_id,))
|
||||
|
||||
def stream_insert(self, path, start, end, data, binary=False):
|
||||
"""Insert new data into the database.
|
||||
path: Path at which to add the data
|
||||
start: Starting timestamp
|
||||
end: Ending timestamp
|
||||
data: Textual data, formatted according to the layout of path
|
||||
|
||||
'binary', if True, means that 'data' is raw binary:
|
||||
little-endian, matching the current table's layout,
|
||||
including the int64 timestamp.
|
||||
"""
|
||||
# First check for basic overlap using timestamp info given.
|
||||
stream_id = self._stream_id(path)
|
||||
iset = self._get_intervals(stream_id)
|
||||
interval = Interval(start, end)
|
||||
if iset.intersects(interval):
|
||||
raise OverlapError("new data overlaps existing data at range: "
|
||||
+ str(iset & interval))
|
||||
|
||||
# Tentatively append the data. This will raise a ValueError if
|
||||
# there are any parse errors.
|
||||
table = self.data.getnode(path)
|
||||
row_start = table.nrows
|
||||
table.append_data(data, start, end, binary)
|
||||
row_end = table.nrows
|
||||
|
||||
# Insert the record into the sql database.
|
||||
self._add_interval(stream_id, interval, row_start, row_end)
|
||||
|
||||
# And that's all
|
||||
return
|
||||
|
||||
def _bisect_left(self, a, x, lo, hi):
|
||||
# Like bisect.bisect_left, but doesn't choke on large indices on
|
||||
# 32-bit systems, like bisect's fast C implementation does.
|
||||
while lo < hi:
|
||||
mid = (lo + hi) // 2
|
||||
if a[mid] < x:
|
||||
lo = mid + 1
|
||||
else:
|
||||
hi = mid
|
||||
return lo
|
||||
|
||||
def _find_start(self, table, dbinterval):
|
||||
"""
|
||||
Given a DBInterval, find the row in the database that
|
||||
corresponds to the start time. Return the first database
|
||||
position with a timestamp (first element) greater than or
|
||||
equal to 'start'.
|
||||
"""
|
||||
# Optimization for the common case where an interval wasn't truncated
|
||||
if dbinterval.start == dbinterval.db_start:
|
||||
return dbinterval.db_startpos
|
||||
return self._bisect_left(table,
|
||||
dbinterval.start,
|
||||
dbinterval.db_startpos,
|
||||
dbinterval.db_endpos)
|
||||
|
||||
def _find_end(self, table, dbinterval):
|
||||
"""
|
||||
Given a DBInterval, find the row in the database that follows
|
||||
the end time. Return the first database position after the
|
||||
row with timestamp (first element) greater than or equal
|
||||
to 'end'.
|
||||
"""
|
||||
# Optimization for the common case where an interval wasn't truncated
|
||||
if dbinterval.end == dbinterval.db_end:
|
||||
return dbinterval.db_endpos
|
||||
# Note that we still use bisect_left here, because we don't
|
||||
# want to include the given timestamp in the results. This is
|
||||
# so a queries like 1:00 -> 2:00 and 2:00 -> 3:00 return
|
||||
# non-overlapping data.
|
||||
return self._bisect_left(table,
|
||||
dbinterval.end,
|
||||
dbinterval.db_startpos,
|
||||
dbinterval.db_endpos)
|
||||
|
||||
def stream_extract(self, path, start=None, end=None,
                   count=False, markup=False, binary=False):
    """
    Fetch data from the stream at 'path' between 'start' and 'end'.

    Returns a (data, restart) tuple, except in 'count' mode where
    only the number of matching rows is returned.

    'data' is ASCII-formatted data from the database, formatted
    according to the layout of the stream.

    'restart', if not None, means that there were too many results
    to return in a single request.  The data is complete from the
    starting timestamp to the point at which it was truncated, and
    a new request with a start time of 'restart' will fetch the
    next block of data.

    'count', if true, means to not return raw data, but just the
    count of rows that would have been returned.  This is much
    faster than actually fetching the data.  It is not limited by
    max_results.

    'markup', if true, indicates that returned data should be
    marked with a comment denoting when a particular interval
    starts, and another comment when an interval ends.

    'binary', if true, means to return raw binary rather than
    ASCII-formatted data.  It can't be combined with 'markup' or
    'count' (NilmDBError is raised).
    """
    stream_id = self._stream_id(path)
    table = self.data.getnode(path)
    intervals = self._get_intervals(stream_id)
    (start, end) = self._check_user_times(start, end)
    requested = Interval(start, end)
    chunks = []
    total_rows = 0
    budget = self.max_results
    restart = None
    if binary and (markup or count):
        raise NilmDBError("binary mode can't be used with markup or count")

    for interval in intervals.intersection(requested):
        # Fetching rows one at a time is too slow.  Instead, bisect
        # twice to get the first and one-past-last row of this
        # interval, then read that whole range as a single slice.
        first = self._find_start(table, interval)
        last = self._find_end(table, interval)

        if count:
            total_rows += last - first
            continue

        # Clip the slice if it would exceed the result budget, and
        # remember where the caller has to resume from.
        limit = first + budget
        if limit < last:
            last = limit
            restart = table[limit]

        if markup:
            chunks.append(b"# interval-start " +
                          timestamp_to_bytes(interval.start) + b"\n")

        chunks.append(table.get_data(first, last, binary))
        budget -= last - first

        # A truncated interval "ends" at the restart point.
        if markup:
            if restart is not None:
                end_mark = restart
            else:
                end_mark = interval.end
            chunks.append(b"# interval-end " +
                          timestamp_to_bytes(end_mark) + b"\n")
        if restart is not None:
            break

    if count:
        return total_rows
    return (b"".join(chunks), restart)
|
||||
|
||||
def stream_remove(self, path, start=None, end=None):
    """
    Remove data from the specified time interval within a stream.

    Removes data in the interval [start, end), and intervals are
    truncated or split appropriately.

    Returns a (removed, restart) tuple.

    'removed' is the number of data points that were removed.

    'restart', if not None, means there were too many rows to
    remove in a single request.  This function should be called
    again with a start time of 'restart' to complete the removal.
    A single call stops after self.max_removals rows or
    self.max_int_removals intervals, whichever is hit first.
    """
    stream_id = self._stream_id(path)
    table = self.data.getnode(path)
    intervals = self._get_intervals(stream_id)
    (start, end) = self._check_user_times(start, end)
    to_remove = Interval(start, end)
    removed = 0
    remaining = self.max_removals          # row budget for this call
    int_remaining = self.max_int_removals  # interval budget for this call
    restart = None

    # Can't remove intervals from within the iterator, so we need to
    # remember what's currently in the intersection now.
    all_candidates = list(intervals.intersection(to_remove, orig=True))

    # Pending coalesced row range [remove_start, remove_end) that has
    # not been passed to table.remove yet.
    remove_start = None
    remove_end = None

    for (dbint, orig) in all_candidates:
        # Stop if we've hit the max number of interval removals
        if int_remaining <= 0:
            restart = dbint.start
            break

        # Find row start and end
        row_start = self._find_start(table, dbint)
        row_end = self._find_end(table, dbint)

        # Shorten it if we'll hit the maximum number of removals
        row_max = row_start + remaining
        if row_max < row_end:
            row_end = row_max
            # The shortened interval ends at the value read from the
            # first row that is kept; the caller resumes from there.
            dbint.end = table[row_max]
            restart = dbint.end

        # Adjust the DBInterval to match the newly found ends
        dbint.db_start = dbint.start
        dbint.db_end = dbint.end
        dbint.db_startpos = row_start
        dbint.db_endpos = row_end

        # Remove interval from the database
        self._remove_interval(stream_id, orig, dbint)

        # Remove data from the underlying table storage,
        # coalescing adjacent removals to reduce the number of calls
        # to table.remove.
        if remove_end == row_start:
            # Extend our coalesced region
            remove_end = row_end
        else:
            # Perform previous removal, then save this one
            if remove_end is not None:
                table.remove(remove_start, remove_end)
            remove_start = row_start
            remove_end = row_end

        # Count how many were removed
        removed += row_end - row_start
        remaining -= row_end - row_start
        int_remaining -= 1

        # The row budget was exhausted inside this interval
        if restart is not None:
            break

    # Perform any final coalesced removal
    if remove_end is not None:
        table.remove(remove_start, remove_end)

    return (removed, restart)
|
25
nilmdb/server/rbtree.pxd
Normal file
25
nilmdb/server/rbtree.pxd
Normal file
|
@ -0,0 +1,25 @@
|
|||
# cython: language_level=2
|
||||
|
||||
# Declaration of one tree node; the implementation is in rbtree.pyx.
cdef class RBNode:
    # Value carried by the node
    cdef public object obj
    # Key: the (start, end) timestamps of the interval
    cdef public double start, end
    # Node color flag: true for red, false for black
    cdef public int red
    # Tree links
    cdef public RBNode left, right, parent
|
||||
|
||||
# Declaration of the red/black tree; the implementation is in rbtree.pyx.
cdef class RBTree:
    # 'nil' is the shared sentinel leaf; 'root' is a dummy node whose
    # left child is the real root of the tree.
    cdef public RBNode nil, root

    cpdef getroot(RBTree self)
    # Rotations
    cdef void __rotate_left(RBTree self, RBNode x)
    cdef void __rotate_right(RBTree self, RBNode y)
    # In-order neighbors.  The cdef variants return the nil sentinel
    # when there is no neighbor; the cpdef variants return None.
    cdef RBNode __successor(RBTree self, RBNode x)
    cpdef RBNode successor(RBTree self, RBNode x)
    cdef RBNode __predecessor(RBTree self, RBNode x)
    cpdef RBNode predecessor(RBTree self, RBNode x)
    # Insertion / deletion and their rebalancing helpers
    cpdef insert(RBTree self, RBNode z)
    cdef void __insert_fixup(RBTree self, RBNode x)
    cpdef delete(RBTree self, RBNode z)
    cdef inline void __delete_fixup(RBTree self, RBNode x)
    # Searching; all of these return None when nothing matches
    cpdef RBNode find(RBTree self, double start, double end)
    cpdef RBNode find_left_end(RBTree self, double t)
    cpdef RBNode find_right_start(RBTree self, double t)
|
378
nilmdb/server/rbtree.pyx
Normal file
378
nilmdb/server/rbtree.pyx
Normal file
|
@ -0,0 +1,378 @@
|
|||
# cython: profile=False
|
||||
# cython: cdivision=True
|
||||
# cython: language_level=2
|
||||
|
||||
"""
|
||||
Jim Paris <jim@jtan.com>
|
||||
|
||||
Red-black tree, where keys are stored as start/end timestamps.
|
||||
This is a basic interval tree that holds half-open intervals:
|
||||
[start, end)
|
||||
Intervals must not overlap. Fixing that would involve making this
|
||||
into an augmented interval tree as described in CLRS 14.3.
|
||||
|
||||
Code that assumes non-overlapping intervals is marked with the
|
||||
string 'non-overlapping'.
|
||||
"""
|
||||
|
||||
import sys
|
||||
cimport rbtree
|
||||
|
||||
cdef class RBNode:
    """A single Red/Black tree node: the key is the (start, end)
    pair and the value is 'obj'."""
    def __init__(self, double start, double end, object obj = None):
        # Key and value
        self.start = start
        self.end = end
        self.obj = obj
        # New nodes are black and unlinked until inserted into a tree
        self.red = False
        self.left = None
        self.right = None

    def __str__(self):
        # The tree's nil sentinel is created with start == float min
        if self.start == sys.float_info.min:
            return "[node nil]"
        color = "R" if self.red else "B"
        pieces = ["[node (", str(self.obj), ") ",
                  str(self.start), " -> ", str(self.end), " ",
                  color, "]"]
        return "".join(pieces)
|
||||
|
||||
cdef class RBTree:
    """Red/Black tree

    Nodes are RBNode objects, ordered by start and then by end.
    Two helper nodes are created by __init__: 'nil', a sentinel
    that stands in for every missing child, and 'root', a dummy
    node whose left child is the real root of the tree.
    """

    # Init
    def __init__(self):
        # Sentinel leaf; all of its links point back at itself.
        self.nil = RBNode(start = sys.float_info.min,
                          end = sys.float_info.min)
        self.nil.left = self.nil
        self.nil.right = self.nil
        self.nil.parent = self.nil

        # Dummy root.  Inserted nodes always go into its left subtree.
        self.root = RBNode(start = sys.float_info.max,
                           end = sys.float_info.max)
        self.root.left = self.nil
        self.root.right = self.nil
        self.root.parent = self.nil

    # We have a dummy root node to simplify operations, so from an
    # external point of view, its left child is the real root.
    cpdef getroot(self):
        return self.root.left

    # Rotations and basic operations
    cdef void __rotate_left(self, RBNode x):
        """Rotate left:
        #   x           y
        #  / \    -->  / \
        # z   y       x   w
        #    / \     / \
        #   v   w   z   v
        """
        cdef RBNode y = x.right
        x.right = y.left
        if y.left is not self.nil:
            y.left.parent = x
        y.parent = x.parent
        if x is x.parent.left:
            x.parent.left = y
        else:
            x.parent.right = y
        y.left = x
        x.parent = y

    cdef void __rotate_right(self, RBNode y):
        """Rotate right:
        #     y           x
        #    / \    -->  / \
        #   x   w       z   y
        #  / \             / \
        # z   v           v   w
        """
        cdef RBNode x = y.left
        y.left = x.right
        if x.right is not self.nil:
            x.right.parent = y
        x.parent = y.parent
        if y is y.parent.left:
            y.parent.left = x
        else:
            y.parent.right = x
        x.right = y
        y.parent = x

    cdef RBNode __successor(self, RBNode x):
        """Returns the successor of RBNode x, or the nil sentinel
        if x has no successor"""
        cdef RBNode y = x.right
        if y is not self.nil:
            # Leftmost node of the right subtree
            while y.left is not self.nil:
                y = y.left
        else:
            # Climb until we come up out of a left subtree
            y = x.parent
            while x is y.right:
                x = y
                y = y.parent
            if y is self.root:
                return self.nil
        return y
    cpdef RBNode successor(self, RBNode x):
        """Returns the successor of RBNode x, or None"""
        cdef RBNode y = self.__successor(x)
        return y if y is not self.nil else None

    cdef RBNode __predecessor(self, RBNode x):
        """Returns the predecessor of RBNode x, or the nil sentinel
        if x has no predecessor"""
        cdef RBNode y = x.left
        if y is not self.nil:
            # Rightmost node of the left subtree
            while y.right is not self.nil:
                y = y.right
        else:
            # Climb until we come up out of a right subtree
            y = x.parent
            while x is y.left:
                if y is self.root:
                    y = self.nil
                    break
                x = y
                y = y.parent
        return y
    cpdef RBNode predecessor(self, RBNode x):
        """Returns the predecessor of RBNode x, or None"""
        cdef RBNode y = self.__predecessor(x)
        return y if y is not self.nil else None

    # Insertion
    cpdef insert(self, RBNode z):
        """Insert RBNode z into RBTree and rebalance as necessary"""
        z.left = self.nil
        z.right = self.nil
        cdef RBNode y = self.root
        cdef RBNode x = self.root.left
        # Descend to a leaf position; y trails x as the parent.
        while x is not self.nil:
            y = x
            if (x.start > z.start or (x.start == z.start and x.end > z.end)):
                x = x.left
            else:
                x = x.right
        z.parent = y
        if (y is self.root or
            (y.start > z.start or (y.start == z.start and y.end > z.end))):
            y.left = z
        else:
            y.right = z
        # relabel/rebalance
        self.__insert_fixup(z)

    cdef void __insert_fixup(self, RBNode x):
        """Rebalance/fix RBTree after a simple insertion of RBNode x"""
        x.red = True
        while x.parent.red:
            if x.parent is x.parent.parent.left:
                # y is x's uncle
                y = x.parent.parent.right
                if y.red:
                    # Red uncle: recolor and continue from the grandparent
                    x.parent.red = False
                    y.red = False
                    x.parent.parent.red = True
                    x = x.parent.parent
                else:
                    # Black uncle: rotate x to the outside if needed,
                    # then rotate the grandparent
                    if x is x.parent.right:
                        x = x.parent
                        self.__rotate_left(x)
                    x.parent.red = False
                    x.parent.parent.red = True
                    self.__rotate_right(x.parent.parent)
            else: # same as above, left/right switched
                y = x.parent.parent.left
                if y.red:
                    x.parent.red = False
                    y.red = False
                    x.parent.parent.red = True
                    x = x.parent.parent
                else:
                    if x is x.parent.left:
                        x = x.parent
                        self.__rotate_right(x)
                    x.parent.red = False
                    x.parent.parent.red = True
                    self.__rotate_left(x.parent.parent)
        # The real root is always black
        self.root.left.red = False

    # Deletion
    cpdef delete(self, RBNode z):
        """Remove RBNode z from the tree and rebalance as necessary.
        Raises AttributeError if z has a link that is None, which
        is the state of a node that was never inserted."""
        if z.left is None or z.right is None:
            raise AttributeError("you can only delete a node object "
                                 + "from the tree; use find() to get one")
        cdef RBNode x, y
        # y is the node that gets unlinked: z itself when it has at
        # most one real child, otherwise z's successor.
        if z.left is self.nil or z.right is self.nil:
            y = z
        else:
            y = self.__successor(z)
        if y.left is self.nil:
            x = y.right
        else:
            x = y.left
        x.parent = y.parent
        if x.parent is self.root:
            self.root.left = x
        else:
            if y is y.parent.left:
                y.parent.left = x
            else:
                y.parent.right = x
        if y is not z:
            # y is the node to splice out, x is its child
            # (y then takes over z's links and color in the tree)
            y.left = z.left
            y.right = z.right
            y.parent = z.parent
            z.left.parent = y
            z.right.parent = y
            if z is z.parent.left:
                z.parent.left = y
            else:
                z.parent.right = y
            if not y.red:
                y.red = z.red
                self.__delete_fixup(x)
            else:
                y.red = z.red
        else:
            if not y.red:
                self.__delete_fixup(x)

    cdef void __delete_fixup(self, RBNode x):
        """Rebalance/fix RBTree after a deletion.  RBNode x is the
        child of the spliced out node."""
        cdef RBNode rootLeft = self.root.left
        while not x.red and x is not rootLeft:
            if x is x.parent.left:
                # w is x's sibling
                w = x.parent.right
                if w.red:
                    w.red = False
                    x.parent.red = True
                    self.__rotate_left(x.parent)
                    w = x.parent.right
                if not w.right.red and not w.left.red:
                    w.red = True
                    x = x.parent
                else:
                    if not w.right.red:
                        w.left.red = False
                        w.red = True
                        self.__rotate_right(w)
                        w = x.parent.right
                    w.red = x.parent.red
                    x.parent.red = False
                    w.right.red = False
                    self.__rotate_left(x.parent)
                    x = rootLeft # exit loop
            else: # same as above, left/right switched
                w = x.parent.left
                if w.red:
                    w.red = False
                    x.parent.red = True
                    self.__rotate_right(x.parent)
                    w = x.parent.left
                if not w.left.red and not w.right.red:
                    w.red = True
                    x = x.parent
                else:
                    if not w.left.red:
                        w.right.red = False
                        w.red = True
                        self.__rotate_left(w)
                        w = x.parent.left
                    w.red = x.parent.red
                    x.parent.red = False
                    w.left.red = False
                    self.__rotate_right(x.parent)
                    x = rootLeft # exit loop
        x.red = False

    # Walking, searching
    def __iter__(self):
        return self.inorder()

    def inorder(self, RBNode x = None):
        """Generator that performs an inorder walk starting at the
        leftmost node of the tree rooted at RBNode x (default: the
        whole tree)"""
        # NOTE(review): the walk continues via __successor until nil,
        # so for a non-root x it does not stop at the end of x's
        # subtree -- confirm whether callers rely on that.
        if x is None:
            x = self.getroot()
        while x.left is not self.nil:
            x = x.left
        while x is not self.nil:
            yield x
            x = self.__successor(x)

    cpdef RBNode find(self, double start, double end):
        """Return the node with exactly the given start and end,
        or None if there is no such node."""
        cdef RBNode x = self.getroot()
        while x is not self.nil:
            if start < x.start:
                x = x.left
            elif start == x.start:
                if end == x.end:
                    break # found it
                elif end < x.end:
                    x = x.left
                else:
                    x = x.right
            else:
                x = x.right
        return x if x is not self.nil else None

    cpdef RBNode find_left_end(self, double t):
        """Find the leftmost node with end >= t.  With non-overlapping
        intervals, this is the first node that might overlap time t.
        Returns None if there is no such node.

        Note that this relies on non-overlapping intervals, since
        it assumes that we can use the endpoints to traverse the
        tree even though it was created using the start points."""
        cdef RBNode x = self.getroot()
        while x is not self.nil:
            if t < x.end:
                if x.left is self.nil:
                    break
                x = x.left
            elif t == x.end:
                break
            else:
                if x.right is self.nil:
                    # x ends before t; the answer is the next node
                    x = self.__successor(x)
                    break
                x = x.right
        return x if x is not self.nil else None

    cpdef RBNode find_right_start(self, double t):
        """Find the rightmost node with start <= t.  With non-overlapping
        intervals, this is the last node that might overlap time t.
        Returns None if there is no such node."""
        cdef RBNode x = self.getroot()
        while x is not self.nil:
            if t < x.start:
                if x.left is self.nil:
                    # x starts after t; the answer is the previous node
                    x = self.__predecessor(x)
                    break
                x = x.left
            elif t == x.start:
                break
            else:
                if x.right is self.nil:
                    break
                x = x.right
        return x if x is not self.nil else None

    # Intersections
    def intersect(self, double start, double end):
        """Generator that returns nodes that overlap the given
        (start,end) range.  Assumes non-overlapping intervals."""
        # Start with the leftmost node that ends after start
        cdef RBNode n = self.find_left_end(start)
        while n is not None:
            if n.start >= end:
                # this node starts after the requested end; we're done
                break
            if start < n.end:
                # this node overlaps our requested area
                yield n
            n = self.successor(n)
|
1
nilmdb/server/rbtree.pyxdep
Normal file
1
nilmdb/server/rbtree.pyxdep
Normal file
|
@ -0,0 +1 @@
|
|||
rbtree.pxd
|
806
nilmdb/server/rocket.c
Normal file
806
nilmdb/server/rocket.c
Normal file
|
@ -0,0 +1,806 @@
|
|||
#include <Python.h>
|
||||
#include <structmember.h>
|
||||
#include <endian.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define __STDC_FORMAT_MACROS
|
||||
#include <inttypes.h>
|
||||
|
||||
/* Values missing from stdint.h */
|
||||
#define UINT8_MIN 0
|
||||
#define UINT16_MIN 0
|
||||
#define UINT32_MIN 0
|
||||
#define UINT64_MIN 0
|
||||
|
||||
/* Marker values (if min == max, skip range check) */
|
||||
#define FLOAT32_MIN 0
|
||||
#define FLOAT32_MAX 0
|
||||
#define FLOAT64_MIN 0
|
||||
#define FLOAT64_MAX 0
|
||||
|
||||
typedef int64_t timestamp_t;
|
||||
|
||||
/* Somewhat arbitrary, just so we can use fixed sizes for strings
|
||||
etc. */
|
||||
static const int MAX_LAYOUT_COUNT = 1024;
|
||||
|
||||
/* Error object and constants */
|
||||
static PyObject *ParseError;
|
||||
typedef enum {
|
||||
ERR_OTHER,
|
||||
ERR_NON_MONOTONIC,
|
||||
ERR_OUT_OF_INTERVAL,
|
||||
} parseerror_code_t;
|
||||
/* Add the parseerror_code_t values to 'module' as integer constants
   named ERR_OTHER, ERR_NON_MONOTONIC and ERR_OUT_OF_INTERVAL.
   NOTE(review): the return values of PyModule_AddIntMacro are
   ignored, so a failure to add a constant is not reported here. */
static void add_parseerror_codes(PyObject *module)
{
        PyModule_AddIntMacro(module, ERR_OTHER);
        PyModule_AddIntMacro(module, ERR_NON_MONOTONIC);
        PyModule_AddIntMacro(module, ERR_OUT_OF_INTERVAL);
}
|
||||
|
||||
/* Helpers to raise ParseErrors. Use "return raise_str(...)" etc. */
|
||||
static PyObject *raise_str(int line, int col, int code, const char *string)
|
||||
{
|
||||
PyObject *o;
|
||||
o = Py_BuildValue("(iiis)", line, col, code, string);
|
||||
if (o != NULL) {
|
||||
PyErr_SetObject(ParseError, o);
|
||||
Py_DECREF(o);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
static PyObject *raise_int(int line, int col, int code, int64_t num)
|
||||
{
|
||||
PyObject *o;
|
||||
o = Py_BuildValue("(iiiL)", line, col, code, (long long)num);
|
||||
if (o != NULL) {
|
||||
PyErr_SetObject(ParseError, o);
|
||||
Py_DECREF(o);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/****
|
||||
* Layout and type helpers
|
||||
*/
|
||||
typedef union {
|
||||
int8_t i;
|
||||
uint8_t u;
|
||||
} union8_t;
|
||||
typedef union {
|
||||
int16_t i;
|
||||
uint16_t u;
|
||||
} union16_t;
|
||||
typedef union {
|
||||
int32_t i;
|
||||
uint32_t u;
|
||||
float f;
|
||||
} union32_t;
|
||||
typedef union {
|
||||
int64_t i;
|
||||
uint64_t u;
|
||||
double d;
|
||||
} union64_t;
|
||||
|
||||
typedef enum {
|
||||
LAYOUT_TYPE_NONE,
|
||||
LAYOUT_TYPE_INT8,
|
||||
LAYOUT_TYPE_UINT8,
|
||||
LAYOUT_TYPE_INT16,
|
||||
LAYOUT_TYPE_UINT16,
|
||||
LAYOUT_TYPE_INT32,
|
||||
LAYOUT_TYPE_UINT32,
|
||||
LAYOUT_TYPE_INT64,
|
||||
LAYOUT_TYPE_UINT64,
|
||||
LAYOUT_TYPE_FLOAT32,
|
||||
LAYOUT_TYPE_FLOAT64,
|
||||
} layout_type_t;
|
||||
|
||||
struct {
|
||||
char *string;
|
||||
layout_type_t layout;
|
||||
int size;
|
||||
} type_lookup[] = {
|
||||
{ "int8", LAYOUT_TYPE_INT8, 1 },
|
||||
{ "uint8", LAYOUT_TYPE_UINT8, 1 },
|
||||
{ "int16", LAYOUT_TYPE_INT16, 2 },
|
||||
{ "uint16", LAYOUT_TYPE_UINT16, 2 },
|
||||
{ "int32", LAYOUT_TYPE_INT32, 4 },
|
||||
{ "uint32", LAYOUT_TYPE_UINT32, 4 },
|
||||
{ "int64", LAYOUT_TYPE_INT64, 8 },
|
||||
{ "uint64", LAYOUT_TYPE_UINT64, 8 },
|
||||
{ "float32", LAYOUT_TYPE_FLOAT32, 4 },
|
||||
{ "float64", LAYOUT_TYPE_FLOAT64, 8 },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
/****
|
||||
* Object definition, init, etc
|
||||
*/
|
||||
|
||||
/* Rocket object */
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
layout_type_t layout_type;
|
||||
int layout_count;
|
||||
int binary_size;
|
||||
FILE *file;
|
||||
int file_size;
|
||||
} Rocket;
|
||||
|
||||
/* Dealloc / new */
|
||||
/* Destructor.  A file that is still open at this point was never
   passed through .close(); complain on stderr and close it. */
static void Rocket_dealloc(Rocket *self)
{
        FILE *leftover = self->file;

        if (leftover != NULL) {
                fprintf(stderr, "rocket: file wasn't closed\n");
                fclose(leftover);
                self->file = NULL;
        }
        Py_TYPE(self)->tp_free((PyObject *)self);
}
|
||||
|
||||
/* Allocator.  Returns a Rocket with no layout, no file and an
   unknown (-1) cached file size, or NULL if allocation failed. */
static PyObject *Rocket_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
        Rocket *obj = (Rocket *)type->tp_alloc(type, 0);

        if (obj != NULL) {
                obj->layout_type = LAYOUT_TYPE_NONE;
                obj->layout_count = 0;
                obj->binary_size = 0;
                obj->file = NULL;
                obj->file_size = -1;
        }
        return (PyObject *)obj;
}
|
||||
|
||||
/* .__init__(layout, file) */
|
||||
/* .__init__(layout, file)

   layout: string of the form "<type>_<count>", where <type> is one of
           the names in type_lookup and <count> is a decimal number
           from 1 to MAX_LAYOUT_COUNT - 1.
   file:   path (str or bytes-like) that is opened in "a+b" mode, or
           None for no file.

   Returns 0 on success.  On failure returns -1 with ValueError (bad
   layout, NUL in path), OSError (open failed) or MemoryError set, and
   leaves no file open. */
static int Rocket_init(Rocket *self, PyObject *args, PyObject *kwds)
{
        const char *layout;
        Py_buffer pathbuf;
        static char *kwlist[] = { "layout", "file", NULL };
        const char *under;
        char *tmp;
        unsigned long n;
        size_t typelen;
        int i;

        /* "z*" fills a Py_buffer and so, unlike "z#", does not depend
           on PY_SSIZE_T_CLEAN being defined for the length type. */
        if (!PyArg_ParseTupleAndKeywords(args, kwds, "sz*", kwlist,
                                         &layout, &pathbuf))
                return -1;

        /* __init__ can be called again on a live object; don't leak
           the file opened by an earlier call. */
        if (self->file) {
                fclose(self->file);
                self->file = NULL;
        }
        self->file_size = -1;

        if (pathbuf.buf != NULL) {
                char *path;
                size_t pathlen = (size_t)pathbuf.len;

                if (memchr(pathbuf.buf, '\0', pathlen) != NULL) {
                        PyBuffer_Release(&pathbuf);
                        PyErr_SetString(PyExc_ValueError, "path must not "
                                        "contain NUL characters");
                        return -1;
                }
                /* Make a NUL-terminated copy for fopen; an arbitrary
                   buffer is not guaranteed to be terminated. */
                path = PyMem_Malloc(pathlen + 1);
                if (path == NULL) {
                        PyBuffer_Release(&pathbuf);
                        PyErr_NoMemory();
                        return -1;
                }
                memcpy(path, pathbuf.buf, pathlen);
                path[pathlen] = '\0';
                PyBuffer_Release(&pathbuf);

                self->file = fopen(path, "a+b");
                if (self->file == NULL) {
                        /* set the error before anything can touch errno */
                        PyErr_SetFromErrno(PyExc_OSError);
                        PyMem_Free(path);
                        return -1;
                }
                PyMem_Free(path);
        } else {
                PyBuffer_Release(&pathbuf);
        }

        under = strchr(layout, '_');
        if (!under) {
                PyErr_SetString(PyExc_ValueError, "no such layout: "
                                "badly formatted string");
                goto fail;
        }

        /* Parse the count.  Insist on a leading digit, because strtoul
           would otherwise accept whitespace and a sign, and keep the
           result unsigned long until it has been range checked so it
           can't wrap when narrowed to int. */
        if (!isdigit((unsigned char)under[1])) {
                PyErr_SetString(PyExc_ValueError, "no such layout: "
                                "bad count");
                goto fail;
        }
        n = strtoul(under + 1, &tmp, 10);
        if (n < 1 || *tmp != '\0') {
                PyErr_SetString(PyExc_ValueError, "no such layout: "
                                "bad count");
                goto fail;
        }
        if (n >= (unsigned long)MAX_LAYOUT_COUNT) {
                PyErr_SetString(PyExc_ValueError, "no such layout: "
                                "count too high");
                goto fail;
        }

        /* Look up the type name.  The lengths must match too: comparing
           only the first (under - layout) characters would let a prefix
           such as "int_3" or "u_1" select int8/uint8. */
        typelen = (size_t)(under - layout);
        for (i = 0; type_lookup[i].string; i++)
                if (strlen(type_lookup[i].string) == typelen &&
                    strncmp(layout, type_lookup[i].string, typelen) == 0)
                        break;
        if (!type_lookup[i].string) {
                PyErr_SetString(PyExc_ValueError, "no such layout: "
                                "bad data type");
                goto fail;
        }

        self->layout_count = (int)n;
        self->layout_type = type_lookup[i].layout;
        /* 8 bytes of timestamp followed by layout_count values */
        self->binary_size = 8 + (type_lookup[i].size * self->layout_count);

        return 0;

fail:
        /* Don't leave a file open on an object that failed to init */
        if (self->file) {
                fclose(self->file);
                self->file = NULL;
        }
        return -1;
}
|
||||
|
||||
/* .close() */
|
||||
/* .close(): close the underlying file if one is open.  Calling it
   again, or on an object without a file, does nothing.  Returns None. */
static PyObject *Rocket_close(Rocket *self)
{
        FILE *fp = self->file;

        if (fp != NULL) {
                fclose(fp);
                self->file = NULL;
        }
        Py_RETURN_NONE;
}
|
||||
|
||||
/* .file_size property */
|
||||
/* .file_size property: size of the underlying file in bytes.

   The size is measured by seeking to the end of the file (the
   previous position is restored afterwards) and is then cached in
   self->file_size; a negative cached value means "not measured yet".
   Raises AttributeError if there is no file and OSError if the
   seek/tell calls fail. */
static PyObject *Rocket_get_file_size(Rocket *self)
{
        if (!self->file) {
                PyErr_SetString(PyExc_AttributeError, "no file");
                return NULL;
        }
        if (self->file_size < 0) {
                /* ftell returns long; holding the results in int would
                   wrap for sizes above INT_MAX. */
                long oldpos, size;
                if (((oldpos = ftell(self->file)) < 0) ||
                    (fseek(self->file, 0, SEEK_END) < 0) ||
                    ((size = ftell(self->file)) < 0) ||
                    (fseek(self->file, oldpos, SEEK_SET) < 0)) {
                        PyErr_SetFromErrno(PyExc_OSError);
                        return NULL;
                }
                /* The cache field is an int.  A size that does not fit
                   is returned exactly but left uncached. */
                if (size > INT_MAX)
                        return PyLong_FromLong(size);
                self->file_size = (int)size;
        }
        return PyLong_FromLong(self->file_size);
}
|
||||
|
||||
/****
|
||||
* Append from string
|
||||
*/
|
||||
/* strtoll fixed to base 10, so it has the same two-argument shape as
   strtod.  Returns long long: a plain long would truncate 64-bit
   values on platforms where long is 32 bits. */
static inline long long strtoll10(const char *nptr, char **endptr) {
        return strtoll(nptr, endptr, 10);
}
|
||||
/* strtoull fixed to base 10, so it has the same two-argument shape as
   strtod.  Returns unsigned long long: a signed long would truncate
   on platforms where long is 32 bits and turn large values negative. */
static inline unsigned long long strtoull10(const char *nptr, char **endptr) {
        return strtoull(nptr, endptr, 10);
}
|
||||
|
||||
/* .append_string(count, data, offset, linenum, start, end, last_timestamp) */
|
||||
/* Parse up to 'count' lines of ASCII data, starting at data[offset],
   and append them to the file in binary form.

   Each line is "<timestamp> <value> ... <value>" with exactly
   layout_count values; lines starting with '#' are skipped and a
   trailing '#' comment is allowed.  Timestamps must be strictly
   greater than the previous one (initially 'last_timestamp') and
   satisfy start <= timestamp < end.

   Returns the tuple (rows written, new offset, last timestamp, line
   number).  Raises ParseError on malformed input, OSError if a write
   fails, and AttributeError if the object has no file. */
static PyObject *Rocket_append_string(Rocket *self, PyObject *args)
{
        int count;
        const char *data;
        int offset;
        const char *linestart;
        int linenum;
        long long ll1, ll2, ll3;
        timestamp_t start;
        timestamp_t end;
        timestamp_t last_timestamp;

        int written = 0;
        char *endptr;
        union8_t t8;
        union16_t t16;
        union32_t t32;
        union64_t t64;
        int i;

        /* Input data is bytes.  Using 'y#' instead of 'y' might be
           preferable, but strto* requires the null terminator. */
        if (!PyArg_ParseTuple(args, "iyiiLLL:append_string", &count,
                              &data, &offset, &linenum,
                              &ll1, &ll2, &ll3))
                return NULL;
        start = ll1;
        end = ll2;
        last_timestamp = ll3;

        /* The object may have been created with file=None */
        if (!self->file) {
                PyErr_SetString(PyExc_AttributeError, "no file");
                return NULL;
        }

        /* 'y' guarantees a NUL-terminated buffer without embedded
           NULs, so strlen gives its length; refuse to start outside it */
        if (offset < 0 || (size_t)offset > strlen(data))
                return raise_str(0, 0, ERR_OTHER, "bad offset");

        /* We are about to grow the file, so any cached size is stale */
        self->file_size = -1;

        /* Skip spaces, but don't skip over a newline.  (The cast is
           needed because isspace is undefined for negative values.) */
#define SKIP_BLANK(buf) do {                            \
        while (isspace((unsigned char)*buf)) {          \
                if (*buf == '\n')                       \
                        break;                          \
                buf++;                                  \
        } } while(0)

        const char *buf = &data[offset];
        while (written < count && *buf)
        {
                linestart = buf;
                linenum++;

                /* Skip leading whitespace and commented lines */
                SKIP_BLANK(buf);
                if (*buf == '#') {
                        while (*buf && *buf != '\n')
                                buf++;
                        if (*buf)
                                buf++;
                        continue;
                }

                /* Extract timestamp */
                t64.i = strtoll(buf, &endptr, 10);
                if (endptr == buf || !isspace((unsigned char)*endptr)) {
                        /* Try parsing as a double instead */
                        t64.d = strtod(buf, &endptr);
                        if (endptr == buf)
                                goto bad_timestamp;
                        if (!isspace((unsigned char)*endptr))
                                goto cant_parse_value;
                        t64.i = round(t64.d);
                }
                if (t64.i <= last_timestamp)
                        return raise_int(linenum, buf - linestart + 1,
                                         ERR_NON_MONOTONIC, t64.i);
                last_timestamp = t64.i;
                if (t64.i < start || t64.i >= end)
                        return raise_int(linenum, buf - linestart + 1,
                                         ERR_OUT_OF_INTERVAL, t64.i);
                /* Byte-swap to little-endian disk order if needed */
                t64.u = le64toh(t64.u);
                if (fwrite(&t64.u, 8, 1, self->file) != 1)
                        goto err;
                buf = endptr;

                /* Parse all values in the line */
                switch (self->layout_type) {
                /* CS expands to one "case": parse layout_count values
                   with parsefunc into parsetype, range check them
                   against type_MIN/type_MAX, narrow into realtype and
                   write the 'bytes'-wide disktype view of it. */
#define CS(type, parsefunc, parsetype, realtype, disktype, letoh, bytes) \
                case LAYOUT_TYPE_##type:                                \
                        /* parse and write in a loop */                 \
                        for (i = 0; i < self->layout_count; i++) {      \
                                /* skip non-newlines */                 \
                                SKIP_BLANK(buf);                        \
                                if (*buf == '\n')                       \
                                        goto wrong_number_of_values;    \
                                /* parse number */                      \
                                parsetype = parsefunc(buf, &endptr);    \
                                if (*endptr &&                          \
                                    !isspace((unsigned char)*endptr))   \
                                        goto cant_parse_value;          \
                                /* check limits */                      \
                                if (type##_MIN != type##_MAX &&         \
                                    (parsetype < type##_MIN ||          \
                                     parsetype > type##_MAX))           \
                                        goto value_out_of_range;        \
                                /* convert to disk representation */    \
                                realtype = parsetype;                   \
                                disktype = letoh(disktype);             \
                                /* write it */                          \
                                if (fwrite(&disktype, bytes,            \
                                           1, self->file) != 1)         \
                                        goto err;                       \
                                /* advance buf */                       \
                                buf = endptr;                           \
                        }                                               \
                        /* Skip trailing whitespace and comments */     \
                        SKIP_BLANK(buf);                                \
                        if (*buf == '#')                                \
                                while (*buf && *buf != '\n')            \
                                        buf++;                          \
                        if (*buf == '\n')                               \
                                buf++;                                  \
                        else if (*buf != '\0')                          \
                                goto extra_data_on_line;                \
                        break

                        CS(INT8,   strtoll10,  t64.i, t8.i,  t8.u,         , 1);
                        CS(UINT8,  strtoull10, t64.u, t8.u,  t8.u,         , 1);
                        CS(INT16,  strtoll10,  t64.i, t16.i, t16.u, le16toh, 2);
                        CS(UINT16, strtoull10, t64.u, t16.u, t16.u, le16toh, 2);
                        CS(INT32,  strtoll10,  t64.i, t32.i, t32.u, le32toh, 4);
                        CS(UINT32, strtoull10, t64.u, t32.u, t32.u, le32toh, 4);
                        CS(INT64,  strtoll10,  t64.i, t64.i, t64.u, le64toh, 8);
                        CS(UINT64, strtoull10, t64.u, t64.u, t64.u, le64toh, 8);
                        CS(FLOAT32, strtod,    t64.d, t32.f, t32.u, le32toh, 4);
                        CS(FLOAT64, strtod,    t64.d, t64.d, t64.u, le64toh, 8);
#undef CS
                default:
                        PyErr_SetString(PyExc_TypeError, "unknown type");
                        return NULL;
                }

                /* Done this line */
                written++;
        }

        fflush(self->file);

        /* Build return value and return */
        offset = buf - data;
        PyObject *o;
        o = Py_BuildValue("(iiLi)", written, offset,
                          (long long)last_timestamp, linenum);
        return o;
err:
        PyErr_SetFromErrno(PyExc_OSError);
        return NULL;
bad_timestamp:
        return raise_str(linenum, buf - linestart + 1,
                         ERR_OTHER, "bad timestamp");
cant_parse_value:
        return raise_str(linenum, buf - linestart + 1,
                         ERR_OTHER, "can't parse value");
wrong_number_of_values:
        return raise_str(linenum, buf - linestart + 1,
                         ERR_OTHER, "wrong number of values");
value_out_of_range:
        return raise_str(linenum, buf - linestart + 1,
                         ERR_OTHER, "value out of range");
extra_data_on_line:
        return raise_str(linenum, buf - linestart + 1,
                         ERR_OTHER, "extra data on line");
}
|
||||
|
||||
/****
|
||||
* Append from binary data
|
||||
*/
|
||||
|
||||
/* .append_binary(count, data, offset, linenum, start, end, last_timestamp) */
|
||||
static PyObject *Rocket_append_binary(Rocket *self, PyObject *args)
{
    /*
     * Append rows of raw little-endian binary data to the open file.
     *
     * Python arguments, in order:
     *   count, data, offset, linenum, start, end, last_timestamp
     *
     * At most `count` complete rows (each self->binary_size bytes) are
     * taken from `data`, starting at byte `offset`.  Every row's leading
     * 8-byte timestamp must be strictly greater than the previous one
     * and must lie in [start, end); otherwise a parse error is raised
     * via raise_int() and nothing is written.
     *
     * Returns the tuple (rows_written, new_offset, last_timestamp,
     * linenum); linenum is passed through unchanged.
     */
    int count;
    const uint8_t *data;
    /* NOTE(review): "y#" stores into an int only when PY_SSIZE_T_CLEAN is
       not defined before including Python.h (and Python 3.10+ requires it
       to be defined, with a Py_ssize_t length).  The include block is not
       visible here -- confirm the type matches. */
    int data_len;
    int linenum;
    int offset;
    long long ll1, ll2, ll3;
    timestamp_t start;
    timestamp_t end;
    timestamp_t last_timestamp;

    if (!PyArg_ParseTuple(args, "iy#iiLLL:append_binary",
                          &count, &data, &data_len, &offset,
                          &linenum, &ll1, &ll2, &ll3))
        return NULL;
    start = ll1;
    end = ll2;
    last_timestamp = ll3;

    /* Same guard the extract_* methods use: never hand a NULL FILE* to
       stdio. */
    if (!self->file) {
        PyErr_SetString(PyExc_Exception, "no file");
        return NULL;
    }

    /* Advance to offset.  A negative offset would move `data` before the
       start of the caller's buffer, so reject it as well. */
    if (offset < 0 || offset > data_len)
        return raise_str(0, 0, ERR_OTHER, "bad offset");
    data += offset;
    data_len -= offset;

    /* A negative count would become a huge size_t in the fwrite() below
       and read far past the end of the buffer. */
    if (count < 0)
        return raise_str(0, 0, ERR_OTHER, "bad count");

    /* Figure out max number of rows to insert */
    int rows = data_len / self->binary_size;
    if (rows > count)
        rows = count;

    /* Check timestamps */
    timestamp_t ts;
    int i;
    for (i = 0; i < rows; i++) {
        /* Read raw timestamp, byteswap if needed */
        memcpy(&ts, &data[i * self->binary_size], 8);
        ts = le64toh(ts);

        /* Check limits */
        if (ts <= last_timestamp)
            return raise_int(i, 0, ERR_NON_MONOTONIC, ts);
        last_timestamp = ts;
        if (ts < start || ts >= end)
            return raise_int(i, 0, ERR_OUT_OF_INTERVAL, ts);
    }

    /* Write binary data; the input is already in the on-disk format */
    if (fwrite(data, self->binary_size, rows, self->file) != (size_t)rows) {
        PyErr_SetFromErrno(PyExc_OSError);
        return NULL;
    }
    fflush(self->file);

    /* Build return value and return */
    PyObject *o;
    o = Py_BuildValue("(iiLi)", rows, offset + rows * self->binary_size,
                      (long long)last_timestamp, linenum);
    return o;
}
|
||||
|
||||
/****
|
||||
* Extract to binary bytes object containing ASCII text-formatted data
|
||||
*/
|
||||
|
||||
/*
 * extract_string(offset, count)
 *
 * Seek to byte `offset` in the open file, read `count` rows, and return
 * them as a bytes object of ASCII text.  Each row is read as an 8-byte
 * little-endian timestamp followed by self->layout_count values of the
 * type selected by self->layout_type, and is formatted as the timestamp,
 * then each value preceded by a single space, then a newline.
 *
 * Raises Exception("no file") when no file is open, TypeError for an
 * unknown layout type, and OSError (built from errno) on any seek, read,
 * allocation or formatting failure.
 */
static PyObject *Rocket_extract_string(Rocket *self, PyObject *args)
{
    long count;
    long offset;

    if (!PyArg_ParseTuple(args, "ll", &offset, &count))
        return NULL;
    if (!self->file) {
        PyErr_SetString(PyExc_Exception, "no file");
        return NULL;
    }
    /* Seek to target location */
    if (fseek(self->file, offset, SEEK_SET) < 0) {
        PyErr_SetFromErrno(PyExc_OSError);
        return NULL;
    }

    /* Output buffer: `str` holds `len` bytes of text so far, out of
       `len_alloc` bytes allocated.  It starts as NULL and grows on
       demand inside the row loop. */
    char *str = NULL, *new;
    long len_alloc = 0;
    long len = 0;
    int ret;

    /* min space free in string (and the maximum length of one
       line); this is generous */
    const int min_free = 32 * MAX_LAYOUT_COUNT;

    /* how much to allocate at once */
    const int alloc_size = 1048576;

    int row, i;
    /* Scratch unions used to reinterpret the raw bytes read from disk
       as the signed/unsigned/floating type being printed. */
    union8_t t8;
    union16_t t16;
    union32_t t32;
    union64_t t64;
    for (row = 0; row < count; row++) {
        /* Make sure there's space for a line */
        if ((len_alloc - len) < min_free) {
            /* grow by 1 meg at a time */
            len_alloc += alloc_size;
            new = realloc(str, len_alloc);
            if (new == NULL)
                goto err;
            str = new;
        }

        /* Read and print timestamp */
        if (fread(&t64.u, 8, 1, self->file) != 1)
            goto err;
        t64.u = le64toh(t64.u);
        ret = sprintf(&str[len], "%" PRId64, t64.i);
        if (ret <= 0)
            goto err;
        len += ret;

        /* Read and print values */
        switch (self->layout_type) {
        /* CASE(type, fmt, fmttype, disktype, letoh, bytes) expands to one
           switch case: read `bytes` bytes into union member `disktype`,
           byteswap with `letoh` (empty for single-byte types), then print
           union member `fmttype` using printf format `fmt`. */
#define CASE(type, fmt, fmttype, disktype, letoh, bytes)                \
            case LAYOUT_TYPE_##type:                                    \
                /* read and format in a loop */                         \
                for (i = 0; i < self->layout_count; i++) {              \
                    if (fread(&disktype, bytes,                         \
                              1, self->file) != 1)                      \
                        goto err;                                       \
                    disktype = letoh(disktype);                         \
                    ret = sprintf(&str[len], " " fmt,                   \
                                  fmttype);                             \
                    if (ret <= 0)                                       \
                        goto err;                                       \
                    len += ret;                                         \
                }                                                       \
                break
            CASE(INT8, "%" PRId8, t8.i, t8.u, , 1);
            CASE(UINT8, "%" PRIu8, t8.u, t8.u, , 1);
            CASE(INT16, "%" PRId16, t16.i, t16.u, le16toh, 2);
            CASE(UINT16, "%" PRIu16, t16.u, t16.u, le16toh, 2);
            CASE(INT32, "%" PRId32, t32.i, t32.u, le32toh, 4);
            CASE(UINT32, "%" PRIu32, t32.u, t32.u, le32toh, 4);
            CASE(INT64, "%" PRId64, t64.i, t64.u, le64toh, 8);
            CASE(UINT64, "%" PRIu64, t64.u, t64.u, le64toh, 8);
            /* These next two are a bit debatable.  float32 needs 6-9
               significant decimal digits, so we print 7 ("%.6e").
               float64 needs 15-17, so we print 17 ("%.16e").  This is
               similar to the old prep format for float32.
            */
            CASE(FLOAT32, "%.6e", t32.f, t32.u, le32toh, 4);
            CASE(FLOAT64, "%.16e", t64.d, t64.u, le64toh, 8);
#undef CASE
        default:
            PyErr_SetString(PyExc_TypeError, "unknown type");
            if (str) free(str);
            return NULL;
        }
        /* Terminate the row */
        str[len++] = '\n';
    }

    /* Copy the text into a Python bytes object; the C buffer is ours
       to free either way. */
    PyObject *pystr = PyBytes_FromStringAndSize(str, len);
    free(str);
    return pystr;
err:
    /* NOTE(review): a short read at end-of-file also lands here, where
       errno may not describe the failure -- confirm callers never request
       rows past the end of the file. */
    if (str) free(str);
    PyErr_SetFromErrno(PyExc_OSError);
    return NULL;
}
|
||||
|
||||
/****
|
||||
* Extract to binary bytes object containing raw little-endian binary data
|
||||
*/
|
||||
static PyObject *Rocket_extract_binary(Rocket *self, PyObject *args)
{
    /*
     * extract_binary(offset, count)
     *
     * Seek to byte `offset` in the open file and return `count` rows
     * (self->binary_size bytes each) as a bytes object, exactly as they
     * are stored on disk.
     *
     * Raises Exception("no file") when no file is open, ValueError for
     * a negative count, OverflowError when count * binary_size cannot be
     * represented, and OSError on seek, allocation or read failure.
     */
    long count;
    long offset;

    if (!PyArg_ParseTuple(args, "ll", &offset, &count))
        return NULL;
    if (!self->file) {
        PyErr_SetString(PyExc_Exception, "no file");
        return NULL;
    }
    if (count < 0) {
        PyErr_SetString(PyExc_ValueError, "count must not be negative");
        return NULL;
    }

    /* Compute the byte length in size_t and refuse anything that would
       overflow; a plain int product silently wraps for large counts. */
    const size_t row_size = (size_t)self->binary_size;
    if (row_size == 0 ||
        (size_t)count > (size_t)PY_SSIZE_T_MAX / row_size) {
        PyErr_SetString(PyExc_OverflowError,
                        "requested extract is too large");
        return NULL;
    }
    const size_t len = (size_t)count * row_size;

    /* Seek to target location */
    if (fseek(self->file, offset, SEEK_SET) < 0) {
        PyErr_SetFromErrno(PyExc_OSError);
        return NULL;
    }

    /* Nothing to read: malloc(0) may legally return NULL, which must
       not be reported as an allocation failure. */
    if (len == 0)
        return PyBytes_FromStringAndSize("", 0);

    uint8_t *str = malloc(len);
    if (str == NULL) {
        PyErr_SetFromErrno(PyExc_OSError);
        return NULL;
    }

    /* Data in the file is already in the desired little-endian
       binary format, so just read it directly. */
    if (fread(str, row_size, (size_t)count, self->file) != (size_t)count) {
        free(str);
        PyErr_SetFromErrno(PyExc_OSError);
        return NULL;
    }

    PyObject *pystr = PyBytes_FromStringAndSize((char *)str,
                                                (Py_ssize_t)len);
    free(str);
    return pystr;
}
|
||||
|
||||
/****
|
||||
* Extract timestamp
|
||||
*/
|
||||
static PyObject *Rocket_extract_timestamp(Rocket *self, PyObject *args)
{
    /*
     * extract_timestamp(offset)
     *
     * Read the 8-byte little-endian value stored at byte `offset` of the
     * open file and return it as a Python integer (signed 64-bit).
     * Raises Exception("no file") when no file is open and OSError when
     * the seek or the read fails.
     */
    union64_t raw;
    long offset;

    if (!PyArg_ParseTuple(args, "l", &offset))
        return NULL;

    if (self->file == NULL) {
        PyErr_SetString(PyExc_Exception, "no file");
        return NULL;
    }

    /* Position the stream, then pull in the raw timestamp.  The read is
       only attempted when the seek succeeded. */
    int failed = (fseek(self->file, offset, SEEK_SET) < 0);
    if (!failed)
        failed = (fread(&raw.u, 8, 1, self->file) != 1);
    if (failed) {
        PyErr_SetFromErrno(PyExc_OSError);
        return NULL;
    }

    /* Convert from on-disk byte order and hand back the signed view */
    raw.u = le64toh(raw.u);
    return Py_BuildValue("L", (long long)raw.i);
}
|
||||
|
||||
/****
|
||||
* Module and type setup
|
||||
*/
|
||||
|
||||
/* Computed attributes of Rocket objects.  "file_size" has a getter only
   (the setter slot is NULL).  The table ends with an all-NULL sentinel. */
static PyGetSetDef Rocket_getsetters[] = {
    { "file_size", (getter)Rocket_get_file_size, NULL,
      "file size in bytes", NULL },
    { NULL },
};
|
||||
|
||||
/* Struct fields exposed directly as Python attributes: "binary_size" maps
   onto the int field Rocket.binary_size.  The table ends with an all-NULL
   sentinel. */
static PyMemberDef Rocket_members[] = {
    { "binary_size", T_INT, offsetof(Rocket, binary_size), 0,
      "binary size per row" },
    { NULL },
};
|
||||
|
||||
/* Method table for Rocket objects.  Each entry gives the Python-visible
   name, the C implementation, the calling convention, and the docstring
   shown by help().  The table ends with an all-NULL sentinel. */
static PyMethodDef Rocket_methods[] = {
    { "close",
      (PyCFunction)Rocket_close, METH_NOARGS,
      "close(self)\n\n"
      "Close file handle" },

    { "append_string",
      (PyCFunction)Rocket_append_string, METH_VARARGS,
      "append_string(self, count, data, offset, line, start, end, ts)\n\n"
      "Parse string and append data.\n"
      "\n"
      " count: maximum number of rows to add\n"
      " data: string data\n"
      " offset: byte offset into data to start parsing\n"
      " line: current line number of data\n"
      " start: starting timestamp for interval\n"
      " end: end timestamp for interval\n"
      " ts: last timestamp that was previously parsed\n"
      "\n"
      "Raises ParseError if timestamps are non-monotonic, outside\n"
      "the start/end interval etc.\n"
      "\n"
      "On success, return a tuple:\n"
      " added_rows: how many rows were added from the file\n"
      " data_offset: current offset into the data string\n"
      " last_timestamp: last timestamp we parsed\n"
      " linenum: current line number" },

    { "append_binary",
      (PyCFunction)Rocket_append_binary, METH_VARARGS,
      "append_binary(self, count, data, offset, line, start, end, ts)\n\n"
      "Append binary data, which must match the data layout.\n"
      "\n"
      " count: maximum number of rows to add\n"
      " data: binary data\n"
      " offset: byte offset into data to start adding\n"
      " line: current line number (unused)\n"
      " start: starting timestamp for interval\n"
      " end: end timestamp for interval\n"
      " ts: last timestamp that was previously parsed\n"
      "\n"
      "Raises ParseError if timestamps are non-monotonic, outside\n"
      "the start/end interval etc.\n"
      "\n"
      "On success, return a tuple:\n"
      " added_rows: how many rows were added from the file\n"
      " data_offset: current offset into the data string\n"
      " last_timestamp: last timestamp we parsed\n"
      " linenum: current line number (copied from argument)" },

    { "extract_string",
      (PyCFunction)Rocket_extract_string, METH_VARARGS,
      "extract_string(self, offset, count)\n\n"
      "Extract count rows of data from the file at offset offset.\n"
      "Return an ascii formatted string according to the layout" },

    { "extract_binary",
      (PyCFunction)Rocket_extract_binary, METH_VARARGS,
      "extract_binary(self, offset, count)\n\n"
      "Extract count rows of data from the file at offset offset.\n"
      "Return a raw binary string of data matching the data layout." },

    { "extract_timestamp",
      (PyCFunction)Rocket_extract_timestamp, METH_VARARGS,
      "extract_timestamp(self, offset)\n\n"
      "Extract a single timestamp from the file" },

    { NULL },
};
|
||||
|
||||
/* Type object for rocket.Rocket.  Wires the constructor, destructor and
   initializer to the method, member and getset tables.  The type allows
   subclassing (Py_TPFLAGS_BASETYPE). */
static PyTypeObject RocketType = {
    PyVarObject_HEAD_INIT(NULL, 0)

    .tp_name = "rocket.Rocket",
    .tp_basicsize = sizeof(Rocket),
    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,

    .tp_new = Rocket_new,
    .tp_dealloc = (destructor)Rocket_dealloc,
    .tp_init = (initproc)Rocket_init,
    .tp_methods = Rocket_methods,
    .tp_members = Rocket_members,
    .tp_getset = Rocket_getsetters,

    .tp_doc = ("rocket.Rocket(layout, file)\n\n"
               "C implementation of the \"rocket\" data parsing\n"
               "interface, which translates between the binary\n"
               "format on disk and the ASCII or Python list\n"
               "format used when communicating with the rest of\n"
               "the system.")
};
|
||||
|
||||
/* Module-level functions: none.  Only the all-NULL sentinel is present;
   everything is exposed through the Rocket type. */
static PyMethodDef module_methods[] = {
    { NULL },
};
|
||||
|
||||
static struct PyModuleDef moduledef = {
|
||||
PyModuleDef_HEAD_INIT,
|
||||
.m_name = "rocker",
|
||||
.m_doc = "Rocket data parsing and formatting module",
|
||||
.m_size = -1,
|
||||
.m_methods = module_methods,
|
||||
};
|
||||
|
||||
PyMODINIT_FUNC PyInit_rocket(void)
|
||||
{
|
||||
PyObject *module;
|
||||
|
||||
RocketType.tp_new = PyType_GenericNew;
|
||||
if (PyType_Ready(&RocketType) < 0)
|
||||
return NULL;
|
||||
|
||||
module = PyModule_Create(&moduledef);
|
||||
Py_INCREF(&RocketType);
|
||||
PyModule_AddObject(module, "Rocket", (PyObject *)&RocketType);
|
||||
|
||||
ParseError = PyErr_NewException("rocket.ParseError", NULL, NULL);
|
||||
Py_INCREF(ParseError);
|
||||
PyModule_AddObject(module, "ParseError", ParseError);
|
||||
add_parseerror_codes(module);
|
||||
|
||||
return module;
|
||||
}
|
546
nilmdb/server/server.py
Normal file
546
nilmdb/server/server.py
Normal file
|
@ -0,0 +1,546 @@
|
|||
"""CherryPy-based server for accessing NILM database via HTTP"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import socket
|
||||
import traceback
|
||||
|
||||
import psutil
|
||||
import cherrypy
|
||||
|
||||
import nilmdb.server
|
||||
from nilmdb.utils.printf import sprintf
|
||||
from nilmdb.server.errors import NilmDBError
|
||||
from nilmdb.utils.time import string_to_timestamp
|
||||
|
||||
from nilmdb.server.serverutil import (
|
||||
chunked_response,
|
||||
response_type,
|
||||
exception_to_httperror,
|
||||
CORS_allow,
|
||||
json_to_request_params,
|
||||
json_error_page,
|
||||
cherrypy_start,
|
||||
cherrypy_stop,
|
||||
bool_param,
|
||||
)
|
||||
|
||||
# Register CORS_allow as a CherryPy tool attached to the 'on_start_resource'
# hook, so it can be enabled through config keys ('tools.CORS_allow.*') and
# used as a decorator (@cherrypy.tools.CORS_allow(...)).
cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow)
|
||||
|
||||
|
||||
class NilmApp:
    """Base class for the CherryPy apps below; it only stores the
    database handle they share."""

    def __init__(self, db):
        # Handlers reach the backend through self.db.
        self.db = db
|
||||
|
||||
|
||||
# CherryPy apps
|
||||
class Root(NilmApp):
    """Root application for NILM database"""

    # /
    @cherrypy.expose
    def index(self):
        """Plain-text banner naming the server version and host."""
        cherrypy.response.headers['Content-Type'] = 'text/plain'
        return sprintf("This is NilmDB version %s, running on host %s.\n",
                       nilmdb.__version__, socket.getfqdn())

    # /favicon.ico
    @cherrypy.expose
    def favicon_ico(self):
        """There is no favicon; always answer 404."""
        raise cherrypy.NotFound()

    # /version
    @cherrypy.expose
    @cherrypy.tools.json_out()
    def version(self):
        """Return the server version as JSON."""
        return nilmdb.__version__

    # /dbinfo
    @cherrypy.expose
    @cherrypy.tools.json_out()
    def dbinfo(self):
        """Return a dictionary with the database path,
        size of the database in bytes, and free disk space in bytes"""
        basepath = self.db.get_basepath()
        disk = psutil.disk_usage(basepath)
        db_bytes = nilmdb.utils.du(basepath)

        # Space used by everything except the database, and space the
        # filesystem reports as neither used nor free.  Both are clamped
        # at zero.
        other_bytes = max(disk.used - db_bytes, 0)
        reserved_bytes = max(disk.total - disk.used - disk.free, 0)

        info = {}
        info["path"] = basepath
        info["size"] = db_bytes
        info["other"] = other_bytes
        info["reserved"] = reserved_bytes
        info["free"] = disk.free
        return info
|
||||
|
||||
|
||||
class Stream(NilmApp):
    """Stream-specific operations"""

    # Helpers
    def _get_times(self, start_param, end_param):
        """Convert the optional 'start' and 'end' request parameters to
        timestamps.

        Returns (start, end); either is None when the corresponding
        parameter was None.  Raises a 400 HTTPError when a value can't be
        parsed, or when both are given and start is not before end.
        """
        (start, end) = (None, None)
        try:
            if start_param is not None:
                start = string_to_timestamp(start_param)
        except Exception:
            raise cherrypy.HTTPError("400 Bad Request", sprintf(
                "invalid start (%s): must be a numeric timestamp",
                start_param))
        try:
            if end_param is not None:
                end = string_to_timestamp(end_param)
        except Exception:
            raise cherrypy.HTTPError("400 Bad Request", sprintf(
                "invalid end (%s): must be a numeric timestamp", end_param))
        if start is not None and end is not None:
            if start >= end:
                raise cherrypy.HTTPError(
                    "400 Bad Request",
                    sprintf("start must precede end (%s >= %s)",
                            start_param, end_param))
        return (start, end)

    # /stream/list
    # /stream/list?layout=float32_8
    # /stream/list?path=/newton/prep&extended=1
    @cherrypy.expose
    @cherrypy.tools.json_out()
    def list(self, path=None, layout=None, extended=None):
        """List all streams in the database.  With optional path or
        layout parameter, just list streams that match the given path
        or layout.

        If extended is missing or zero, returns a list of lists
        containing the path and layout: [ path, layout ]

        If extended is true, returns a list of lists containing
        extended info: [ path, layout, extent_min, extent_max,
        total_rows, total_seconds ].  More data may be added.
        """
        # Query parameters arrive as strings, and bool("0") is True, so
        # parse the flag the same way insert/extract parse theirs.
        return self.db.stream_list(path, layout, bool_param(extended))

    # /stream/create?path=/newton/prep&layout=float32_8
    @cherrypy.expose
    @cherrypy.tools.json_in()
    @cherrypy.tools.json_out()
    @exception_to_httperror(NilmDBError, ValueError)
    @cherrypy.tools.CORS_allow(methods=["POST"])
    def create(self, path, layout):
        """Create a new stream in the database.  Provide path
        and one of the nilmdb.layout.layouts keys.
        """
        return self.db.stream_create(path, layout)

    # /stream/destroy?path=/newton/prep
    @cherrypy.expose
    @cherrypy.tools.json_in()
    @cherrypy.tools.json_out()
    @exception_to_httperror(NilmDBError)
    @cherrypy.tools.CORS_allow(methods=["POST"])
    def destroy(self, path):
        """Delete a stream.  Fails if any data is still present."""
        return self.db.stream_destroy(path)

    # /stream/rename?oldpath=/newton/prep&newpath=/newton/prep/1
    @cherrypy.expose
    @cherrypy.tools.json_in()
    @cherrypy.tools.json_out()
    @exception_to_httperror(NilmDBError, ValueError)
    @cherrypy.tools.CORS_allow(methods=["POST"])
    def rename(self, oldpath, newpath):
        """Rename a stream."""
        return self.db.stream_rename(oldpath, newpath)

    # /stream/get_metadata?path=/newton/prep
    # /stream/get_metadata?path=/newton/prep&key=foo&key=bar
    @cherrypy.expose
    @cherrypy.tools.json_out()
    def get_metadata(self, path, key=None):
        """Get metadata for the named stream.  If optional
        key parameters are specified, only return metadata
        matching the given keys."""
        try:
            data = self.db.stream_get_metadata(path)
        except nilmdb.server.nilmdb.StreamError as e:
            raise cherrypy.HTTPError("404 Not Found", str(e))
        if key is None:  # If no keys specified, return them all
            key = list(data.keys())
        elif not isinstance(key, list):
            # A single key=... parameter arrives as a bare value
            key = [key]
        result = {}
        for k in key:
            if k in data:
                result[k] = data[k]
            else:  # Return "None" for keys with no matching value
                result[k] = None
        return result

    # Helper for set_metadata and update_metadata
    def _metadata_helper(self, function, path, data):
        """Validate 'data' and pass it to function(path, data).

        'data' may be a dict or a JSON-encoded string.  Raises
        NilmDBError when it can't be decoded into a dict, or when any
        value is not a string or number.
        """
        if not isinstance(data, dict):
            try:
                data = dict(json.loads(data))
            except (TypeError, ValueError) as e:
                # json.loads raises ValueError (JSONDecodeError) for
                # malformed JSON and TypeError for non-string input;
                # dict() raises either one for JSON that isn't a mapping.
                raise NilmDBError("can't parse 'data' parameter: " + str(e))
        for key in data:
            if not isinstance(data[key], (str, float, int)):
                raise NilmDBError("metadata values must be a string or number")
        function(path, data)

    # /stream/set_metadata?path=/newton/prep&data=<json>
    @cherrypy.expose
    @cherrypy.tools.json_in()
    @cherrypy.tools.json_out()
    @exception_to_httperror(NilmDBError, LookupError)
    @cherrypy.tools.CORS_allow(methods=["POST"])
    def set_metadata(self, path, data):
        """Set metadata for the named stream, replacing any existing
        metadata.  Data can be json-encoded or a plain dictionary."""
        self._metadata_helper(self.db.stream_set_metadata, path, data)

    # /stream/update_metadata?path=/newton/prep&data=<json>
    @cherrypy.expose
    @cherrypy.tools.json_in()
    @cherrypy.tools.json_out()
    @exception_to_httperror(NilmDBError, LookupError, ValueError)
    @cherrypy.tools.CORS_allow(methods=["POST"])
    def update_metadata(self, path, data):
        """Update metadata for the named stream by passing the given
        data to the database's stream_update_metadata (presumably this
        merges with existing metadata rather than replacing it; confirm
        against NilmDB).  Data can be json-encoded or a plain
        dictionary."""
        self._metadata_helper(self.db.stream_update_metadata, path, data)

    # /stream/insert?path=/newton/prep
    @cherrypy.expose
    @cherrypy.tools.json_out()
    @exception_to_httperror(NilmDBError, ValueError)
    @cherrypy.tools.CORS_allow(methods=["PUT"])
    def insert(self, path, start, end, binary=False):
        """
        Insert new data into the database.  Provide textual data
        (matching the path's layout) as a HTTP PUT.

        If 'binary' is True, expect raw binary data, rather than lines
        of ASCII-formatted data.  Raw binary data is always
        little-endian and matches the database types (including an
        int64 timestamp).
        """
        binary = bool_param(binary)

        # Important that we always read the input before throwing any
        # errors, to keep lengths happy for persistent connections.
        # Note that CherryPy 3.2.2 has a bug where this fails for GET
        # requests, if we ever want to handle those (issue #1134)
        body = cherrypy.request.body.read()

        # Verify content type for binary data
        content_type = cherrypy.request.headers.get('content-type')
        if binary and content_type:
            if content_type != "application/octet-stream":
                raise cherrypy.HTTPError("400", "Content type must be "
                                         "application/octet-stream for "
                                         "binary data, not " + content_type)

        # Note that non-binary data is *not* decoded from bytes to string,
        # but rather passed directly to stream_insert.

        # Check path and get layout
        if len(self.db.stream_list(path=path)) != 1:
            raise cherrypy.HTTPError("404", "No such stream: " + path)

        # Check limits
        (start, end) = self._get_times(start, end)

        # Pass the data directly to nilmdb, which will parse it and
        # raise a ValueError if there are any problems.
        self.db.stream_insert(path, start, end, body, binary)

        # Done
        return

    # /stream/remove?path=/newton/prep
    # /stream/remove?path=/newton/prep&start=1234567890.0&end=1234567899.0
    @cherrypy.expose
    @cherrypy.tools.json_in()
    @cherrypy.tools.CORS_allow(methods=["POST"])
    @chunked_response
    @response_type("application/x-json-stream")
    def remove(self, path, start=None, end=None):
        """
        Remove data from the backend database.  Removes all data in
        the interval [start, end).

        Returns the number of data points removed.  Since this is a potentially
        long-running operation, multiple numbers may be returned as the
        data gets removed from the backend database.  The total number of
        points removed is the sum of all of these numbers.
        """
        (start, end) = self._get_times(start, end)

        if len(self.db.stream_list(path=path)) != 1:
            raise cherrypy.HTTPError("404", "No such stream: " + path)

        def content(start, end):
            # Note: disable chunked responses to see tracebacks from here.
            while True:
                (removed, restart) = self.db.stream_remove(path, start, end)
                response = json.dumps(removed) + "\r\n"
                yield response.encode('utf-8')
                if restart is None:
                    break
                # Continue from where the backend stopped
                start = restart
        return content(start, end)

    # /stream/intervals?path=/newton/prep
    # /stream/intervals?path=/newton/prep&start=1234567890.0&end=1234567899.0
    # /stream/intervals?path=/newton/prep&diffpath=/newton/prep2
    @cherrypy.expose
    @chunked_response
    @response_type("application/x-json-stream")
    def intervals(self, path, start=None, end=None, diffpath=None):
        """
        Get intervals from backend database.  Streams the resulting
        intervals as JSON strings separated by CR LF pairs.  This may
        make multiple requests to the nilmdb backend to avoid causing
        it to block for too long.

        Returns intervals between 'start' and 'end' belonging to
        'path'.  If 'diffpath' is provided, the set-difference between
        intervals in 'path' and intervals in 'diffpath' are
        returned instead.

        Note that the response type is the non-standard
        'application/x-json-stream' for lack of a better option.
        """
        (start, end) = self._get_times(start, end)

        if len(self.db.stream_list(path=path)) != 1:
            raise cherrypy.HTTPError("404", "No such stream: " + path)

        if diffpath and len(self.db.stream_list(path=diffpath)) != 1:
            raise cherrypy.HTTPError("404", "No such stream: " + diffpath)

        def content(start, end):
            # Note: disable chunked responses to see tracebacks from here.
            while True:
                (ints, restart) = self.db.stream_intervals(path, start, end,
                                                           diffpath)
                response = ''.join([json.dumps(i) + "\r\n" for i in ints])
                yield response.encode('utf-8')
                if restart is None:
                    break
                # Continue from where the backend stopped
                start = restart
        return content(start, end)

    # /stream/extract?path=/newton/prep&start=1234567890.0&end=1234567899.0
    @cherrypy.expose
    @chunked_response
    def extract(self, path, start=None, end=None,
                count=False, markup=False, binary=False):
        """
        Extract data from backend database.  Streams the resulting
        entries as ASCII text lines separated by newlines.  This may
        make multiple requests to the nilmdb backend to avoid causing
        it to block for too long.

        If 'count' is True, returns a count rather than actual data.

        If 'markup' is True, adds comments to the stream denoting each
        interval's start and end timestamp.

        If 'binary' is True, return raw binary data, rather than lines
        of ASCII-formatted data.  Raw binary data is always
        little-endian and matches the database types (including an
        int64 timestamp).
        """
        binary = bool_param(binary)
        markup = bool_param(markup)
        count = bool_param(count)

        (start, end) = self._get_times(start, end)

        # Check path and get layout
        if len(self.db.stream_list(path=path)) != 1:
            raise cherrypy.HTTPError("404", "No such stream: " + path)

        if binary:
            content_type = "application/octet-stream"
            if markup or count:
                raise cherrypy.HTTPError("400", "can't mix binary and "
                                         "markup or count modes")
        else:
            content_type = "text/plain"
        cherrypy.response.headers['Content-Type'] = content_type

        def content(start, end):
            # Note: disable chunked responses to see tracebacks from here.
            if count:
                matched = self.db.stream_extract(path, start, end,
                                                 count=True)
                yield sprintf(b"%d\n", matched)
                return

            while True:
                (data, restart) = self.db.stream_extract(
                    path, start, end, count=False,
                    markup=markup, binary=binary)
                yield data

                if restart is None:
                    return
                # Continue from where the backend stopped
                start = restart
        return content(start, end)
|
||||
|
||||
|
||||
class Exiter:
    """App that exits the server, for testing"""

    @cherrypy.expose
    def index(self):
        """Stream a short farewell, then raise SystemExit from inside the
        response generator."""
        cherrypy.response.headers['Content-Type'] = 'text/plain'

        def farewell():
            # The exit is raised only after the message has been yielded.
            yield b'Exiting by request'
            raise SystemExit

        return farewell()

    # Streaming must be enabled for this handler so the generator above is
    # consumed lazily.
    index._cp_config = {'response.stream': True}
|
||||
|
||||
|
||||
class Server:
    """Configures CherryPy and mounts the NilmDB applications on it."""

    def __init__(self, db, host='127.0.0.1', port=8080,
                 stoppable=False,        # whether /exit URL exists
                 fast_shutdown=False,    # don't wait for clients to disconn.
                 force_traceback=False,  # include traceback in all errors
                 basepath='',            # base URL path for cherrypy.tree
                 ):
        # Save server version, just for verification during tests
        self.version = nilmdb.__version__

        self.db = db
        thread_safe = getattr(db, "_thread_safe", None)
        if not thread_safe:
            raise KeyError("Database object " + str(db) + " doesn't claim "
                           "to be thread safe. You should pass "
                           "nilmdb.utils.serializer_proxy(NilmDB)(args) "
                           "rather than NilmDB(args).")

        # Global server configuration
        global_config = {
            'environment': 'embedded',
            'server.socket_host': host,
            'server.socket_port': port,
            'engine.autoreload.on': False,
            'server.max_request_body_size': 8*1024*1024,
        }
        cherrypy.config.update(global_config)

        # Application specific configuration
        app_config = {
            # Errors are reported as JSON so clients can parse them
            'error_page.default': self.json_error_page,

            # A default header to help identify that things are working
            'response.headers.X-Jim-Is-Awesome': 'yeah',

            # Cross-Origin Resource Sharing (CORS) handler, so we can
            # correctly respond to browsers' CORS preflight requests.
            # This also limits verbs to GET and HEAD by default.
            'tools.CORS_allow.on': True,
            'tools.CORS_allow.methods': ['GET', 'HEAD'],

            # Let 'json_in' also accept other content-types (like
            # x-www-form-urlencoded), and treat JSON as a dict that
            # fills the request parameters.
            'tools.json_in.force': False,
            'tools.json_in.processor': json_to_request_params,

            # Send tracebacks in error responses.  They're hidden by the
            # error_page function for client errors (code 400-499).
            'request.show_tracebacks': True,
        }
        self.force_traceback = force_traceback

        # Patch CherryPy error handler to never pad out error messages.
        # This isn't necessary, but then again, neither is padding the
        # error messages.
        cherrypy._cperror._ie_friendly_error_sizes = {}

        # Build up the application and mount it
        root = Root(self.db)
        root.stream = Stream(self.db)
        if stoppable:
            root.exit = Exiter()
        cherrypy.tree.apps = {}
        cherrypy.tree.mount(root, basepath, config={"/": app_config})

        # Shutdowns normally wait for clients to disconnect.  To speed
        # up tests, set fast_shutdown = True
        cherrypy.server.shutdown_timeout = 0 if fast_shutdown else 5

        # Set up the WSGI application pointer for external programs
        self.wsgi_application = cherrypy.tree

    def json_error_page(self, status, message, traceback, version):
        """Return a custom error page in JSON so the client can parse it"""
        return json_error_page(status, message, traceback, version,
                               self.force_traceback)

    def start(self, blocking=False, event=None):
        """Start the server by delegating to cherrypy_start."""
        cherrypy_start(blocking, event)

    def stop(self):
        """Stop the server by delegating to cherrypy_stop."""
        cherrypy_stop()
|
||||
|
||||
|
||||
# Use a single global nilmdb.server.NilmDB and nilmdb.server.Server
# instance since the database can only be opened once.  For this to
# work, the web server must use only a single process and single
# Python interpreter.  Multiple threads are OK.
_wsgi_server = None


def wsgi_application(dbpath, basepath):
    """Return a WSGI application object with a database at the
    specified path.

    'dbpath' is a filesystem location, e.g. /home/nilm/db

    'basepath' is the URL path of the application base, which
    is the same as the first argument to Apache's WSGIScriptAlias
    directive.
    """
    def describe_failure(environ):
        # Must be called while the startup exception is being handled,
        # since it relies on traceback.format_exc().
        import pprint
        report = sprintf("Initializing database at path '%s' failed:\n\n",
                         dbpath)
        report += traceback.format_exc()
        import pwd
        import grp
        uid = os.getuid()
        gid = os.getgid()
        report += sprintf("\nRunning as: uid=%d (%s), gid=%d (%s) "
                          "on host %s, pid %d\n",
                          uid, pwd.getpwuid(uid)[0],
                          gid, grp.getgrgid(gid)[0],
                          socket.gethostname(), os.getpid())
        report += sprintf("\nEnvironment:\n%s\n", pprint.pformat(environ))
        return report

    def application(environ, start_response):
        global _wsgi_server
        if _wsgi_server is None:
            # First request (or every earlier attempt failed): try to
            # start the server.
            try:
                proxy_class = nilmdb.utils.serializer_proxy(
                    nilmdb.server.NilmDB)
                db = proxy_class(dbpath)
                _wsgi_server = nilmdb.server.Server(
                    db, basepath=basepath.rstrip('/'))
            except Exception:
                # Build an error message on failure
                err = describe_failure(environ)
            if _wsgi_server is None:
                # Serve up the error with our own mini WSGI app.
                payload = err.encode('utf-8')
                headers = [('Content-type', 'text/plain; charset=utf-8'),
                           ('Content-length', str(len(payload)))]
                start_response("500 Internal Server Error", headers)
                return [payload]

        # Call the normal application
        return _wsgi_server.wsgi_application(environ, start_response)
    return application
|
225
nilmdb/server/serverutil.py
Normal file
225
nilmdb/server/serverutil.py
Normal file
|
@ -0,0 +1,225 @@
|
|||
"""Miscellaneous decorators and other helpers for running a CherryPy
|
||||
server"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import decorator
|
||||
import functools
|
||||
import threading
|
||||
|
||||
import cherrypy
|
||||
|
||||
|
||||
# Helper to parse parameters into booleans
|
||||
def bool_param(s):
    """Interpret request parameter 's' as a boolean.

    Strings are matched case-insensitively against a small set of
    true/false spellings.  Values for which the string handling fails
    (for example objects without a lower() method) fall back to plain
    bool(s).  A string that matches neither set raises a
    "400 Bad Request" cherrypy.HTTPError.
    """
    falsy_words = ("0", "false", "f", "no", "n")
    truthy_words = ("1", "true", "t", "yes", "y")
    try:
        ss = s.lower()
        for words, verdict in ((falsy_words, False), (truthy_words, True)):
            if ss in words:
                return verdict
    except Exception:
        return bool(s)
    raise cherrypy.HTTPError("400 Bad Request",
                             "can't parse parameter: " + ss)
|
||||
|
||||
|
||||
# Decorators
|
||||
def chunked_response(func):
    """Decorator that turns on response streaming for 'func'.

    It attaches a '_cp_config' attribute to the function and returns
    the same function object.
    """
    # Set 'response.stream' to False to get better tracebacks from
    # some requests (/stream/extract, /stream/intervals).
    setattr(func, '_cp_config', {'response.stream': True})
    return func
|
||||
|
||||
|
||||
def response_type(content_type):
    """Build a decorator that sets the response's Content-Type header
    to 'content_type' before each call of the decorated function."""
    def wrapper(func, *args, **kwargs):
        out_headers = cherrypy.response.headers
        out_headers['Content-Type'] = content_type
        result = func(*args, **kwargs)
        return result
    make_decorator = decorator.decorator
    return make_decorator(wrapper)
|
||||
|
||||
|
||||
def exception_to_httperror(*expected):
    """Build a decorator that converts the listed exception types into
    a "400 Bad Request" HTTPError carrying the original message.

        @exception_to_httperror(NilmDBError, ValueError)
        def foo():
            pass

    Exceptions of other types propagate unchanged.
    """
    def wrapper(func, *args, **kwargs):
        try:
            return func(*args, **kwargs)
        except expected as exc:
            # Re-raise as an HTTPError, but keep the original traceback
            http_exc = cherrypy.HTTPError("400 Bad Request", str(exc))
            raise http_exc.with_traceback(exc.__traceback__)
    # CherryPy needs the wrapped function's argspec preserved in order
    # to handle argument errors correctly; decorator.decorator takes
    # care of that.
    return decorator.decorator(wrapper)
|
||||
|
||||
|
||||
# Custom CherryPy tools
|
||||
def CORS_allow(methods):
    """CherryPy tool that combines method filtering with CORS support:

    - Adds an Allow: header to every response.
    - Echoes the request's Origin back in Access-Control-Allow-Origin.
    - Answers CORS preflight (OPTIONS) requests with an empty 200 OK.
    - Raises 405 if request.method is not in 'methods'.

    It is similar to cherrypy.tools.allow, with the CORS stuff added.

    Add this to CherryPy with:
    cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow)
    """
    req_headers = cherrypy.request.headers
    resp_headers = cherrypy.response.headers

    # Normalize 'methods' to a list of upper-case names.
    if not isinstance(methods, (tuple, list)):
        methods = [methods]
    allowed = [name.upper() for name in methods if name]
    if not allowed:
        allowed = ['GET', 'HEAD']
    elif 'GET' in allowed and 'HEAD' not in allowed:
        allowed.append('HEAD')
    allow_value = ', '.join(allowed)
    resp_headers['Allow'] = allow_value

    # Allow all origins
    if 'Origin' in req_headers:
        resp_headers['Access-Control-Allow-Origin'] = req_headers['Origin']

    # If it's a CORS preflight request, answer it right here.
    wanted_method = req_headers.get("Access-Control-Request-Method", None)
    wanted_headers = req_headers.get("Access-Control-Request-Headers", None)
    is_preflight = (cherrypy.request.method == "OPTIONS" and
                    wanted_method and wanted_headers)
    if is_preflight:
        resp_headers['Access-Control-Allow-Headers'] = wanted_headers
        resp_headers['Access-Control-Allow-Methods'] = allow_value
        # Try to stop further processing and return a 200 OK
        cherrypy.response.status = "200 OK"
        cherrypy.response.body = b""
        cherrypy.request.handler = lambda: ""
        return

    # Reject methods that were not explicitly allowed
    if cherrypy.request.method not in allowed:
        raise cherrypy.HTTPError(405)
|
||||
|
||||
|
||||
# Helper for json_in tool to process JSON data into normal request
|
||||
# parameters.
|
||||
def json_to_request_params(body):
    """Run CherryPy's JSON body processor on 'body' and merge the
    decoded object into the request parameters.

    Raises HTTPError 415 if the decoded JSON is not a dict.
    """
    cherrypy.lib.jsontools.json_processor(body)
    decoded = cherrypy.request.json
    if isinstance(decoded, dict):
        cherrypy.request.params.update(decoded)
        return
    raise cherrypy.HTTPError(415)
|
||||
|
||||
|
||||
# Used as an "error_page.default" handler
|
||||
def json_error_page(status, message, traceback, version,
                    force_traceback=False):
    """Return a custom error page in JSON so the client can parse it.

    'status' is a status line such as "404 Not Found"; its leading
    number is parsed as the status code.  'message', 'traceback' and
    'version' are strings.

    The traceback is blanked for 4xx (client's fault) errors unless
    'force_traceback' is true.  As a side effect, the current
    response's Content-Type header is set to JSON.

    Returns a compact JSON string with the keys "status", "message",
    "version" and "traceback".
    """
    errordata = {"status": status,
                 "message": message,
                 "version": version,
                 "traceback": traceback}
    # Don't send a traceback if the error was 400-499 (client's fault)
    code = int(status.split()[0])
    if not force_traceback and 400 <= code <= 499:
        errordata["traceback"] = ""
    # Override the response type, which was previously set to text/html
    cherrypy.serving.response.headers['Content-Type'] = (
        "application/json;charset=utf-8")

    # Undo the HTML escaping that cherrypy's get_error_page function applies
    # (cherrypy issue 1135).  "&amp;" is decoded last so that an escaped
    # ampersand followed by "lt;" is not decoded twice.
    def unescape(text):
        text = text.replace("&lt;", "<")
        text = text.replace("&gt;", ">")
        text = text.replace("&amp;", "&")
        return text
    errordata = {k: unescape(v) for k, v in errordata.items()}
    return json.dumps(errordata, separators=(',', ':'))
|
||||
|
||||
|
||||
class CherryPyExit(SystemExit):
    """SystemExit subclass raised in place of CherryPy's os._exit call."""
|
||||
|
||||
|
||||
def cherrypy_patch_exit():
    """Patch the CherryPy bus so that a failed startup raises
    CherryPyExit instead of killing the process, and install a thread
    exception hook that stays quiet about it.

    The patch is applied at most once per bus; later calls return
    immediately.
    """
    # Cherrypy stupidly calls os._exit(70) when it can't bind the port
    # and exits.  Instead of that, raise a CherryPyExit (derived from
    # SystemExit).  This exception may not make it back up to the caller
    # due to internal thread use in the CherryPy engine, but there should
    # be at least some indication that it happened.
    bus = cherrypy.process.wspbus.bus
    if "_patched_exit" in bus.__dict__:
        return
    bus._patched_exit = True

    def patched_exit(orig):
        # Swap os._exit for a raising stub only while the original
        # bus.exit runs, and always restore the real one afterwards.
        real_exit = os._exit

        def fake_exit(code):
            raise CherryPyExit(code)
        os._exit = fake_exit
        try:
            orig()
        finally:
            os._exit = real_exit
    bus.exit = functools.partial(patched_exit, bus.exit)

    # A behavior change in Python 3.8 means that some thread exceptions,
    # derived from SystemExit, now print tracebacks where they didn't
    # use to: https://bugs.python.org/issue1230540
    # Install a thread exception hook that ignores CherryPyExit;
    # to make this match the behavior where we didn't set
    # threading.excepthook, we also need to ignore SystemExit.
    def hook(args):
        if args.exc_type == CherryPyExit or args.exc_type == SystemExit:
            return
        # Anything else goes to the regular interpreter-level hook.
        sys.excepthook(args.exc_type, args.exc_value,
                       args.exc_traceback)  # pragma: no cover
    threading.excepthook = hook
|
||||
|
||||
|
||||
# Start/stop CherryPy standalone server
|
||||
def cherrypy_start(blocking=False, event=False):
    """Start the CherryPy server, handling errors and signals
    somewhat gracefully.

    'blocking': if true, do not return until the engine reaches its
    EXITING state.

    'event': optional object with a set() method (for example a
    threading.Event) that is set once the engine has started.  Both
    None and the default False mean "no event".

    While blocking, KeyboardInterrupt and IOError shut the engine down
    quietly; SystemExit shuts it down and is then re-raised.
    """

    cherrypy_patch_exit()

    # Start the server
    cherrypy.engine.start()

    # Signal that the engine has started successfully.  The default
    # for 'event' is False rather than None, so both must be skipped;
    # checking only for None would call set() on a bool.
    if event is not None and event is not False:
        event.set()

    if blocking:
        try:
            cherrypy.engine.wait(cherrypy.engine.states.EXITING,
                                 interval=0.1, channel='main')
        except (KeyboardInterrupt, IOError):
            cherrypy.engine.log('Keyboard Interrupt: shutting down')
            cherrypy.engine.exit()
        except SystemExit:
            cherrypy.engine.log('SystemExit raised: shutting down')
            cherrypy.engine.exit()
            raise
|
||||
|
||||
|
||||
# Stop CherryPy server
|
||||
def cherrypy_stop():
    """Stop the CherryPy server by asking its engine to exit."""
    cherrypy.engine.exit()
|
|
@ -1,46 +0,0 @@
|
|||
from nilmdb import Interval, IntervalSet, IntervalError, FileInterval
|
||||
from datetime import datetime
|
||||
from nose.tools import assert_raises
|
||||
|
||||
from test_interval import iset
|
||||
|
||||
def fiset(string):
    """Like iset, but builds with FileIntervals instead of Intervals"""
    # Character position i in 'string' maps to January 1st of year
    # 2000+i.  "[" opens an interval, "|" closes one and opens the
    # next, "]" closes one; any other character is skipped.
    # Every FileInterval is created with the filename "test.dat".
    iset = IntervalSet()
    for i, c in enumerate(string):
        day = datetime.strptime("{0:04d}".format(i+2000), "%Y")
        if (c == "["):
            start = day
        elif (c == "|"):
            iset += FileInterval(start, day, "test.dat")
            start = day
        elif (c == "]"):
            iset += FileInterval(start, day, "test.dat")
            # Drop 'start' so that a stray "|" or "]" without a
            # matching "[" fails loudly instead of reusing it.
            del start
    return iset
|
||||
|
||||
def test_fileinterval_vs_interval():
    """Test FileInterval/Interval inheritance"""

    # Same time span, built once from Intervals and once from
    # FileIntervals.
    i = iset("[--]")
    f = fiset("[--]")

    # check types
    assert(isinstance(i[0], Interval))
    assert(not isinstance(i[0], FileInterval))
    assert(isinstance(f[0], Interval))
    assert(isinstance(f[0], FileInterval))

    # when doing an intersection, result should be a subset of the first arg
    u = (i & f)
    assert(isinstance(u[0], Interval))
    assert(not isinstance(u[0], FileInterval))
    u = (f & i)
    assert(isinstance(u[0], Interval))
    assert(isinstance(u[0], FileInterval))

    # they're still the same though
    assert(i == f == u)

    # just for coverage
    assert_raises(IntervalError, fiset("[]")[0].subset, f[0].start, f[0].end)
|
|
@ -1,188 +0,0 @@
|
|||
from nilmdb import Interval, IntervalSet, IntervalError
|
||||
from datetime import datetime
|
||||
from nose.tools import assert_raises
|
||||
import itertools
|
||||
|
||||
def test_interval():
    """Test the Interval class"""
    # Three dates in increasing order: d1 < d2 < d3
    d1 = datetime.strptime("19801205","%Y%m%d")
    d2 = datetime.strptime("19900216","%Y%m%d")
    d3 = datetime.strptime("20111205","%Y%m%d")

    # basic construction
    i = Interval(d1, d1)
    i = Interval(d1, d3)
    assert(i.start == d1)
    assert(i.end == d3)

    # assignment should work
    i.start = d2
    try:
        # Moving the end before the start is expected to be rejected
        i.end = d1
        raise Exception("should have died there")
    except IntervalError:
        pass
    i.start = d1
    i.end = d2

    # end before start
    assert_raises(IntervalError, Interval, d3, d1)

    # wrong type
    assert_raises(IntervalError, Interval, 1, 2)

    # compare
    assert(Interval(d1, d2) == Interval(d1, d2))
    assert(Interval(d1, d2) < Interval(d1, d3))
    assert(Interval(d1, d3) > Interval(d1, d2))
    assert(Interval(d1, d2) < Interval(d2, d3))
    assert(Interval(d1, d3) < Interval(d2, d3))
    assert(Interval(d2, d2) > Interval(d1, d3))
    assert(Interval(d3, d3) == Interval(d3, d3))
    # NOTE(review): cmp() is a Python 2 builtin; this test predates
    # the Python 3 code elsewhere in the project.
    assert_raises(TypeError, cmp, i, 123)

    # subset
    assert(Interval(d1, d3).subset(d1, d2) == Interval(d1, d2))
    assert_raises(IntervalError, Interval(d2, d3).subset, d1, d2)

    # append
    assert(Interval(d1, d2).is_adjacent(Interval(d2,d3)))
    assert(Interval(d2, d3).is_adjacent(Interval(d1,d2)))
    assert(not Interval(d2, d3).is_adjacent(Interval(d1,d3)))
    assert_raises(TypeError, Interval(d1, d2).is_adjacent, 1)

    # misc
    # repr() must round-trip through eval() once the "datetime."
    # module prefix is stripped.
    assert(repr(i) == repr(eval(repr(i).replace("datetime.",""))))
    assert(str(i) == "[1980-12-05 00:00:00 -> 1990-02-16 00:00:00]")
|
||||
|
||||
def test_interval_intersect():
    """Test Interval intersections"""
    # Build every ordered pair of four distinct dates, then every
    # pair of those pairs.
    dates = [ datetime.strptime(year, "%y") for year in [ "00", "01", "02", "03" ] ]
    perm = list(itertools.permutations(dates, 2))
    prod = list(itertools.product(perm, perm))
    # Expected result, keyed by the index of the combination in 'prod'.
    # Indices listed under neither key are expected to raise
    # IntervalError during construction.
    should_intersect = {
        False: [4, 5, 8, 20, 48, 56, 60, 96, 97, 100],
        True: [0, 1, 2, 12, 13, 14, 16, 17, 24, 25, 26, 28, 29,
               32, 49, 50, 52, 53, 61, 62, 64, 65, 68, 98, 101, 104]}
    for i,((a,b),(c,d)) in enumerate(prod):
        try:
            i1 = Interval(a, b)
            i2 = Interval(c, d)
            # intersects() must be symmetric
            assert(i1.intersects(i2) == i2.intersects(i1))
            assert(i in should_intersect[i1.intersects(i2)])
        except IntervalError:
            assert(i not in should_intersect[True] and
                   i not in should_intersect[False])
    # NOTE(review): the original nesting of this final check (inside
    # or after the loop) could not be recovered -- confirm.
    assert_raises(TypeError, i1.intersects, 1234)
|
||||
|
||||
def test_intervalset_construct():
    """Test interval set construction"""
    dates = [ datetime.strptime(year, "%y") for year in [ "00", "01", "02", "03" ]]

    # a and b are adjacent, c spans both of them, d follows b
    a = Interval(dates[0], dates[1])
    b = Interval(dates[1], dates[2])
    c = Interval(dates[0], dates[2])
    d = Interval(dates[2], dates[3])

    iseta = IntervalSet(a)
    isetb = IntervalSet([a, b])
    isetc = IntervalSet([a])
    assert(iseta != isetb)
    assert(iseta == isetc)
    assert(iseta != 3)
    assert(IntervalSet(a) != IntervalSet(b))

    # NOTE(review): cmp() is a Python 2 builtin.
    assert_raises(TypeError, cmp, iseta, isetb)
    # c overlaps a and b, so this combination must be rejected
    assert_raises(IntervalError, IntervalSet, [a, b, c])
    assert_raises(TypeError, IntervalSet, [1, 2])

    iset = IntervalSet(isetb) # test iterator
    assert(iset == isetb)
    assert(len(iset) == 2)
    assert(len(IntervalSet()) == 0)

    # Test adding
    iset = IntervalSet(a)
    iset += IntervalSet(b)
    assert(iset == IntervalSet([a, b]))
    iset = IntervalSet(a)
    iset += b
    assert(iset == IntervalSet([a, b]))
    iset = IntervalSet(a) + IntervalSet(b)
    assert(iset == IntervalSet([a, b]))
    iset = IntervalSet(b) + a
    assert(iset == IntervalSet([a, b]))

    # A set consisting of [0-1],[1-2] should match a set consisting of [0-2]
    assert(IntervalSet([a,b]) == IntervalSet([c]))
    # Etc
    assert(IntervalSet([a,d]) != IntervalSet([c]))
    assert(IntervalSet([c]) != IntervalSet([a,d]))
    assert(IntervalSet([c,d]) != IntervalSet([b,d]))

    # misc
    # repr() must round-trip through eval() once the "datetime."
    # module prefix is stripped.
    assert(repr(iset) == repr(eval(repr(iset).replace("datetime.",""))))
|
||||
|
||||
def iset(string):
    """Build an IntervalSet from a string, for testing purposes

    Each character is a year
      [ = interval start
      | = interval end + adjacent start
      ] = interval end
      anything else is ignored
    """
    # Note that the local name 'iset' shadows this function inside
    # its own body.
    iset = IntervalSet()
    for i, c in enumerate(string):
        # Character position i maps to January 1st of year 2000+i
        day = datetime.strptime("{0:04d}".format(i+2000), "%Y")
        if (c == "["):
            start = day
        elif (c == "|"):
            iset += Interval(start, day)
            start = day
        elif (c == "]"):
            iset += Interval(start, day)
            # Drop 'start' so that a stray "|" or "]" without a
            # matching "[" fails loudly instead of reusing it.
            del start
    return iset
|
||||
|
||||
def test_intervalset_iset():
    """Test basic iset construction"""
    # NOTE(review): iset() maps each character position to a year, so
    # the column alignment of these literals matters -- confirm that
    # their spacing is intact.
    assert(iset(" [----] ") ==
           iset(" [-|--] "))

    assert(iset("[] [--] ") +
           iset(" [] [--]") ==
           iset("[|] [-----]"))
|
||||
|
||||
def test_intervalset_intsersect():
    """Test intersection (&)"""
    # NOTE(review): iset() maps each character position to a year, so
    # the column alignment between the operands and the expected result
    # matters -- confirm that the spacing of these literals is intact.
    assert_raises(TypeError, iset("[--]").__and__, 1234)

    assert(iset("[---------]") &
           iset(" [---] ") ==
           iset(" [---] "))

    assert(iset(" [---] ") &
           iset("[---------]") ==
           iset(" [---] "))

    assert(iset(" [-----]") &
           iset(" [-----] ") ==
           iset(" [--] "))

    assert(iset(" [---]") &
           iset(" [--] ") ==
           iset(" "))

    assert(iset(" [-|---]") &
           iset(" [-----|-] ") ==
           iset(" [----] "))

    assert(iset(" [-|-] ") &
           iset(" [-|--|--] ") ==
           iset(" [---] "))

    assert(iset(" [----][--]") &
           iset("[-] [--] []") ==
           iset(" [] [-] []"))
|
||||
|
16
nilmdb/utils/__init__.py
Normal file
16
nilmdb/utils/__init__.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
"""NilmDB utilities"""
|
||||
|
||||
|
||||
from nilmdb.utils.timer import Timer
|
||||
from nilmdb.utils.serializer import serializer_proxy
|
||||
from nilmdb.utils.lrucache import lru_cache
|
||||
from nilmdb.utils.diskusage import du, human_size
|
||||
from nilmdb.utils.mustclose import must_close
|
||||
from nilmdb.utils import atomic
|
||||
import nilmdb.utils.threadsafety
|
||||
import nilmdb.utils.fallocate
|
||||
import nilmdb.utils.time
|
||||
import nilmdb.utils.iterator
|
||||
import nilmdb.utils.interval
|
||||
import nilmdb.utils.lock
|
||||
import nilmdb.utils.sort
|
19
nilmdb/utils/atomic.py
Normal file
19
nilmdb/utils/atomic.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
# Atomic file writing helper.
|
||||
|
||||
import os
|
||||
|
||||
|
||||
def replace_file(filename, content):
    """Attempt to atomically and durably replace the filename with the
    given contents.

    'filename' may be a bytes path, a str path, or an os.PathLike
    object.  'content' is the bytes payload to store.

    The data is first written to "<filename>.new", flushed and
    fsync'ed, and that file is then renamed over 'filename'.
    """
    filename = os.fspath(filename)
    # The temporary suffix must have the same type as the path itself;
    # concatenating a bytes suffix to a str path raises TypeError.
    suffix = b".new" if isinstance(filename, bytes) else ".new"
    newfilename = filename + suffix

    # Write to new file, flush it
    with open(newfilename, "wb") as f:
        f.write(content)
        f.flush()
        os.fsync(f.fileno())

    # Move new file over old one
    os.replace(newfilename, filename)
|
36
nilmdb/utils/diskusage.py
Normal file
36
nilmdb/utils/diskusage.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
import os
|
||||
import errno
|
||||
from math import log
|
||||
|
||||
|
||||
def human_size(num):
    """Format a byte count as a human friendly size string.

    The value is scaled by powers of 1024 and shown with a number of
    decimals that depends on the unit (none for bytes and kiB, one for
    MiB, two for GiB and TiB).  0 and 1 are special-cased so that they
    read "0 bytes" and "1 byte".
    """
    if num == 0:
        return '0 bytes'
    if num == 1:
        return '1 byte'
    units = ('bytes', 'kiB', 'MiB', 'GiB', 'TiB')
    decimals = (0, 0, 1, 2, 2)
    # Largest power of 1024 to scale by, capped at the last unit
    idx = min(int(log(num, 1024)), len(units) - 1)
    scaled = float(num) / 1024**idx
    return '{:.{places}f} {}'.format(scaled, units[idx],
                                     places=decimals[idx])
|
||||
|
||||
|
||||
def du(path):
    """Like du -sb, returns total size of path in bytes.

    Sizes are taken from the allocated block count (st_blocks * 512)
    and directories are walked recursively.  A path that cannot be
    found -- a broken symlink, or a file that is being removed while
    we look -- counts as 0; any other OSError is re-raised.
    """
    try:
        total = os.stat(path).st_blocks * 512
        if os.path.isdir(path):
            total += sum(du(os.path.join(path, entry))
                         for entry in os.listdir(path))
        return total
    except OSError as e:
        if e.errno == errno.ENOENT:
            return 0
        raise
|
20
nilmdb/utils/fallocate.py
Normal file
20
nilmdb/utils/fallocate.py
Normal file
|
@ -0,0 +1,20 @@
|
|||
# Implementation of hole punching via fallocate, if the OS
|
||||
# and filesystem support it.
|
||||
|
||||
import fallocate
|
||||
|
||||
|
||||
def punch_hole(filename, offset, length, ignore_errors=True):
    """Deallocate 'length' bytes at 'offset' in the named file without
    changing its size.

    Hole punching isn't well supported, so by default any failure is
    swallowed; pass ignore_errors=False to have it raised instead.
    """
    try:
        with open(filename, "r+") as f:
            fd = f.fileno()
            flags = (fallocate.FALLOC_FL_KEEP_SIZE |
                     fallocate.FALLOC_FL_PUNCH_HOLE)
            fallocate.fallocate(fd, offset, length, flags)
    except Exception:
        if not ignore_errors:
            raise
|
168
nilmdb/utils/interval.py
Normal file
168
nilmdb/utils/interval.py
Normal file
|
@ -0,0 +1,168 @@
|
|||
"""Interval. Like nilmdb.server.interval, but re-implemented here
|
||||
in plain Python so clients have easier access to it, and with a few
|
||||
helper functions.
|
||||
|
||||
Intervals are half-open, ie. they include data points with timestamps
|
||||
[start, end)
|
||||
"""
|
||||
|
||||
import nilmdb.utils.time
|
||||
import nilmdb.utils.iterator
|
||||
|
||||
|
||||
class IntervalError(Exception):
    """Raised for invalid intervals: overlap, bad bounds, and the like."""
|
||||
|
||||
|
||||
# Interval
|
||||
class Interval:
    """A half-open span of time, [start, end)."""

    def __init__(self, start, end):
        """
        'start' and 'end' are arbitrary numbers that represent time.
        Raises IntervalError unless start is strictly before end.
        """
        # Zero-width intervals are explicitly disallowed, since
        # intervals are half-open.
        if start >= end:
            raise IntervalError("start %s must precede end %s" % (start, end))
        self.start = start
        self.end = end

    def __repr__(self):
        return "%s(%r, %r)" % (self.__class__.__name__,
                               self.start, self.end)

    def __str__(self):
        fmt = nilmdb.utils.time.timestamp_to_string
        first = fmt(self.start)
        last = nilmdb.utils.time.timestamp_to_string(self.end)
        return "[" + first + " -> " + last + ")"

    def human_string(self):
        first = nilmdb.utils.time.timestamp_to_human(self.start)
        last = nilmdb.utils.time.timestamp_to_human(self.end)
        return "[ " + first + " -> " + last + " ]"

    # Comparisons order intervals by start, then by end.
    def __lt__(self, other):
        mine = (self.start, self.end)
        theirs = (other.start, other.end)
        return mine < theirs

    def __gt__(self, other):
        mine = (self.start, self.end)
        theirs = (other.start, other.end)
        return mine > theirs

    def __le__(self, other):
        mine = (self.start, self.end)
        theirs = (other.start, other.end)
        return mine <= theirs

    def __ge__(self, other):
        mine = (self.start, self.end)
        theirs = (other.start, other.end)
        return mine >= theirs

    def __eq__(self, other):
        mine = (self.start, self.end)
        theirs = (other.start, other.end)
        return mine == theirs

    def __ne__(self, other):
        mine = (self.start, self.end)
        theirs = (other.start, other.end)
        return mine != theirs

    def intersects(self, other):
        """Return True if two Interval objects intersect"""
        if not isinstance(other, Interval):
            raise TypeError("need an Interval")
        disjoint = self.end <= other.start or self.start >= other.end
        return not disjoint

    def subset(self, start, end):
        """Return a new Interval covering [start, end), which must lie
        within this one; raises IntervalError otherwise."""
        # A subclass that tracks additional data might override this.
        if start < self.start or end > self.end:
            raise IntervalError("not a subset")
        return Interval(start, end)
|
||||
|
||||
|
||||
def _interval_math_helper(a, b, op, subset=True):
    """Helper for set_difference, intersection functions,
    to compute interval subsets based on a math operator on ranges
    present in A and B.  Subsets are computed from A, or new intervals
    are generated if subset = False.

    'a' and 'b' are iterables of objects with 'start' and 'end'
    attributes.  'op' is called as op(in_a, in_b) with two booleans
    and decides whether the current position belongs in the output.

    This is a generator; it yields the output intervals in turn.
    """
    # Iterate through all starts and ends in sorted order.  Add a
    # tag to the iterator so that we can figure out which one they
    # were, after sorting.
    def decorate(it, key_start, key_end):
        for i in it:
            yield i.start, key_start, i
            yield i.end, key_end, i
    # Tags: 0 = start of an A interval, 1 = start of a B interval,
    #       2 = end of an A interval,   3 = end of a B interval.
    a_iter = decorate(iter(a), 0, 2)
    b_iter = decorate(iter(b), 1, 3)

    # Now iterate over the timestamps of each start and end.
    # At each point, evaluate which type of end it is, to determine
    # how to build up the output intervals.
    a_interval = None   # most recently started interval from A
    in_a = False        # currently inside an A interval?
    in_b = False        # currently inside a B interval?
    out_start = None    # start of the pending output range, if any
    for (ts, k, i) in nilmdb.utils.iterator.imerge(a_iter, b_iter):
        if k == 0:
            a_interval = i
            in_a = True
        elif k == 1:
            in_b = True
        elif k == 2:
            in_a = False
        else:  # k == 3
            in_b = False
        include = op(in_a, in_b)
        if include and out_start is None:
            # Entering a region that belongs in the output
            out_start = ts
        elif not include:
            # Leaving such a region (if we were in one); zero-width
            # results are dropped.
            if out_start is not None and out_start != ts:
                if subset:
                    yield a_interval.subset(out_start, ts)
                else:
                    yield Interval(out_start, ts)
            out_start = None
|
||||
|
||||
|
||||
def set_difference(a, b):
    """
    Compute the difference (a \\ b) between the intervals in 'a' and
    the intervals in 'b'; i.e., the ranges that are present in 'a'
    but not 'b'.

    'a' and 'b' must both be iterables.

    Returns a generator that yields each interval in turn.
    Output intervals are built as subsets of the intervals in the
    first argument (a).
    """
    # The operator selects positions that are inside A and outside B.
    return _interval_math_helper(a, b, (lambda a, b: a and not b))
|
||||
|
||||
|
||||
def intersection(a, b):
    """
    Compute the intersection between the intervals in 'a' and the
    intervals in 'b'; i.e., the ranges that are present in both 'a'
    and 'b'.

    'a' and 'b' must both be iterables.

    Returns a generator that yields each interval in turn.
    Output intervals are built as subsets of the intervals in the
    first argument (a).
    """
    # The operator selects positions that are inside both A and B.
    return _interval_math_helper(a, b, (lambda a, b: a and b))
|
||||
|
||||
|
||||
def optimize(it):
    """
    Given an iterable 'it' with intervals, optimize them by joining
    together intervals that are adjacent in time, and return a generator
    that yields the new intervals.

    Two consecutive intervals are joined when the end of the first
    equals the start of the second.  The intervals supplied by the
    caller are never modified: a joined interval is a shallow copy of
    the later input interval with its start moved back.  Intervals
    that are not joined with anything are yielded as-is.
    """
    import copy

    saved_int = None
    for interval in it:
        if saved_int is not None:
            if saved_int.end == interval.start:
                # Widen a copy, so the caller's object keeps its
                # original start.
                interval = copy.copy(interval)
                interval.start = saved_int.start
            else:
                yield saved_int
        saved_int = interval
    if saved_int is not None:
        yield saved_int
|
38
nilmdb/utils/iterator.py
Normal file
38
nilmdb/utils/iterator.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
# Misc iterator tools
|
||||
|
||||
# Iterator merging, based on http://code.activestate.com/recipes/491285/
|
||||
import heapq
|
||||
|
||||
|
||||
def imerge(*iterables):
    '''Merge multiple sorted inputs into a single sorted output.

    Equivalent to:  sorted(itertools.chain(*iterables))

    >>> list(imerge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
    [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]

    This is a generator; the inputs are consumed lazily.  Equal values
    coming from different inputs are yielded in the order of the
    inputs.
    '''
    # heapq.merge breaks ties between inputs with an internal index, so
    # equal values never cause the iterators themselves to be compared,
    # and no private heapq helper is needed.
    yield from heapq.merge(*iterables)
|
22
nilmdb/utils/lock.py
Normal file
22
nilmdb/utils/lock.py
Normal file
|
@ -0,0 +1,22 @@
|
|||
# File locking
|
||||
|
||||
import fcntl
|
||||
import errno
|
||||
|
||||
|
||||
def exclusive_lock(f):
    """Try to take an exclusive, non-blocking flock() on file object
    'f'.

    Returns True if the lock was acquired and False if the attempt
    failed with EACCES or EAGAIN.  Any other IOError is re-raised.
    """
    fd = f.fileno()
    try:
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError as e:
        if e.errno not in (errno.EACCES, errno.EAGAIN):
            raise
        return False
    else:
        return True
|
||||
|
||||
|
||||
def exclusive_unlock(f):
    """Drop the flock() lock held on file object 'f'."""
    fd = f.fileno()
    fcntl.flock(fd, fcntl.LOCK_UN)
|
80
nilmdb/utils/lrucache.py
Normal file
80
nilmdb/utils/lrucache.py
Normal file
|
@ -0,0 +1,80 @@
|
|||
# Memoize a function's return value with a least-recently-used cache
|
||||
# Based on:
|
||||
# http://code.activestate.com/recipes/498245-lru-and-lfu-cache-decorators/
|
||||
# with added 'destructor' functionality.
|
||||
|
||||
import collections
|
||||
import decorator
|
||||
|
||||
|
||||
def lru_cache(size=10, onremove=None, keys=slice(None)):
    """Least-recently-used cache decorator.

    @lru_cache(size=10, onremove=None)
    def f(...):
        pass

    Memoizes the return value of the decorated function for up to
    'size' distinct keys.  The cache key is args[keys], i.e. 'keys' is
    a slice selecting which positional arguments identify an entry.
    Keyword arguments are not supported.

    Whenever a value leaves the cache, 'onremove' (if given) is called
    with that value.

    f.cache_remove(...) evicts the entry stored under the given key
    arguments; f.cache_remove_all() evicts everything.
    f.cache_info() returns (hits, misses).
    """

    def decorate(func):
        # Ordered from least- to most-recently used
        cache = collections.OrderedDict()

        def evict(value):
            if onremove:
                onremove(value)

        def wrapper(orig, *args, **kwargs):
            if kwargs:
                raise NotImplementedError("kwargs not supported")
            key = args[keys]
            try:
                result = cache.pop(key)
                orig.cache_hits += 1
            except KeyError:
                result = orig(*args)
                orig.cache_misses += 1
                if len(cache) >= size:
                    # Make room by dropping the least-recently-used entry
                    _, oldest = cache.popitem(last=False)
                    evict(oldest)
            # (Re-)insert at the most-recently-used end
            cache[key] = result
            return result

        def cache_remove(*args):
            """Remove the described key from this cache, if present."""
            if args in cache:
                evict(cache.pop(args))
                return
            # Not found: complain if the caller clearly passed the
            # wrong number of key arguments.
            if cache and len(args) != len(next(iter(cache))):
                raise KeyError("trying to remove from LRU cache, but "
                               "number of arguments doesn't match the "
                               "cache key length")

        def cache_remove_all():
            for value in cache.values():
                evict(value)
            cache.clear()

        def cache_info():
            return (func.cache_hits, func.cache_misses)

        new = decorator.decorator(wrapper, func)
        func.cache_hits = 0
        func.cache_misses = 0
        new.cache_info = cache_info
        new.cache_remove = cache_remove
        new.cache_remove_all = cache_remove_all
        return new

    return decorate
|
71
nilmdb/utils/mustclose.py
Normal file
71
nilmdb/utils/mustclose.py
Normal file
|
@ -0,0 +1,71 @@
|
|||
import sys
|
||||
import inspect
|
||||
import decorator
|
||||
from nilmdb.utils.printf import fprintf
|
||||
|
||||
|
||||
def must_close(errorfile=sys.stderr, wrap_verify=False):
    """Class decorator that warns on 'errorfile' at deletion time if
    the class's close() member wasn't called.

    If 'wrap_verify' is True, every class method is wrapped with a
    verifier that will raise AssertionError if the .close() method has
    already been called."""
    def class_decorator(cls):

        def is_method_or_function(x):
            return inspect.ismethod(x) or inspect.isfunction(x)

        def wrap_class_method(wrapper):
            # Replace the class attribute that has the same name as
            # 'wrapper' with a version that calls 'wrapper' around the
            # original.  A missing attribute is treated as a no-op.
            try:
                orig = getattr(cls, wrapper.__name__)
            except AttributeError:
                orig = lambda x: None
            if is_method_or_function(orig):
                setattr(cls, wrapper.__name__,
                        decorator.decorator(wrapper, orig))

        @wrap_class_method
        def __init__(orig, self, *args, **kwargs):
            ret = orig(self, *args, **kwargs)
            # '_must_close' marks the object as still open;
            # '_must_close_initialized' records that __init__ finished.
            self.__dict__["_must_close"] = True
            self.__dict__["_must_close_initialized"] = True
            return ret

        @wrap_class_method
        def __del__(orig, self, *args, **kwargs):
            # Errors are deliberately swallowed here, since this runs
            # at object deletion time.
            try:
                if "_must_close" in self.__dict__:
                    fprintf(errorfile, "error: %s.close() wasn't called!\n",
                            self.__class__.__name__)
                return orig(self, *args, **kwargs)
            except:
                pass

        @wrap_class_method
        def close(orig, self, *args, **kwargs):
            # Clear the "still open" marker, then run the real close()
            if "_must_close" in self.__dict__:
                del self._must_close
            return orig(self, *args, **kwargs)

        # Optionally wrap all other functions
        def verifier(orig, self, *args, **kwargs):
            # Initialized but no longer marked open means that close()
            # has already been called.
            if ("_must_close" not in self.__dict__ and
                    "_must_close_initialized" in self.__dict__):
                raise AssertionError("called " + str(orig) + " after close")
            return orig(self, *args, **kwargs)
        if wrap_verify:
            for (name, method) in inspect.getmembers(cls,
                                                     is_method_or_function):
                # Skip some methods
                if name in ["__del__", "__init__"]:
                    continue
                # Set up wrapper
                if inspect.ismethod(method):
                    func = method.__func__
                else:
                    func = method
                setattr(cls, name, decorator.decorator(verifier, func))

        return cls
    return class_decorator
|
13
nilmdb/utils/printf.py
Normal file
13
nilmdb/utils/printf.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
"""printf, fprintf, sprintf"""
|
||||
|
||||
|
||||
def printf(_str, *args):
    """Apply %-formatting of '_str' with 'args' and print the result
    without a trailing newline."""
    text = _str % args
    print(text, end='')
|
||||
|
||||
|
||||
def fprintf(_file, _str, *args):
    """Apply %-formatting of '_str' with 'args' and print the result
    to '_file' without a trailing newline."""
    text = _str % args
    print(text, file=_file, end='')
|
||||
|
||||
|
||||
def sprintf(_str, *args):
    """Return '_str' %-formatted with 'args'."""
    formatted = _str % args
    return formatted
|
134
nilmdb/utils/serializer.py
Normal file
134
nilmdb/utils/serializer.py
Normal file
|
@ -0,0 +1,134 @@
|
|||
import queue
|
||||
import threading
|
||||
import sys
|
||||
|
||||
# This file provides a class that will wrap an object and serialize
|
||||
# all calls to its methods. All calls to that object will be queued
|
||||
# and executed from a single thread, regardless of which thread makes
|
||||
# the call.
|
||||
|
||||
# Based partially on http://stackoverflow.com/questions/2642515/
|
||||
|
||||
|
||||
class SerializerThread(threading.Thread):
    """Worker thread: pulls call descriptions off a queue, performs
    each call, and posts the outcome to the caller's result queue."""
    def __init__(self, classname, call_queue):
        threading.Thread.__init__(self)
        self.name = "-".join(("Serializer", classname, self.name))
        self.call_queue = call_queue

    def run(self):
        while True:
            result_queue, func, args, kwargs = self.call_queue.get()
            # A request without a result queue is the signal to stop.
            if result_queue is None:
                break
            try:
                outcome = (None, func(*args, **kwargs))  # wrapped
            except:
                outcome = (sys.exc_info(), None)
            # Drop these before handing back the result, so we don't
            # unnecessarily hold onto a reference while waiting for
            # the next call.
            del func, args, kwargs
            result_queue.put(outcome)
            del outcome
|
||||
|
||||
|
||||
def serializer_proxy(obj_or_type):
    """Return a SerializerObjectProxy wrapping the given object or type.

    Method calls and attribute retrievals made through the proxy are
    forwarded to the wrapped object.

    The forwarded requests, including instantiation when a type was
    given, are executed one at a time in a single dedicated thread,
    whichever thread issued them.
    """
    class SerializerCallProxy():
        """Callable that hands 'func' to the serializer thread and
        waits for its outcome."""
        def __init__(self, call_queue, func, objectproxy):
            self.call_queue = call_queue
            self.func = func
            # Hold a reference to the object proxy: when it goes away
            # its __del__ tells the thread to exit, and that must not
            # happen before this call has been made.
            self.objectproxy = objectproxy

        def __call__(self, *args, **kwargs):
            reply = queue.Queue()
            self.call_queue.put((reply, self.func, args, kwargs))
            exc_info, result = reply.get()
            if exc_info is not None:
                # Re-raise what the serializer thread caught, keeping
                # its traceback.
                raise exc_info[1].with_traceback(exc_info[2])
            return result

    class SerializerObjectProxy():
        def __init__(self, obj_or_type, *args, **kwargs):
            self.__object = obj_or_type
            if isinstance(obj_or_type, type):
                kind = obj_or_type
            else:
                kind = obj_or_type.__class__
            classname = kind.__name__
            self.__call_queue = queue.Queue()
            self.__thread = SerializerThread(classname, self.__call_queue)
            self.__thread.daemon = True
            self.__thread.start()
            self._thread_safe = True

        def __getattr__(self, key):
            # The attribute has to be fetched once here, in the
            # caller's thread, to learn whether it is callable.  That
            # means self.__object.__getattr__ may be invoked in an
            # unsafe way.
            attr = getattr(self.__object, key)
            if callable(attr):
                # Callable: hand back a proxy that goes through the
                # serializer when it is eventually called.
                return SerializerCallProxy(self.__call_queue, attr, self)
            # Not callable: redo the getattr inside the serializer
            # thread and return that value.  It may differ from "attr"
            # above because of the forced ordering in the serializer.
            getter = SerializerCallProxy(self.__call_queue, getattr, self)
            return getter(self.__object, key)

        # Iteration support: __iter__() stores the wrapped object's
        # iterator and returns this proxy; __next__() advances that
        # stored iterator through the serializer.
        def __iter__(self):
            make_iter = getattr(self.__object, "__iter__")
            self.__iter = SerializerCallProxy(self.__call_queue,
                                              make_iter, self)()
            return self

        def __next__(self):
            step = SerializerCallProxy(self.__call_queue,
                                       self.__iter.__next__, self)
            return step()

        def __getitem__(self, key):
            return self.__getattr__("__getitem__")(key)

        def __call__(self, *args, **kwargs):
            """Instantiate the wrapped type if serializer_proxy was
            given a type; otherwise pass the call through to the
            wrapped object."""
            call = SerializerCallProxy(self.__call_queue,
                                       self.__object, self)
            ret = call(*args, **kwargs)
            if not isinstance(self.__object, type):
                return ret
            # Instantiation: from now on proxy the new instance.
            self.__object = ret
            return self

        def __del__(self):
            try:
                # Ask the thread to exit, without waiting for it.
                self.__call_queue.put((None, None, None, None))
            except:
                pass

    return SerializerObjectProxy(obj_or_type)
|
19
nilmdb/utils/sort.py
Normal file
19
nilmdb/utils/sort.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
import re
|
||||
|
||||
|
||||
def sort_human(items, key=None):
    """Return 'items' sorted in a human-friendly order, so that e.g.
    /stream/2 comes before /stream/10.  If 'key' is given, it is
    applied to each item to obtain the text that is compared."""
    digit_runs = re.compile(r'([0-9]+)')

    def to_num(val):
        try:
            return int(val)
        except Exception:
            return val

    def human_key(text):
        if key:
            text = key(text)
        # Alternating non-numeric and numeric pieces, with the
        # convertible ones turned into integers.
        return list(map(to_num, digit_runs.split(text)))

    return sorted(items, key=human_key)
|
97
nilmdb/utils/threadsafety.py
Normal file
97
nilmdb/utils/threadsafety.py
Normal file
|
@ -0,0 +1,97 @@
|
|||
import threading
|
||||
from nilmdb.utils.printf import sprintf
|
||||
|
||||
|
||||
def verify_proxy(obj_or_type, check_thread=True,
                 check_concurrent=True):
    """Return a VerifyObjectProxy wrapping the given object or type.

    The proxy forwards method calls and attribute retrievals to the
    wrapped object.

    Each forwarded call is checked as follows, and AssertionError is
    raised when a check fails:

    check_thread = True      # Fail if a thread other than the first
                             # caller calls through this proxy.
    check_concurrent = True  # Fail if a call is made through this
                             # proxy while another is still running.
    """
    class Namespace():
        pass

    class VerifyCallProxy():
        def __init__(self, func, parent_namespace):
            self.func = func
            self.parent_namespace = parent_namespace

        def __call__(self, *args, **kwargs):
            p = self.parent_namespace
            this = threading.current_thread()
            callee = getattr(self.func, "__name__", "???")

            # The first thread to call through the proxy becomes the
            # reference for the thread check.
            if p.thread is None:
                p.thread = this
                p.thread_callee = callee

            if check_thread and p.thread != this:
                raise AssertionError(
                    sprintf("unsafe threading: %s called %s.%s,"
                            " but %s called %s.%s",
                            p.thread.name, p.classname, p.thread_callee,
                            this.name, p.classname, callee))

            if not check_concurrent:
                return self.func(*args, **kwargs)

            # Non-blocking acquire: failure means another call is
            # still in progress.
            if not p.concur_lock.acquire(False):
                raise AssertionError(
                    sprintf("unsafe concurrency: %s called %s.%s "
                            "while %s is still in %s.%s",
                            this.name, p.classname, callee,
                            p.concur_tname, p.classname, p.concur_callee))
            p.concur_tname = this.name
            p.concur_callee = callee
            try:
                return self.func(*args, **kwargs)
            finally:
                p.concur_lock.release()

    class VerifyObjectProxy():
        def __init__(self, obj_or_type, *args, **kwargs):
            p = Namespace()
            self.__ns = p
            p.thread = None
            p.thread_callee = None
            p.concur_lock = threading.Lock()
            p.concur_tname = None
            p.concur_callee = None
            self.__obj = obj_or_type
            if isinstance(obj_or_type, type):
                kind = self.__obj
            else:
                kind = self.__obj.__class__
            p.classname = kind.__name__

        def __getattr__(self, key):
            attr = getattr(self.__obj, key)
            if callable(attr):
                return VerifyCallProxy(attr, self.__ns)
            # Non-callable attributes are fetched through a checked
            # call to getattr.
            return VerifyCallProxy(getattr, self.__ns)(self.__obj, key)

        def __call__(self, *args, **kwargs):
            """Instantiate the wrapped type if verify_proxy was given
            a type; otherwise pass the call through to the wrapped
            object."""
            ret = VerifyCallProxy(self.__obj, self.__ns)(*args, **kwargs)
            if not isinstance(self.__obj, type):
                return ret
            # Instantiation: from now on proxy the new instance.
            self.__obj = ret
            return self

    return VerifyObjectProxy(obj_or_type)
|
148
nilmdb/utils/time.py
Normal file
148
nilmdb/utils/time.py
Normal file
|
@ -0,0 +1,148 @@
|
|||
import re
|
||||
import time
|
||||
import datetime_tz
|
||||
|
||||
# Range
|
||||
min_timestamp = (-2**63)
|
||||
max_timestamp = (2**63 - 1)
|
||||
|
||||
# Smallest representable step
|
||||
epsilon = 1
|
||||
|
||||
|
||||
def string_to_timestamp(string):
    """Convert a string that represents a number of microseconds
    since epoch into an integer timestamp."""
    try:
        # Plain integer text such as "1234567890123456"
        value = int(string)
    except ValueError:
        # Fall back to float text such as "1234567890123456.0",
        # rounded to the nearest integer
        value = int(round(float(string)))
    return value
|
||||
|
||||
|
||||
def timestamp_to_string(timestamp):
    """Convert a timestamp (integer microseconds since epoch) to a string.
    A float is rounded to the nearest integer first."""
    value = int(round(timestamp)) if isinstance(timestamp, float) else timestamp
    return str(value)
|
||||
|
||||
|
||||
def timestamp_to_bytes(timestamp):
    """Convert a timestamp (integer microseconds since epoch) to a Python
    bytes object: the UTF-8 encoding of its string form."""
    text = timestamp_to_string(timestamp)
    return text.encode('utf-8')
|
||||
|
||||
|
||||
def timestamp_to_human(timestamp):
    """Convert a timestamp (integer microseconds since epoch) to a
    human-readable string, using the local timezone for display
    (e.g. from the TZ env var).  The two range limits are shown as
    "(minimum)" and "(maximum)"."""
    limits = ((min_timestamp, "(minimum)"),
              (max_timestamp, "(maximum)"))
    for limit, label in limits:
        if timestamp == limit:
            return label
    seconds = timestamp_to_unix(timestamp)
    dt = datetime_tz.datetime_tz.fromtimestamp(seconds)
    return dt.strftime("%a, %d %b %Y %H:%M:%S.%f %z")
|
||||
|
||||
|
||||
def unix_to_timestamp(unix):
    """Convert a Unix timestamp (floating point seconds since epoch)
    into a NILM timestamp (integer microseconds since epoch)"""
    micros = unix * 1e6
    return int(round(micros))
|
||||
|
||||
|
||||
def timestamp_to_unix(timestamp):
    """Convert a NILM timestamp (integer microseconds since epoch)
    into a Unix timestamp (floating point seconds since epoch)"""
    seconds = timestamp / 1e6
    return seconds
|
||||
|
||||
|
||||
seconds_to_timestamp = unix_to_timestamp
|
||||
timestamp_to_seconds = timestamp_to_unix
|
||||
|
||||
|
||||
def rate_to_period(hz, cycles=1):
    """Convert a rate (in Hz) to the period of 'cycles' cycles,
    in timestamp units.  Returns an integer."""
    span = unix_to_timestamp(cycles)
    return int(round(span / float(hz)))
|
||||
|
||||
|
||||
def parse_time(toparse):
    """
    Parse a free-form time string and return a nilmdb timestamp
    (integer microseconds since epoch).  If the string doesn't contain a
    timezone, the current local timezone is assumed (e.g. from the TZ
    env var).

    The following interpretations are tried, in this order:
      - "min" / "max": the limits of the timestamp range
      - "@<integer>": a literal nilmdb timestamp
      - whatever datetime_tz.smartparse() accepts ("now" is passed
        through to it without the digit-count check)
      - a bare number, taken as Unix seconds or as nilmdb microseconds
        depending on its magnitude
      - a condensed date/time substring, such as might appear in a
        filename or header comment

    Raises ValueError if none of them applies.
    """
    if toparse == "min":
        return min_timestamp
    if toparse == "max":
        return max_timestamp

    # If it starts with @, treat it as a NILM timestamp
    # (integer microseconds since epoch)
    try:
        if toparse[0] == '@':
            return int(toparse[1:])
    except (ValueError, KeyError, IndexError):
        pass

    # If string isn't "now" and doesn't contain at least 4 digits,
    # consider it invalid.  smartparse might otherwise accept
    # empty strings and strings with just separators.
    if toparse != "now" and len(re.findall(r"\d", toparse)) < 4:
        raise ValueError("not enough digits for a timestamp")

    # Try to just parse the time as given
    try:
        return unix_to_timestamp(datetime_tz.datetime_tz.
                                 smartparse(toparse).totimestamp())
    except (ValueError, OverflowError, TypeError):
        pass

    # If it's parseable as a float, treat it as a Unix or NILM
    # timestamp based on its range.
    try:
        val = float(toparse)
        # range is from about year 2001 - 2128
        if 1e9 < val < 5e9:
            return unix_to_timestamp(val)
        if 1e15 < val < 5e15:
            # Already in microseconds; convert to int so that every
            # return path yields an integer timestamp rather than
            # leaking the intermediate float.
            return int(round(val))
    except ValueError:
        pass

    # Try to extract a substring in a condensed format that we expect
    # to see in a filename or header comment
    res = re.search(r"(^|[^\d])("           # non-numeric or SOL
                    r"(199\d|2\d\d\d)"      # year
                    r"[-/]?"                # separator
                    r"(0[1-9]|1[012])"      # month
                    r"[-/]?"                # separator
                    r"([012]\d|3[01])"      # day
                    r"[-T ]?"               # separator
                    r"([01]\d|2[0-3])"      # hour
                    r"[:]?"                 # separator
                    r"([0-5]\d)"            # minute
                    r"[:]?"                 # separator
                    r"([0-5]\d)?"           # second
                    r"([-+]\d\d\d\d)?"      # timezone
                    r")", toparse)
    if res is not None:
        try:
            return unix_to_timestamp(datetime_tz.datetime_tz.
                                     smartparse(res.group(2)).totimestamp())
        except (ValueError, OverflowError, TypeError):
            # Same failure modes as the first smartparse attempt above;
            # fall through to the generic error below.
            pass

    # Could also try to successively parse substrings, but let's
    # just give up for now.
    raise ValueError("unable to parse timestamp")
|
||||
|
||||
|
||||
def now():
    """Return the current time as a timestamp"""
    current = time.time()
    return unix_to_timestamp(current)
|
22
nilmdb/utils/timer.py
Normal file
22
nilmdb/utils/timer.py
Normal file
|
@ -0,0 +1,22 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Simple timer to time a block of code, for optimization debugging
|
||||
# use like:
|
||||
# with nilmdb.utils.Timer("flush"):
|
||||
# foo.flush()
|
||||
|
||||
import contextlib
|
||||
import time
|
||||
|
||||
|
||||
@contextlib.contextmanager
def Timer(name=None, tosyslog=False):
    """Context manager that measures how long the enclosed block took
    and reports it in whole milliseconds, labelled with 'name' (or
    'elapsed').  The report goes to syslog if 'tosyslog' is set,
    otherwise it is printed."""
    began = time.time()
    yield
    millis = int((time.time() - began) * 1000)
    label = name or 'elapsed'
    msg = label + ": " + str(millis) + " ms"
    if not tosyslog:
        print(msg)
        return
    # Imported only when it is actually needed.
    import syslog
    syslog.syslog(msg)
|
103
nilmdb/utils/timestamper.py
Normal file
103
nilmdb/utils/timestamper.py
Normal file
|
@ -0,0 +1,103 @@
|
|||
"""File-like objects that add timestamps to the input lines"""
|
||||
|
||||
from nilmdb.utils.printf import sprintf
|
||||
import nilmdb.utils.time
|
||||
|
||||
|
||||
class Timestamper():
    """A file-like object that adds timestamps to lines of an input file."""
    def __init__(self, infile, ts_iter):
        """infile: filename, or another file-like object
        ts_iter: iterator that returns a timestamp string for
        each line of the file"""
        # A string is taken to be a filename and opened in binary mode.
        self.file = open(infile, "rb") if isinstance(infile, str) else infile
        self.ts_iter = ts_iter

    def close(self):
        self.file.close()

    def readline(self, *args):
        """Return the next line that does not start with '#', prefixed
        with the next timestamp.  Returns b"" once either the file or
        the timestamp iterator is exhausted."""
        line = self.file.readline(*args)
        while line and line[0:1] == b'#':
            line = self.file.readline(*args)
        if not line:
            return b""
        try:
            stamp = next(self.ts_iter)
        except StopIteration:
            return b""
        return stamp + line

    def readlines(self, size=None):
        """Return the remaining timestamped lines concatenated into a
        single bytes object, stopping early once at least 'size' bytes
        have been collected (if 'size' is given)."""
        collected = b""
        line = self.readline()
        while line:
            collected += line
            if size and len(collected) >= size:
                break
            line = self.readline()
        return collected

    def __iter__(self):
        return self

    def __next__(self):
        line = self.readline()
        if line:
            return line
        raise StopIteration
|
||||
|
||||
|
||||
class TimestamperRate(Timestamper):
    """Timestamper that uses a start time and a fixed rate"""
    def __init__(self, infile, start, rate, end=None):
        """
        infile: file name or object

        start: timestamp for the first value; the period computed by
               nilmdb.utils.time.rate_to_period is added to it, and
               the sum is formatted with timestamp_to_bytes

        rate: line n (counting from 0) is stamped with
              start + rate_to_period(rate, n)

        end: If specified (and non-zero), stop producing timestamps
             before outputting a value greater than or equal to this."""
        timestamp_to_bytes = nilmdb.utils.time.timestamp_to_bytes
        rate_to_period = nilmdb.utils.time.rate_to_period

        def iterator(start, rate, end):
            hz = float(rate)
            index = 0
            while True:
                stamp = start + rate_to_period(hz, index)
                if end and stamp >= end:
                    break
                yield timestamp_to_bytes(stamp) + b" "
                index += 1

        Timestamper.__init__(self, infile, iterator(start, rate, end))
        self.start = start
        self.rate = rate

    def __str__(self):
        human_start = nilmdb.utils.time.timestamp_to_human(self.start)
        return sprintf("TimestamperRate(..., start=\"%s\", rate=%g)",
                       human_start, self.rate)
|
||||
|
||||
|
||||
class TimestamperNow(Timestamper):
    """Timestamper that uses current time"""
    def __init__(self, infile):
        to_bytes = nilmdb.utils.time.timestamp_to_bytes
        current_time = nilmdb.utils.time.now

        def iterator():
            # Endless: the time is sampled each time a value is
            # requested.
            while True:
                stamp = to_bytes(current_time())
                yield stamp + b" "

        Timestamper.__init__(self, infile, iterator())

    def __str__(self):
        return "TimestamperNow(...)"
|
|
@ -1,5 +0,0 @@
|
|||
all:
|
||||
time python test-indexed-read.py
|
||||
|
||||
clean:
|
||||
rm -f *pyc
|
|
@ -1,2 +0,0 @@
|
|||
New version from:
|
||||
http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=641485#15
|
|
@ -1,12 +0,0 @@
|
|||
- Make CherryPy server that can handle simple GET/POST,
|
||||
and a httplib client that can talk to that server.
|
||||
Steps:
|
||||
- Make server handle GET
|
||||
- Make client send request, get response
|
||||
- Add request streaming to server
|
||||
- Add request streaming to client
|
||||
- Make server handle POST
|
||||
- Make client send request, get response
|
||||
- Add request streaming to server
|
||||
- Add request streaming to client
|
||||
- Integrate into a server process that also keeps database open.
|
|
@ -1,3 +0,0 @@
|
|||
Indexing time64 doesn't seem to work -- needed to do "time >= 1243052015" even though the actual database times
|
||||
should be something like 1243052015.847000. Let's switch to just using a 64-bit integer counting e.g.
|
||||
microseconds since 1970-01-01
|
|
@ -1,3 +0,0 @@
|
|||
timestamp > 1243052015
|
||||
took 394.5 minutes in vitables
|
||||
(2340 rows matched)
|
|
@ -1,53 +0,0 @@
|
|||
import sys
import tables
import nilmdb

# Exit early with a readable message if CherryPy cannot be imported or
# does not provide the json_out tool used by the handlers below.
try:
    import cherrypy
    # Attribute access only, as a feature probe.
    cherrypy.tools.json_out
except:
    sys.stderr.write("Cherrypy 3.2+ required\n")
    sys.exit(1)

class NilmApp:
    # Common base for the mounted applications: keeps the 'db' object
    # passed at construction time.
    def __init__(self, db):
        self.db = db

class Root(NilmApp):
    """NILM Database"""

    server_version = "1.0"

    # Both of these exposed handlers always answer with NotFound.
    @cherrypy.expose
    def index(self):
        raise cherrypy.NotFound()

    @cherrypy.expose
    def favicon_ico(self):
        raise cherrypy.NotFound()

    # Returns server_version through the json_out tool.
    @cherrypy.expose
    @cherrypy.tools.json_out()
    def version(self):
        return self.server_version

class Stream(NilmApp):
    """Stream operations"""

    # Placeholder: currently returns None.
    @cherrypy.expose
    @cherrypy.tools.json_out()
    def list(self):
        return

# Listen on the loopback interface only.
cherrypy.config.update({
    'server.socket_host': '127.0.0.1',
    'server.socket_port': 12380
})

# One database object is shared by both mounted applications.
db = nilmdb.nilmdb()
cherrypy.tree.mount(Root(db), "/")
cherrypy.tree.mount(Stream(db), "/stream")

if __name__ == "__main__":
    cherrypy.engine.start()
    cherrypy.engine.block()
|
|
@ -1,16 +0,0 @@
|
|||
import tables
import numpy

# Row layout for the test table: one unsigned 64-bit timestamp plus
# two 3-element unsigned 16-bit columns.
class RawSample(tables.IsDescription):
    timestamp = tables.UInt64Col()
    voltage = tables.UInt16Col(shape = 3)
    current = tables.UInt16Col(shape = 3)

# Create (mode "w") test.h5 with a /raw group holding one table.
h5file = tables.openFile("test.h5", mode = "w", title = "Test")
group = h5file.createGroup("/", "raw", "Raw Data")
table = h5file.createTable(group, "nilm1", RawSample, "NILM 1")

# Python 2 print statement: show the resulting file structure.
print repr(h5file)

# write rows
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
#!/usr/bin/python

# Python 2 script: fills an indexed PyTables table with repeated copies
# of a whitespace-separated data file, using an Int64 timestamp column.

from tables import *
import re
import time

# A class to describe our data
class PrepData(IsDescription):
    timestamp = Int64Col()
    p1 = Float32Col()
    q1 = Float32Col()
    p3 = Float32Col()
    q3 = Float32Col()
    p5 = Float32Col()
    q5 = Float32Col()
    p7 = Float32Col()
    q7 = Float32Col()

filename = "test.h5"
h5file = openFile(filename, mode = "w", title = "NILM Test")

group = h5file.createGroup("/", "newton", "Newton school")
# NOTE(review): expectedrows looks like 120 rows/s * 86400 s/day * 90 days -- confirm
table = h5file.createTable(group, "prep", PrepData, "Prep Data", expectedrows = 120 * 86400 * 90)

# Index the timestamp column before any rows are appended.
table.cols.timestamp.createIndex()

# Load the same input file 80 times, shifting the timestamps of each
# pass by a multiple of 500000000.
for i in range(0, 80):
    # Open file
    data = open("data/alldata")
    count = 0
    oldtime = time.time()
    prep = table.row
    for line in data:
        count = count + 1
        # Progress report every million lines: time taken for that
        # million, and the running total for this pass.
        if count % 1000000 == 0:
            print str(i) + ": " + str((time.time() - oldtime)) + ", total " + str(count/1000000) + "m lines"
            oldtime = time.time()
        # Field 0 is the timestamp; fields 1-8 fill p1/q1 .. p7/q7.
        v = re.split('\s+', line)
        prep['timestamp'] = int(v[0]) + 500000000 * i
        prep['p1'] = v[1]
        prep['q1'] = v[2]
        prep['p3'] = v[3]
        prep['q3'] = v[4]
        prep['p5'] = v[5]
        prep['q5'] = v[6]
        prep['p7'] = v[7]
        prep['q7'] = v[8]
        prep.append()
    data.close()

h5file.close()
|
||||
|
||||
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
#!/usr/bin/python

# Python 2 script: fills an indexed PyTables table with repeated copies
# of a whitespace-separated data file, using a Time64 timestamp column.

from tables import *
import re
import time

# A class to describe our data
class PrepData(IsDescription):
    timestamp = Time64Col()
    p1 = Float32Col()
    q1 = Float32Col()
    p3 = Float32Col()
    q3 = Float32Col()
    p5 = Float32Col()
    q5 = Float32Col()
    p7 = Float32Col()
    q7 = Float32Col()

filename = "test.h5"
h5file = openFile(filename, mode = "w", title = "NILM Test")

group = h5file.createGroup("/", "newton", "Newton school")
table = h5file.createTable(group, "prep", PrepData, "Prep Data")

# Index the timestamp column before any rows are appended.
table.cols.timestamp.createIndex()

# Load the same input file 80 times, shifting the timestamps of each
# pass by a multiple of 500000.
for i in range(0, 80):
    # Open file
    data = open("data/alldata")
    count = 0
    oldtime = time.time()
    prep = table.row
    for line in data:
        count = count + 1
        # Progress report every million lines: time taken for that
        # million, and the running total for this pass.
        if count % 1000000 == 0:
            print str(i) + ": " + str((time.time() - oldtime)) + ", total " + str(count/1000000) + "m lines"
            oldtime = time.time()
        # Field 0 is the timestamp, divided by 1000 before storing
        # (presumably milliseconds to seconds -- confirm against the
        # data file); fields 1-8 fill p1/q1 .. p7/q7.
        v = re.split('\s+', line)
        prep['timestamp'] = float(v[0]) / 1000.0 + 500000 * i
        prep['p1'] = v[1]
        prep['q1'] = v[2]
        prep['p3'] = v[3]
        prep['q3'] = v[4]
        prep['p5'] = v[5]
        prep['q5'] = v[6]
        prep['p7'] = v[7]
        prep['q7'] = v[8]
        prep.append()
    data.close()

h5file.close()
|
||||
|
||||
|
||||
|
41
requirements.txt
Normal file
41
requirements.txt
Normal file
|
@ -0,0 +1,41 @@
|
|||
argcomplete==1.12.0
|
||||
CherryPy==18.6.0
|
||||
coverage==5.2.1
|
||||
Cython==0.29.21
|
||||
decorator==4.4.2
|
||||
fallocate==1.6.4
|
||||
flake8==3.8.3
|
||||
nose==1.3.7
|
||||
numpy==1.19.1
|
||||
progressbar==2.5
|
||||
psutil==5.7.2
|
||||
python-datetime-tz==0.5.4
|
||||
python-dateutil==2.8.1
|
||||
requests==2.24.0
|
||||
tz==0.2.2
|
||||
yappi==1.2.5
|
||||
|
||||
## The following requirements were added by pip freeze:
|
||||
beautifulsoup4==4.9.1
|
||||
certifi==2020.6.20
|
||||
chardet==3.0.4
|
||||
cheroot==8.4.2
|
||||
idna==2.10
|
||||
jaraco.classes==3.1.0
|
||||
jaraco.collections==3.0.0
|
||||
jaraco.functools==3.0.1
|
||||
jaraco.text==3.2.0
|
||||
mccabe==0.6.1
|
||||
more-itertools==8.4.0
|
||||
portend==2.6
|
||||
pycodestyle==2.6.0
|
||||
pyflakes==2.2.0
|
||||
pytz==2020.1
|
||||
six==1.15.0
|
||||
soupsieve==2.0.1
|
||||
tempora==4.0.0
|
||||
urllib3==1.25.10
|
||||
waitress==1.4.4
|
||||
WebOb==1.8.6
|
||||
WebTest==2.0.35
|
||||
zc.lockfile==2.0
|
50
setup.cfg
50
setup.cfg
|
@ -1,9 +1,59 @@
|
|||
[aliases]
|
||||
test = nosetests
|
||||
|
||||
[nosetests]
|
||||
# Note: values must be set to 1, and have no comments on the same line,
|
||||
# for "python setup.py nosetests" to work correctly.
|
||||
nocapture=1
|
||||
# Comment this out to see CherryPy logs on failure:
|
||||
nologcapture=1
|
||||
with-coverage=1
|
||||
cover-inclusive=1
|
||||
cover-package=nilmdb
|
||||
cover-erase=1
|
||||
# this works, puts html output in cover/ dir:
|
||||
# cover-html=1
|
||||
#debug=nose
|
||||
#debug-log=nose.log
|
||||
stop=1
|
||||
verbosity=2
|
||||
tests=tests
|
||||
#tests=tests/test_threadsafety.py
|
||||
#tests=tests/test_bulkdata.py
|
||||
#tests=tests/test_mustclose.py
|
||||
#tests=tests/test_lrucache.py
|
||||
#tests=tests/test_cmdline.py
|
||||
#tests=tests/test_layout.py
|
||||
#tests=tests/test_rbtree.py
|
||||
#tests=tests/test_interval.py
|
||||
#tests=tests/test_rbtree.py,tests/test_interval.py
|
||||
#tests=tests/test_interval.py
|
||||
#tests=tests/test_client.py
|
||||
#tests=tests/test_timestamper.py
|
||||
#tests=tests/test_serializer.py
|
||||
#tests=tests/test_iteratorizer.py
|
||||
#tests=tests/test_client.py:TestClient.test_client_nilmdb
|
||||
#tests=tests/test_nilmdb.py
|
||||
#with-profile=1
|
||||
#profile-sort=time
|
||||
##profile-restrict=10 # doesn't work right, treated as string or something
|
||||
|
||||
[versioneer]
|
||||
VCS=git
|
||||
style=pep440
|
||||
versionfile_source=nilmdb/_version.py
|
||||
versionfile_build=nilmdb/_version.py
|
||||
tag_prefix=nilmdb-
|
||||
parentdir_prefix=nilmdb-
|
||||
|
||||
[flake8]
|
||||
exclude=_version.py
|
||||
extend-ignore=E731
|
||||
per-file-ignores=__init__.py:F401,E402 \
|
||||
serializer.py:E722 \
|
||||
mustclose.py:E722 \
|
||||
fsck.py:E266
|
||||
|
||||
[pylint]
|
||||
ignore=_version.py
|
||||
disable=C0103,C0111,R0913,R0914
|
||||
|
|
67
setup.py
67
setup.py
|
@ -1,9 +1,64 @@
|
|||
#!/usr/bin/python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from distutils.core import setup
|
||||
# To release a new version, tag it:
|
||||
# git tag -a nilmdb-1.1 -m "Version 1.1"
|
||||
# git push --tags
|
||||
# Then just package it up:
|
||||
# python3 setup.py sdist
|
||||
|
||||
setup(name = 'nilmdb',
|
||||
version = '1.0',
|
||||
scripts = [ 'bin/nilm-test.py' ],
|
||||
packages = [ 'nilmdb' ],
|
||||
import sys
|
||||
import os
|
||||
from setuptools import setup
|
||||
from distutils.extension import Extension
|
||||
|
||||
# Versioneer manages version numbers from git tags.
|
||||
# https://github.com/warner/python-versioneer
|
||||
import versioneer
|
||||
|
||||
# External modules that need to be built
|
||||
ext_modules = [ Extension('nilmdb.server.rocket', ['nilmdb/server/rocket.c' ]) ]
|
||||
|
||||
# Use Cython.
|
||||
cython_modules = [ 'nilmdb.server.interval', 'nilmdb.server.rbtree' ]
|
||||
import Cython
|
||||
from Cython.Build import cythonize
|
||||
for modulename in cython_modules:
|
||||
filename = modulename.replace('.','/')
|
||||
ext_modules.extend(cythonize(filename + ".pyx"))
|
||||
|
||||
# Get list of requirements to use in `install_requires` below. Note
|
||||
# that we don't make a distinction between things that are actually
|
||||
# required for end-users vs developers (or use `test_requires` or
|
||||
# anything else) -- just install everything for simplicity.
|
||||
install_requires = open('requirements.txt').readlines()
|
||||
|
||||
# Run setup
|
||||
setup(name='nilmdb',
|
||||
version = versioneer.get_version(),
|
||||
cmdclass = versioneer.get_cmdclass(),
|
||||
url = 'https://git.jim.sh/nilm/nilmdb.git',
|
||||
author = 'Jim Paris',
|
||||
description = "NILM Database",
|
||||
long_description = "NILM Database",
|
||||
license = "Proprietary",
|
||||
author_email = 'jim@jtan.com',
|
||||
setup_requires = [ 'setuptools' ],
|
||||
install_requires = install_requires,
|
||||
packages = [ 'nilmdb',
|
||||
'nilmdb.utils',
|
||||
'nilmdb.server',
|
||||
'nilmdb.client',
|
||||
'nilmdb.cmdline',
|
||||
'nilmdb.scripts',
|
||||
'nilmdb.fsck',
|
||||
],
|
||||
entry_points = {
|
||||
'console_scripts': [
|
||||
'nilmtool = nilmdb.scripts.nilmtool:main',
|
||||
'nilmdb-server = nilmdb.scripts.nilmdb_server:main',
|
||||
'nilmdb-fsck = nilmdb.scripts.nilmdb_fsck:main',
|
||||
],
|
||||
},
|
||||
ext_modules = ext_modules,
|
||||
zip_safe = False,
|
||||
)
|
||||
|
|
|
@ -1,5 +0,0 @@
|
|||
all:
|
||||
python speed-readbinary.py
|
||||
|
||||
clean:
|
||||
rm -f *pyc
|
|
@ -1,4 +0,0 @@
|
|||
from __future__ import print_function
|
||||
def printf(str, *args):
    """Apply %-formatting of 'str' with 'args' and print the result
    without a trailing newline."""
    text = str % args
    print(text, end='')
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
#!/usr/bin/python

# Benchmark: compares several ways of parsing up to six integers from
# each line of '1m.raw', ignoring anything after a '#'.

from printf import printf
import time
import re
import numpy as np
import itertools

class Timer():
    # Context manager that prints a rate for the enclosed block.
    # NOTE(review): the 1e6 numerator assumes the block processed one
    # million lines (cf. the '1m.raw' file name) -- confirm.
    def __init__(self, arg):
        self.arg = arg
    def __enter__(self): self.start = time.time()
    def __exit__(self, *args): printf("%s: %f lines/sec\n", self.arg, 1e6 / (time.time() - self.start))

def test_split():
    # str.split() plus int(); missing trailing values stay 0.
    for n, line in enumerate(open('1m.raw', 'r')):
        out = [0]*6
        tmp = [ int(i) for i in line.partition('#')[0].split() ]
        out[0:len(tmp)] = tmp
        if (n % 100000 == 0):
            printf("line %d = %s\n", n, str(out))

def test_split2():
    # Same as test_split, but passing an explicit base to int().
    for n, line in enumerate(open('1m.raw', 'r')):
        out = [0]*6
        tmp = [ int(i,10) for i in line.partition('#')[0].split() ]
        out[0:len(tmp)] = tmp
        if (n % 100000 == 0):
            printf("line %d = %s\n", n, str(out))

def test_regex():
    # re.findall() of digit runs that are followed by whitespace.
    for n, line in enumerate(open('1m.raw', 'r')):
        out = [0]*6
        tmp = [ int(x) for x in re.findall('(\d+)\s+',line.partition('#')[0]) ]
        out[0:len(tmp)] = tmp
        if (n % 100000 == 0):
            printf("line %d = %s\n", n, str(out))

def test_bigregex():
    # One precompiled pattern with six optional groups; groups that
    # did not match become 0.
    regex = re.compile('^(?:\s*)' + '(?:(\d+)\s+)?' * 6)
    for n, line in enumerate(open('1m.raw', 'r')):
        out = [ int(x or 0) for x in re.match(regex, line).groups() ]
        if (n % 100000 == 0):
            printf("line %d = %s\n", n, str(out))

# Rates in the trailing comments were recorded by the original author.
with Timer("regex"):
    test_regex() # 102k/sec

with Timer("split"):
    test_split() # 175k/sec

with Timer("split2"):
    test_split2() # 275k/sec

with Timer("bigregex"):
    test_bigregex() # 110k/sec

# The "int" operation takes quite a while -- int(x,10) is twice as fast
# Perl does about 500k/sec
|
||||
|
|
@ -1,74 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
from printf import printf
|
||||
import time
|
||||
import re
|
||||
import numpy as np
|
||||
import itertools
|
||||
import struct
|
||||
import array
|
||||
import os
|
||||
import mmap
|
||||
|
||||
class Timer():
|
||||
def __init__(self, arg):
|
||||
self.arg = arg
|
||||
def __enter__(self): self.start = time.time()
|
||||
def __exit__(self, *args): printf("%s: %f klines/sec\n", self.arg, 1e3 / (time.time() - self.start))
|
||||
|
||||
def test_struct1():
|
||||
"""read with struct.unpack"""
|
||||
f = open('1m.bin', 'rb')
|
||||
f.seek(0,os.SEEK_END)
|
||||
filesize = f.tell()
|
||||
f.seek(0,os.SEEK_SET)
|
||||
packer = struct.Struct('!dHHHHHH')
|
||||
items = filesize / packer.size
|
||||
for n in xrange(items):
|
||||
s = f.read(packer.size)
|
||||
out = packer.unpack(s)
|
||||
if (n % 100000 == 0):
|
||||
printf("line %d = %s\n", n, str(out))
|
||||
|
||||
def test_struct2():
|
||||
"""read with struct.unpack, convert to string"""
|
||||
f = open('1m.bin', 'rb')
|
||||
f.seek(0,os.SEEK_END)
|
||||
filesize = f.tell()
|
||||
f.seek(0,os.SEEK_SET)
|
||||
packer = struct.Struct('!dHHHHHH')
|
||||
items = filesize / packer.size
|
||||
for n in xrange(items):
|
||||
s = f.read(packer.size)
|
||||
out = packer.unpack(s)
|
||||
x = str(out)
|
||||
if (n % 100000 == 0):
|
||||
printf("line %d = %s\n", n, str(out))
|
||||
|
||||
def test_mmap():
|
||||
"""struct.unpack with mmap"""
|
||||
with open('1m.bin', 'rb') as f:
|
||||
f.seek(0,os.SEEK_END)
|
||||
filesize = f.tell()
|
||||
f.seek(0,os.SEEK_SET)
|
||||
m = mmap.mmap(f.fileno(), filesize, access=mmap.ACCESS_READ)
|
||||
packer = struct.Struct('!dHHHHHH')
|
||||
items = filesize / packer.size
|
||||
for n in xrange(items):
|
||||
out = packer.unpack(m[packer.size*n : packer.size*(n+1)])
|
||||
if (n % 100000 == 0):
|
||||
printf("line %d = %s\n", n, str(out))
|
||||
|
||||
with Timer("mmap"):
|
||||
test_mmap() # 1600k
|
||||
|
||||
with Timer("struct1"):
|
||||
test_struct1() # 1460k
|
||||
|
||||
with Timer("struct2"):
|
||||
test_struct2() # 210k
|
||||
|
||||
# Reading from the file is again much quicker than converting to string
|
||||
# Use mmap, it's good
|
||||
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
from printf import printf
|
||||
import time
|
||||
import re
|
||||
import numpy as np
|
||||
import itertools
|
||||
import struct
|
||||
import array
|
||||
|
||||
class Timer():
|
||||
def __init__(self, arg):
|
||||
self.arg = arg
|
||||
def __enter__(self): self.start = time.time()
|
||||
def __exit__(self, *args): printf("%s: %f klines/sec\n", self.arg, 1e3 / (time.time() - self.start))
|
||||
|
||||
def read_ascii():
|
||||
for n in xrange(1000000):
|
||||
yield (1234, 2345, 3456, 4576, 5678, 6789)
|
||||
# for n, line in enumerate(open('1m.raw', 'r')):
|
||||
# out = [0]*6
|
||||
# tmp = [ int(i,10) for i in line.partition('#')[0].split() ]
|
||||
# out[0:len(tmp)] = tmp
|
||||
# if (n % 100000 == 0):
|
||||
# printf("line %d = %s\n", n, str(out))
|
||||
# yield out
|
||||
|
||||
def test_struct1():
|
||||
"""write with struct.pack"""
|
||||
f = open('1m.bin', 'wb')
|
||||
for out in read_ascii():
|
||||
s = struct.pack('!HHHHHH', *out)
|
||||
f.write(s)
|
||||
|
||||
def test_struct2():
|
||||
"""use constant format string"""
|
||||
f = open('1m.bin', 'wb')
|
||||
packer = struct.Struct('!HHHHHH')
|
||||
for out in read_ascii():
|
||||
f.write(packer.pack(*out))
|
||||
f.close()
|
||||
printf("size was %d\n", packer.size)
|
||||
|
||||
def test_struct3():
|
||||
"""like struct1, with timestamp"""
|
||||
f = open('1m.bin', 'wb')
|
||||
for out in read_ascii():
|
||||
s = struct.pack('!dHHHHHH', time.time(), *out)
|
||||
f.write(s)
|
||||
|
||||
def test_struct4():
|
||||
"""like struct2, with timestamp"""
|
||||
f = open('1m.bin', 'wb')
|
||||
packer = struct.Struct('!dHHHHHH')
|
||||
for out in read_ascii():
|
||||
f.write(packer.pack(time.time(), *out))
|
||||
f.close()
|
||||
printf("size was %d\n", packer.size)
|
||||
|
||||
#raise Exception('done')
|
||||
|
||||
with Timer("struct1"):
|
||||
test_struct1() # 1089k
|
||||
|
||||
with Timer("struct2"):
|
||||
test_struct2() # 1249k
|
||||
|
||||
with Timer("struct3"):
|
||||
test_struct3() # 845k
|
||||
|
||||
with Timer("struct4"):
|
||||
test_struct4() # 922k
|
||||
|
||||
# This seems fast enough for writing new data, since it's faster than
|
||||
# we read ascii data anyway. Use e.g. struct4
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
import struct
|
||||
import mmap
|
||||
|
||||
f = open("test.dat", "rb+")
|
||||
mm = mmap.mmap(f.fileno(),3)
|
||||
|
||||
print len(mm)
|
||||
print "first 3 bytes: " + mm[0:3];
|
||||
|
|
@ -1 +0,0 @@
|
|||
|
|
@ -1,7 +0,0 @@
|
|||
- Have a class representing the file contents
|
||||
- Looks like an array
|
||||
- len(), get(), index
|
||||
- some form of bisect search
|
||||
- get_extents = return [0].timestamp, [-1].timestamp
|
||||
-
|
||||
- Can append? Sure, why not. Just write to the file, extend mmap accordingly.
|
124
tests/data/extract-1
Normal file
124
tests/data/extract-1
Normal file
|
@ -0,0 +1,124 @@
|
|||
# path: /newton/prep
|
||||
# layout: float32_8
|
||||
# start: Fri, 23 Mar 2012 10:00:30.000000 +0000
|
||||
# end: Fri, 23 Mar 2012 10:00:31.000000 +0000
|
||||
1332496830000000 2.517740e+05 2.242410e+05 5.688100e+03 1.915530e+03 9.329220e+03 4.183710e+03 1.212350e+03 2.641790e+03
|
||||
1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03
|
||||
1332496830016667 2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03
|
||||
1332496830025000 2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03
|
||||
1332496830033333 2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03
|
||||
1332496830041667 2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03
|
||||
1332496830050000 2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03
|
||||
1332496830058333 2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03
|
||||
1332496830066667 2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03
|
||||
1332496830075000 2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03
|
||||
1332496830083333 2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03
|
||||
1332496830091667 2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03
|
||||
1332496830100000 2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03
|
||||
1332496830108333 2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03
|
||||
1332496830116667 2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03
|
||||
1332496830125000 2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03
|
||||
1332496830133333 2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03
|
||||
1332496830141667 2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03
|
||||
1332496830150000 2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03
|
||||
1332496830158333 2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03
|
||||
1332496830166667 2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03
|
||||
1332496830175000 2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03
|
||||
1332496830183333 2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03
|
||||
1332496830191667 2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03
|
||||
1332496830200000 2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03
|
||||
1332496830208333 2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03
|
||||
1332496830216667 2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03
|
||||
1332496830225000 2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03
|
||||
1332496830233333 2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03
|
||||
1332496830241667 2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03
|
||||
1332496830250000 2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03
|
||||
1332496830258333 2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03
|
||||
1332496830266667 2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03
|
||||
1332496830275000 2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03
|
||||
1332496830283333 2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03
|
||||
1332496830291667 2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03
|
||||
1332496830300000 2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03
|
||||
1332496830308333 2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03
|
||||
1332496830316667 2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03
|
||||
1332496830325000 2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03
|
||||
1332496830333333 2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03
|
||||
1332496830341667 2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03
|
||||
1332496830350000 2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03
|
||||
1332496830358333 2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03
|
||||
1332496830366667 2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03
|
||||
1332496830375000 2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03
|
||||
1332496830383333 2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03
|
||||
1332496830391667 2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03
|
||||
1332496830400000 2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03
|
||||
1332496830408333 2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03
|
||||
1332496830416667 2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03
|
||||
1332496830425000 2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03
|
||||
1332496830433333 2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03
|
||||
1332496830441667 2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03
|
||||
1332496830450000 2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03
|
||||
1332496830458333 2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03
|
||||
1332496830466667 2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03
|
||||
1332496830475000 2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03
|
||||
1332496830483333 2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03
|
||||
1332496830491667 2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03
|
||||
1332496830500000 2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03
|
||||
1332496830508333 2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03
|
||||
1332496830516667 2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03
|
||||
1332496830525000 2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03
|
||||
1332496830533333 2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03
|
||||
1332496830541667 2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03
|
||||
1332496830550000 2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03
|
||||
1332496830558333 2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03
|
||||
1332496830566667 2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03
|
||||
1332496830575000 2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03
|
||||
1332496830583333 2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03
|
||||
1332496830591667 2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03
|
||||
1332496830600000 2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03
|
||||
1332496830608333 2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03
|
||||
1332496830616667 2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03
|
||||
1332496830625000 2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03
|
||||
1332496830633333 2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03
|
||||
1332496830641667 2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03
|
||||
1332496830650000 2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03
|
||||
1332496830658333 2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03
|
||||
1332496830666667 2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03
|
||||
1332496830675000 2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03
|
||||
1332496830683333 2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03
|
||||
1332496830691667 2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03
|
||||
1332496830700000 2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03
|
||||
1332496830708333 2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03
|
||||
1332496830716667 2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03
|
||||
1332496830725000 2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03
|
||||
1332496830733333 2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03
|
||||
1332496830741667 2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03
|
||||
1332496830750000 2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03
|
||||
1332496830758333 2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03
|
||||
1332496830766667 2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03
|
||||
1332496830775000 2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03
|
||||
1332496830783333 2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03
|
||||
1332496830791667 2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03
|
||||
1332496830800000 2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03
|
||||
1332496830808333 2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03
|
||||
1332496830816667 2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03
|
||||
1332496830825000 2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03
|
||||
1332496830833333 2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03
|
||||
1332496830841667 2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03
|
||||
1332496830850000 2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03
|
||||
1332496830858333 2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03
|
||||
1332496830866667 2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03
|
||||
1332496830875000 2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03
|
||||
1332496830883333 2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03
|
||||
1332496830891667 2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03
|
||||
1332496830900000 2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03
|
||||
1332496830908333 2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03
|
||||
1332496830916667 2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03
|
||||
1332496830925000 2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03
|
||||
1332496830933333 2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03
|
||||
1332496830941667 2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03
|
||||
1332496830950000 2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03
|
||||
1332496830958333 2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03
|
||||
1332496830966667 2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03
|
||||
1332496830975000 2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03
|
||||
1332496830983333 2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03
|
||||
1332496830991667 2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03
|
119
tests/data/extract-2
Normal file
119
tests/data/extract-2
Normal file
|
@ -0,0 +1,119 @@
|
|||
1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03
|
||||
1332496830016667 2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03
|
||||
1332496830025000 2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03
|
||||
1332496830033333 2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03
|
||||
1332496830041667 2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03
|
||||
1332496830050000 2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03
|
||||
1332496830058333 2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03
|
||||
1332496830066667 2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03
|
||||
1332496830075000 2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03
|
||||
1332496830083333 2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03
|
||||
1332496830091667 2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03
|
||||
1332496830100000 2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03
|
||||
1332496830108333 2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03
|
||||
1332496830116667 2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03
|
||||
1332496830125000 2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03
|
||||
1332496830133333 2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03
|
||||
1332496830141667 2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03
|
||||
1332496830150000 2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03
|
||||
1332496830158333 2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03
|
||||
1332496830166667 2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03
|
||||
1332496830175000 2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03
|
||||
1332496830183333 2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03
|
||||
1332496830191667 2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03
|
||||
1332496830200000 2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03
|
||||
1332496830208333 2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03
|
||||
1332496830216667 2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03
|
||||
1332496830225000 2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03
|
||||
1332496830233333 2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03
|
||||
1332496830241667 2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03
|
||||
1332496830250000 2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03
|
||||
1332496830258333 2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03
|
||||
1332496830266667 2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03
|
||||
1332496830275000 2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03
|
||||
1332496830283333 2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03
|
||||
1332496830291667 2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03
|
||||
1332496830300000 2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03
|
||||
1332496830308333 2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03
|
||||
1332496830316667 2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03
|
||||
1332496830325000 2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03
|
||||
1332496830333333 2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03
|
||||
1332496830341667 2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03
|
||||
1332496830350000 2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03
|
||||
1332496830358333 2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03
|
||||
1332496830366667 2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03
|
||||
1332496830375000 2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03
|
||||
1332496830383333 2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03
|
||||
1332496830391667 2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03
|
||||
1332496830400000 2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03
|
||||
1332496830408333 2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03
|
||||
1332496830416667 2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03
|
||||
1332496830425000 2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03
|
||||
1332496830433333 2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03
|
||||
1332496830441667 2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03
|
||||
1332496830450000 2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03
|
||||
1332496830458333 2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03
|
||||
1332496830466667 2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03
|
||||
1332496830475000 2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03
|
||||
1332496830483333 2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03
|
||||
1332496830491667 2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03
|
||||
1332496830500000 2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03
|
||||
1332496830508333 2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03
|
||||
1332496830516667 2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03
|
||||
1332496830525000 2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03
|
||||
1332496830533333 2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03
|
||||
1332496830541667 2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03
|
||||
1332496830550000 2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03
|
||||
1332496830558333 2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03
|
||||
1332496830566667 2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03
|
||||
1332496830575000 2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03
|
||||
1332496830583333 2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03
|
||||
1332496830591667 2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03
|
||||
1332496830600000 2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03
|
||||
1332496830608333 2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03
|
||||
1332496830616667 2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03
|
||||
1332496830625000 2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03
|
||||
1332496830633333 2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03
|
||||
1332496830641667 2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03
|
||||
1332496830650000 2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03
|
||||
1332496830658333 2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03
|
||||
1332496830666667 2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03
|
||||
1332496830675000 2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03
|
||||
1332496830683333 2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03
|
||||
1332496830691667 2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03
|
||||
1332496830700000 2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03
|
||||
1332496830708333 2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03
|
||||
1332496830716667 2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03
|
||||
1332496830725000 2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03
|
||||
1332496830733333 2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03
|
||||
1332496830741667 2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03
|
||||
1332496830750000 2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03
|
||||
1332496830758333 2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03
|
||||
1332496830766667 2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03
|
||||
1332496830775000 2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03
|
||||
1332496830783333 2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03
|
||||
1332496830791667 2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03
|
||||
1332496830800000 2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03
|
||||
1332496830808333 2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03
|
||||
1332496830816667 2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03
|
||||
1332496830825000 2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03
|
||||
1332496830833333 2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03
|
||||
1332496830841667 2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03
|
||||
1332496830850000 2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03
|
||||
1332496830858333 2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03
|
||||
1332496830866667 2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03
|
||||
1332496830875000 2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03
|
||||
1332496830883333 2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03
|
||||
1332496830891667 2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03
|
||||
1332496830900000 2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03
|
||||
1332496830908333 2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03
|
||||
1332496830916667 2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03
|
||||
1332496830925000 2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03
|
||||
1332496830933333 2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03
|
||||
1332496830941667 2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03
|
||||
1332496830950000 2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03
|
||||
1332496830958333 2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03
|
||||
1332496830966667 2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03
|
||||
1332496830975000 2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03
|
||||
1332496830983333 2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03
|
||||
1332496830991667 2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03
|
1
tests/data/extract-3
Normal file
1
tests/data/extract-3
Normal file
|
@ -0,0 +1 @@
|
|||
1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03
|
2
tests/data/extract-4
Normal file
2
tests/data/extract-4
Normal file
|
@ -0,0 +1,2 @@
|
|||
1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03
|
||||
1332496830016667 2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03
|
124
tests/data/extract-5
Normal file
124
tests/data/extract-5
Normal file
|
@ -0,0 +1,124 @@
|
|||
# path: /newton/prep
|
||||
# layout: float32_8
|
||||
# start: Fri, 23 Mar 2012 10:00:30.000000 +0000
|
||||
# end: Fri, 23 Mar 2012 10:00:31.000000 +0000
|
||||
2.517740e+05 2.242410e+05 5.688100e+03 1.915530e+03 9.329220e+03 4.183710e+03 1.212350e+03 2.641790e+03
|
||||
2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03
|
||||
2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03
|
||||
2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03
|
||||
2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03
|
||||
2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03
|
||||
2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03
|
||||
2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03
|
||||
2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03
|
||||
2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03
|
||||
2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03
|
||||
2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03
|
||||
2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03
|
||||
2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03
|
||||
2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03
|
||||
2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03
|
||||
2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03
|
||||
2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03
|
||||
2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03
|
||||
2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03
|
||||
2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03
|
||||
2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03
|
||||
2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03
|
||||
2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03
|
||||
2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03
|
||||
2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03
|
||||
2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03
|
||||
2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03
|
||||
2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03
|
||||
2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03
|
||||
2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03
|
||||
2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03
|
||||
2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03
|
||||
2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03
|
||||
2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03
|
||||
2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03
|
||||
2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03
|
||||
2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03
|
||||
2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03
|
||||
2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03
|
||||
2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03
|
||||
2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03
|
||||
2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03
|
||||
2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03
|
||||
2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03
|
||||
2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03
|
||||
2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03
|
||||
2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03
|
||||
2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03
|
||||
2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03
|
||||
2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03
|
||||
2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03
|
||||
2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03
|
||||
2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03
|
||||
2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03
|
||||
2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03
|
||||
2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03
|
||||
2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03
|
||||
2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03
|
||||
2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03
|
||||
2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03
|
||||
2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03
|
||||
2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03
|
||||
2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03
|
||||
2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03
|
||||
2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03
|
||||
2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03
|
||||
2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03
|
||||
2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03
|
||||
2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03
|
||||
2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03
|
||||
2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03
|
||||
2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03
|
||||
2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03
|
||||
2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03
|
||||
2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03
|
||||
2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03
|
||||
2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03
|
||||
2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03
|
||||
2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03
|
||||
2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03
|
||||
2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03
|
||||
2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03
|
||||
2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03
|
||||
2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03
|
||||
2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03
|
||||
2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03
|
||||
2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03
|
||||
2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03
|
||||
2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03
|
||||
2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03
|
||||
2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03
|
||||
2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03
|
||||
2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03
|
||||
2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03
|
||||
2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03
|
||||
2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03
|
||||
2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03
|
||||
2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03
|
||||
2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03
|
||||
2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03
|
||||
2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03
|
||||
2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03
|
||||
2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03
|
||||
2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03
|
||||
2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03
|
||||
2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03
|
||||
2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03
|
||||
2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03
|
||||
2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03
|
||||
2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03
|
||||
2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03
|
||||
2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03
|
||||
2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03
|
||||
2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03
|
||||
2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03
|
||||
2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03
|
||||
2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03
|
||||
2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03
|
||||
2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03
|
120
tests/data/extract-6
Normal file
120
tests/data/extract-6
Normal file
|
@ -0,0 +1,120 @@
|
|||
2.517740e+05 2.242410e+05 5.688100e+03 1.915530e+03 9.329220e+03 4.183710e+03 1.212350e+03 2.641790e+03
|
||||
2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03
|
||||
2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03
|
||||
2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03
|
||||
2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03
|
||||
2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03
|
||||
2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03
|
||||
2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03
|
||||
2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03
|
||||
2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03
|
||||
2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03
|
||||
2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03
|
||||
2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03
|
||||
2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03
|
||||
2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03
|
||||
2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03
|
||||
2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03
|
||||
2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03
|
||||
2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03
|
||||
2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03
|
||||
2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03
|
||||
2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03
|
||||
2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03
|
||||
2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03
|
||||
2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03
|
||||
2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03
|
||||
2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03
|
||||
2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03
|
||||
2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03
|
||||
2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03
|
||||
2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03
|
||||
2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03
|
||||
2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03
|
||||
2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03
|
||||
2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03
|
||||
2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03
|
||||
2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03
|
||||
2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03
|
||||
2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03
|
||||
2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03
|
||||
2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03
|
||||
2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03
|
||||
2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03
|
||||
2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03
|
||||
2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03
|
||||
2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03
|
||||
2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03
|
||||
2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03
|
||||
2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03
|
||||
2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03
|
||||
2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03
|
||||
2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03
|
||||
2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03
|
||||
2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03
|
||||
2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03
|
||||
2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03
|
||||
2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03
|
||||
2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03
|
||||
2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03
|
||||
2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03
|
||||
2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03
|
||||
2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03
|
||||
2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03
|
||||
2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03
|
||||
2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03
|
||||
2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03
|
||||
2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03
|
||||
2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03
|
||||
2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03
|
||||
2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03
|
||||
2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03
|
||||
2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03
|
||||
2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03
|
||||
2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03
|
||||
2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03
|
||||
2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03
|
||||
2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03
|
||||
2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03
|
||||
2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03
|
||||
2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03
|
||||
2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03
|
||||
2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03
|
||||
2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03
|
||||
2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03
|
||||
2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03
|
||||
2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03
|
||||
2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03
|
||||
2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03
|
||||
2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03
|
||||
2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03
|
||||
2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03
|
||||
2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03
|
||||
2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03
|
||||
2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03
|
||||
2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03
|
||||
2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03
|
||||
2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03
|
||||
2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03
|
||||
2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03
|
||||
2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03
|
||||
2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03
|
||||
2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03
|
||||
2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03
|
||||
2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03
|
||||
2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03
|
||||
2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03
|
||||
2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03
|
||||
2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03
|
||||
2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03
|
||||
2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03
|
||||
2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03
|
||||
2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03
|
||||
2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03
|
||||
2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03
|
||||
2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03
|
||||
2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03
|
||||
2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03
|
||||
2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03
|
||||
2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03
|
||||
2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03
|
124
tests/data/extract-7
Normal file
124
tests/data/extract-7
Normal file
|
@ -0,0 +1,124 @@
|
|||
# path: /newton/prep
|
||||
# layout: float32_8
|
||||
# start: 1332496830000000
|
||||
# end: 1332496830999000
|
||||
1332496830000000 2.517740e+05 2.242410e+05 5.688100e+03 1.915530e+03 9.329220e+03 4.183710e+03 1.212350e+03 2.641790e+03
|
||||
1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03
|
||||
1332496830016667 2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03
|
||||
1332496830025000 2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03
|
||||
1332496830033333 2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03
|
||||
1332496830041667 2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03
|
||||
1332496830050000 2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03
|
||||
1332496830058333 2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03
|
||||
1332496830066667 2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03
|
||||
1332496830075000 2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03
|
||||
1332496830083333 2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03
|
||||
1332496830091667 2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03
|
||||
1332496830100000 2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03
|
||||
1332496830108333 2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03
|
||||
1332496830116667 2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03
|
||||
1332496830125000 2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03
|
||||
1332496830133333 2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03
|
||||
1332496830141667 2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03
|
||||
1332496830150000 2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03
|
||||
1332496830158333 2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03
|
||||
1332496830166667 2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03
|
||||
1332496830175000 2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03
|
||||
1332496830183333 2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03
|
||||
1332496830191667 2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03
|
||||
1332496830200000 2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03
|
||||
1332496830208333 2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03
|
||||
1332496830216667 2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03
|
||||
1332496830225000 2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03
|
||||
1332496830233333 2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03
|
||||
1332496830241667 2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03
|
||||
1332496830250000 2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03
|
||||
1332496830258333 2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03
|
||||
1332496830266667 2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03
|
||||
1332496830275000 2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03
|
||||
1332496830283333 2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03
|
||||
1332496830291667 2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03
|
||||
1332496830300000 2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03
|
||||
1332496830308333 2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03
|
||||
1332496830316667 2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03
|
||||
1332496830325000 2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03
|
||||
1332496830333333 2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03
|
||||
1332496830341667 2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03
|
||||
1332496830350000 2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03
|
||||
1332496830358333 2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03
|
||||
1332496830366667 2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03
|
||||
1332496830375000 2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03
|
||||
1332496830383333 2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03
|
||||
1332496830391667 2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03
|
||||
1332496830400000 2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03
|
||||
1332496830408333 2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03
|
||||
1332496830416667 2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03
|
||||
1332496830425000 2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03
|
||||
1332496830433333 2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03
|
||||
1332496830441667 2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03
|
||||
1332496830450000 2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03
|
||||
1332496830458333 2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03
|
||||
1332496830466667 2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03
|
||||
1332496830475000 2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03
|
||||
1332496830483333 2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03
|
||||
1332496830491667 2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03
|
||||
1332496830500000 2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03
|
||||
1332496830508333 2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03
|
||||
1332496830516667 2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03
|
||||
1332496830525000 2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03
|
||||
1332496830533333 2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03
|
||||
1332496830541667 2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03
|
||||
1332496830550000 2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03
|
||||
1332496830558333 2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03
|
||||
1332496830566667 2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03
|
||||
1332496830575000 2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03
|
||||
1332496830583333 2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03
|
||||
1332496830591667 2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03
|
||||
1332496830600000 2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03
|
||||
1332496830608333 2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03
|
||||
1332496830616667 2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03
|
||||
1332496830625000 2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03
|
||||
1332496830633333 2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03
|
||||
1332496830641667 2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03
|
||||
1332496830650000 2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03
|
||||
1332496830658333 2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03
|
||||
1332496830666667 2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03
|
||||
1332496830675000 2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03
|
||||
1332496830683333 2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03
|
||||
1332496830691667 2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03
|
||||
1332496830700000 2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03
|
||||
1332496830708333 2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03
|
||||
1332496830716667 2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03
|
||||
1332496830725000 2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03
|
||||
1332496830733333 2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03
|
||||
1332496830741667 2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03
|
||||
1332496830750000 2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03
|
||||
1332496830758333 2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03
|
||||
1332496830766667 2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03
|
||||
1332496830775000 2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03
|
||||
1332496830783333 2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03
|
||||
1332496830791667 2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03
|
||||
1332496830800000 2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03
|
||||
1332496830808333 2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03
|
||||
1332496830816667 2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03
|
||||
1332496830825000 2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03
|
||||
1332496830833333 2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03
|
||||
1332496830841667 2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03
|
||||
1332496830850000 2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03
|
||||
1332496830858333 2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03
|
||||
1332496830866667 2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03
|
||||
1332496830875000 2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03
|
||||
1332496830883333 2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03
|
||||
1332496830891667 2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03
|
||||
1332496830900000 2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03
|
||||
1332496830908333 2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03
|
||||
1332496830916667 2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03
|
||||
1332496830925000 2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03
|
||||
1332496830933333 2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03
|
||||
1332496830941667 2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03
|
||||
1332496830950000 2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03
|
||||
1332496830958333 2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03
|
||||
1332496830966667 2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03
|
||||
1332496830975000 2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03
|
||||
1332496830983333 2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03
|
||||
1332496830991667 2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03
|
28
tests/data/extract-8
Normal file
28
tests/data/extract-8
Normal file
|
@ -0,0 +1,28 @@
|
|||
# interval-start 1332496919900000
|
||||
1332496919900000 2.523050e+05 2.254020e+05 4.779410e+03 3.638030e+03 8.138070e+03 4.334460e+03 1.083780e+03 3.743730e+03
|
||||
1332496919908333 2.551190e+05 2.237870e+05 5.965640e+03 2.076350e+03 9.468790e+03 3.693880e+03 1.247860e+03 3.393680e+03
|
||||
1332496919916667 2.616370e+05 2.247980e+05 4.848970e+03 2.315620e+03 9.323300e+03 4.225460e+03 1.805780e+03 2.593050e+03
|
||||
1332496919925000 2.606460e+05 2.251300e+05 3.061360e+03 3.951840e+03 7.662910e+03 5.341410e+03 1.986520e+03 2.276780e+03
|
||||
1332496919933333 2.559710e+05 2.235030e+05 4.096030e+03 3.296970e+03 7.827080e+03 5.452120e+03 2.492520e+03 2.929450e+03
|
||||
1332496919941667 2.579260e+05 2.217080e+05 5.472320e+03 1.555700e+03 8.495760e+03 4.491140e+03 2.379780e+03 3.741710e+03
|
||||
1332496919950000 2.610180e+05 2.242350e+05 4.669770e+03 1.876190e+03 8.366680e+03 3.677510e+03 9.021690e+02 3.549040e+03
|
||||
1332496919958333 2.569150e+05 2.274650e+05 2.785070e+03 3.751930e+03 7.440320e+03 3.964860e+03 -3.227860e+02 2.460890e+03
|
||||
1332496919966667 2.509510e+05 2.262000e+05 3.772710e+03 3.131950e+03 8.159860e+03 4.539860e+03 7.375190e+02 2.126750e+03
|
||||
1332496919975000 2.556710e+05 2.223720e+05 5.826200e+03 8.715560e+02 9.120240e+03 4.545110e+03 2.804310e+03 2.721000e+03
|
||||
1332496919983333 2.649730e+05 2.214860e+05 5.839130e+03 4.659180e+02 8.628300e+03 3.934870e+03 2.972490e+03 3.773730e+03
|
||||
1332496919991667 2.652170e+05 2.233920e+05 3.718770e+03 2.834970e+03 7.209900e+03 3.460260e+03 1.324930e+03 4.075960e+03
|
||||
# interval-end 1332496919991668
|
||||
# interval-start 1332496920000000
|
||||
1332496920000000 2.564370e+05 2.244300e+05 4.011610e+03 3.475340e+03 7.495890e+03 3.388940e+03 2.613970e+02 3.731260e+03
|
||||
1332496920008333 2.539630e+05 2.241670e+05 5.621070e+03 1.548010e+03 9.165170e+03 3.522930e+03 1.058930e+03 2.996960e+03
|
||||
1332496920016667 2.585080e+05 2.249300e+05 6.011400e+03 8.188660e+02 9.039950e+03 4.482440e+03 2.490390e+03 2.679340e+03
|
||||
1332496920025000 2.596270e+05 2.260220e+05 4.474500e+03 2.423020e+03 7.414190e+03 5.071970e+03 2.439380e+03 2.962960e+03
|
||||
1332496920033333 2.551870e+05 2.246320e+05 4.738570e+03 3.398040e+03 7.395120e+03 4.726450e+03 1.839030e+03 3.393530e+03
|
||||
1332496920041667 2.571020e+05 2.216230e+05 6.144130e+03 1.441090e+03 8.756480e+03 3.495320e+03 1.869940e+03 3.752530e+03
|
||||
1332496920050000 2.636530e+05 2.217700e+05 6.221770e+03 7.389620e+02 9.547600e+03 2.666820e+03 1.462660e+03 3.332570e+03
|
||||
1332496920058333 2.636130e+05 2.252560e+05 4.477120e+03 2.437450e+03 8.510210e+03 3.855630e+03 9.594420e+02 2.387180e+03
|
||||
1332496920066667 2.553500e+05 2.262640e+05 4.283720e+03 3.923940e+03 7.912470e+03 5.466520e+03 1.284990e+03 2.093720e+03
|
||||
1332496920075000 2.527270e+05 2.246090e+05 5.851930e+03 2.491980e+03 8.540630e+03 5.623050e+03 2.339780e+03 3.007140e+03
|
||||
1332496920083333 2.584750e+05 2.235780e+05 5.924870e+03 1.394480e+03 8.779620e+03 4.544180e+03 2.132030e+03 3.849760e+03
|
||||
1332496920091667 2.615630e+05 2.246090e+05 4.336140e+03 2.455750e+03 8.055380e+03 3.469110e+03 6.278730e+02 3.664200e+03
|
||||
# interval-end 1332496920100000
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user